In the Tensorflow Dataset API, can you map a single item to multiple items?

In the TensorFlow Dataset pipeline, I would like to define a custom map function that takes a single input element (data sample) and returns multiple elements (data samples).

Below is my attempt, as well as the desired results.

I could not follow the documentation for tf.data.Dataset().flat_map() well enough to understand whether it is applicable here or not.

import tensorflow as tf

# Sample values for the pipeline; from_tensor_slices below turns each entry
# into one dataset element. NOTE: this name shadows the built-in input().
input = [10, 20, 30]

def my_map_func(i):
  """Return i, i+1 and i+2 as one inner list wrapped in an outer list."""
  # Fyi [[i], [i+1], [i+2]] throws an exception
  triple = [i, i+1, i+2]
  return [triple]

# Slice the input list so that each number becomes one dataset element.
ds = tf.data.Dataset.from_tensor_slices(input)

# Route every element through the Python-side my_map_func via tf.py_func;
# the wrapped call is declared to produce a single int64 output.
ds = ds.map(
  map_func=lambda item: tf.py_func(func=my_map_func, inp=[item], Tout=[tf.int64])
)

# get_next() yields the op that produces the next element on each run.
iterator = ds.make_one_shot_iterator()
element = iterator.get_next()

with tf.Session() as sess:
  # Nine fetches are attempted, one per value listed under the desired results.
  for _ in range(9):
    print(sess.run(element))

Results:

(array([10, 11, 12]),)
(array([20, 21, 22]),)
(array([30, 31, 32]),)

Desired Results:

(10)
(11)
(12)
(20)
(21)
(22)
(30)
(31)
(32)
+4
source share
1 answer

This required two more steps. First, the map function should return a numpy array, not a list.

Then you can use flat_map combined with Dataset.from_tensor_slices() to flatten the results. The code below now produces the desired result:

Tested in TensorFlow 1.5 (copy/paste runnable example):

import tensorflow as tf
import numpy as np

# Sample values for the pipeline; from_tensor_slices below turns each entry
# into one dataset element. NOTE: this name shadows the built-in input().
input = [10, 20, 30]

def my_map_func(i):
  """Expand one value into a 1-D int64 array holding i, i+1 and i+2.

  A NumPy array (not a list) is returned so that the result can later be
  split back into individual elements.
  """
  # The dtype is pinned instead of being left to NumPy's promotion rules,
  # which differ between NumPy versions and platforms; the py_func call in
  # this example declares its output as tf.int64.
  return np.array([i, i+1, i+2], dtype=np.int64)

# Each input number becomes one dataset element.
ds = tf.data.Dataset.from_tensor_slices(input)

# my_map_func runs as ordinary Python through tf.py_func and returns one
# array per input element.
ds = ds.map(map_func=lambda item: tf.py_func(
  func=my_map_func, inp=[item], Tout=[tf.int64]
))

# Flatten: every per-element array is sliced into its own dataset and the
# pieces are chained together. from_tensor_slices is called on the Dataset
# class itself (as on the first line), not on a throwaway Dataset() instance.
ds = ds.flat_map(lambda x: tf.data.Dataset.from_tensor_slices(x))

element = ds.make_one_shot_iterator().get_next()

with tf.Session() as sess:
  # 3 inputs x 3 values each = 9 elements to fetch.
  for _ in range(9):
    print(sess.run(element))

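Another way to do this, if you have several values to return: in this example the input is a string (such as a file name) and the output is multiple pairs of a string and an integer. Here the string is repeated for each of the integers in [10, 20, 30].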

Copy/paste runnable example:

import tensorflow as tf
import numpy as np

# Sample byte strings for the pipeline; from_tensor_slices below turns each
# entry into one dataset element. NOTE: this name shadows the built-in input().
input = [b'testA', b'testB', b'testC']

def my_map_func(input):
  """Pair one input value with each of the integers 10, 20 and 30.

  Returns a tuple of two equal-length arrays: the input repeated once per
  integer, and the integers [10, 20, 30] as int64.
  """
  # The integer dtype is pinned rather than left to NumPy's platform default
  # (32-bit on some platforms); the py_func call in this example declares
  # the second output as tf.int64.
  numbers = np.array([10, 20, 30], dtype=np.int64)
  repeated = np.array([input] * len(numbers))
  return repeated, numbers

# Each input byte string becomes one dataset element.
ds = tf.data.Dataset.from_tensor_slices(input)

# my_map_func returns two arrays per element (strings and integers), hence
# the two declared output types.
ds = ds.map(map_func=lambda item: tf.py_func(func=my_map_func, inp=[item], Tout=[tf.string, tf.int64]))

# Slice both arrays and zip them so that each resulting element is one
# (string, integer) pair. zip and from_tensor_slices are called on the
# Dataset class itself (as on the first line), not on a Dataset() instance.
ds = ds.flat_map(lambda mystr, myint: tf.data.Dataset.zip((
  tf.data.Dataset.from_tensor_slices(mystr),
  tf.data.Dataset.from_tensor_slices(myint))
))

element = ds.make_one_shot_iterator().get_next()

with tf.Session() as sess:
  # 3 inputs x 3 pairs each = 9 elements to fetch.
  for _ in range(9):
    print(sess.run(element))
+1

Source: https://habr.com/ru/post/1692797/


All Articles