I am new to TensorFlow and have run into an annoying problem.
I am writing a program that loads the "raw data" of an image (as produced by `tf.WholeFileReader.read(image_name_queue)`) from a TFRecord file, decodes it with `tf.image.decode_jpeg(raw_data, channels=3)`, and then passes it through a function that vectorizes it.
main code
# Build the TFRecord input pipeline, embed every image batch and pickle the
# resulting (vector, name) pairs.
logging.info('setting up folder')
create_image_data_folder()
save_configs()

logging.info('creating graph')
# num_epochs makes string_input_producer create a local epoch counter, which
# is why the local variables are initialised below as well.
filename_queue = tf.train.string_input_producer(
    [configs.TFRECORD_IMAGES_PATH],
    num_epochs=1)
image_tensor, name_tensor = read_and_decode(filename_queue)
image_batch_tensor, name_batch_tensor = tf.train.shuffle_batch(
    [image_tensor, name_tensor],
    configs.BATCH_SIZE,
    1000 + 3 * configs.BATCH_SIZE,  # queue capacity
    min_after_dequeue=1000)
image_embedding_batch_tensor = configs.IMAGE_EMBEDDING_FUNCTION(image_batch_tensor)
init = tf.initialize_all_variables()
init_local = tf.initialize_local_variables()

logging.info('starting session')
# `with tf.Session() as sess` installs the session as default AND closes it
# on exit; `tf.Session().as_default()` alone would leave it open.
with tf.Session() as sess:
    sess.run(init)
    sess.run(init_local)
    tf.train.start_queue_runners(sess=sess)
    logging.info('vectorizing')
    data_points = []
    for _ in tqdm(xrange(get_n_batches())):
        # Fetch embeddings and names in ONE run call. Every sess.run that
        # touches the batch tensors dequeues a fresh shuffled batch, so two
        # separate calls would pair names with embeddings of other images
        # (and silently drop half of the data).
        image_embedding_batch, name_batch = sess.run(
            [image_embedding_batch_tensor, name_batch_tensor])
        data_points.extend(zip(list(image_embedding_batch), name_batch))

logging.info('saving')
save_pkl_file(data_points, 'vectors.pkl')
read_and_decode function
def read_and_decode(tfrecord_file_queue):
    """Read one serialized example from the queue and return (image, name).

    Args:
        tfrecord_file_queue: queue of TFRecord file names, passed to
            ``tf.TFRecordReader.read``.

    Returns:
        A tuple ``(image, name)`` where ``image`` is the result of
        ``process_image_data`` applied to the example's ``'image'`` string
        feature and ``name`` is the example's ``'name'`` string feature.
    """
    logging.debug('reading image and decodes it from queue')

    record_reader = tf.TFRecordReader()
    _key, serialized = record_reader.read(tfrecord_file_queue)

    # Both features are scalar strings stored in the record.
    feature_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'name': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)

    # process_image_data is defined elsewhere; presumably this is where the
    # JPEG bytes get decoded -- TODO confirm.
    decoded_image = process_image_data(parsed['image'])
    return decoded_image, parsed['name']
The code works, but eventually it encounters a bad, non-JPEG file; an error is raised and the program stops working.
Error
InvalidArgumentError (see above for traceback): Invalid JPEG data, size 556663
I want to skip these "errors". I tried wrapping the code in `try` and `except`.
new code
# Embed batch after batch, logging (instead of aborting on) a failing batch.
for _ in tqdm(xrange(get_n_batches())):
    try:
        # Fetch embeddings and names in ONE run call: each sess.run on the
        # batch tensors dequeues a new batch, so separate calls would pair
        # names with embeddings that belong to different images.
        image_embedding_batch, name_batch = sess.run(
            [image_embedding_batch_tensor, name_batch_tensor])
    except Exception as e:
        # Deliberate best-effort: log and move on to the next batch.
        # NOTE(review): if the error comes from a queue-runner thread, the
        # input queue may be closed afterwards, in which case later
        # iterations will fail too -- confirm against the TF queue docs.
        logging.warning('error occured: {}'.format(e))
    else:
        data_points.extend(zip(list(image_embedding_batch), name_batch))
, , try except , , .
? , , " ", , .