I am using Keras 2.0.4 (TensorFlow backend) for an image-classification task (fine-tuning pre-trained models). During training, I track all relevant metrics (e.g. categorical_accuracy, categorical_crossentropy) with CSVLogger, including the corresponding validation-set metrics (i.e. val_categorical_accuracy, val_categorical_crossentropy).
With a ModelCheckpoint callback, I track the best weight configuration (save_best_only=True). To evaluate the model on the validation set, I use model.evaluate().
My expectation: the metrics logged by CSVLogger for the "best" epoch are equal to the metrics computed by model.evaluate(). Unfortunately, this is not the case: the metrics differ by ±5%. Is there a reason for this behavior?
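For reference, this is roughly how I compare the two numbers (a minimal sketch; using pandas is my own choice, and the log-file name logged_metrics.txt comes from the script below):

import pandas as pd

# CSVLogger writes one row per epoch; the "best" epoch is the one with
# the minimal val_loss, i.e. the epoch that ModelCheckpoint keeps with
# save_best_only=True, monitor='val_loss' and mode='auto'.
log = pd.read_csv('logged_metrics.txt')
best_epoch = log.loc[log['val_loss'].idxmin()]
print(best_epoch)  # compare these values against the output of model.evaluate()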
EDIT:
After some testing, I gained some insights:
- If I do not use a generator for the training and validation data (and therefore do not use
model.fit_generator()), the problem does not arise. → Using ImageDataGenerator for the training and validation data is the source of the inconsistency. (Note: for evaluate() I do not use a generator, but I do use the same validation data (at least if ImageDataGenerator works as expected ...).)
- I think ImageDataGenerator is not working properly (please also see this). The metrics logged by CSVLogger for the "best" epoch do not match those computed by model.evaluate(), and this holds both for the training metrics (e.g. loss) and for the validation metrics (e.g. val_loss).
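One way to rule out a plain data mismatch (a sketch, reusing the names val_datagen, validation_data_dir, img_width, img_height, batch_size and validation_steps from the script below) is to build X_val/Y_val by draining a non-shuffled generator once, so that evaluate() sees exactly the batches the generator produces:

import numpy as np

# Second validation generator with shuffle=False, so the sample order is
# deterministic and one full pass covers every validation image exactly once.
check_generator = val_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    shuffle=False,
    class_mode='categorical')

batches = [next(check_generator) for _ in range(validation_steps)]
X_val = np.concatenate([x for x, _ in batches])
Y_val = np.concatenate([y for _, y in batches])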
My code (for reproducing the problem):
from __future__ import print_function
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Model, load_model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, GaussianDropout, Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adam
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras import metrics
import os
import sys
from scipy import misc
import numpy as np
from keras.applications.vgg16 import preprocess_input as vgg16_preprocess_input
from keras.applications import VGG16
from keras.callbacks import CSVLogger, ModelCheckpoint
seed = 1337
loss_function = 'categorical_crossentropy'
learning_rate = 0.001
epochs = 10
batch_size = 20
nb_classes = 5
img_width, img_height = 400, 400
chosen_optimizer = SGD(lr=learning_rate, momentum=0.0, decay=0.0, nesterov=False)
steps_per_epoch = 40 // batch_size
validation_steps = 40 // batch_size
data_dir =
filepath =
file_best_checkpoint= 'best_epoch.hdf5'
file_csvlogger = 'logged_metrics.txt'
modelcheckpoint_best_epoch = ModelCheckpoint(filepath=os.path.join(filepath, file_best_checkpoint),
                                             monitor='val_loss', verbose=1,
                                             save_best_only=True,
                                             save_weights_only=False, mode='auto',
                                             period=1)
csvlogger = CSVLogger(os.path.join(filepath, file_csvlogger), separator=',', append=False)
X_val, Y_val =
my_preprocessing_function = mf.my_vgg16_preprocess_input
train_datagen = ImageDataGenerator(preprocessing_function=my_preprocessing_function)
val_datagen = ImageDataGenerator(preprocessing_function=my_preprocessing_function)
train_data_dir = os.path.join(data_dir, 'train')
validation_data_dir = os.path.join(data_dir, 'val')
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
    class_mode='categorical')
validation_generator = val_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
    class_mode='categorical')
print("\n---------------------------------------------------------------")
print("------------------------ training model -----------------------")
print("---------------------------------------------------------------")
base_model = VGG16(include_top=False, weights = None, input_shape=(img_width, img_height, 3), pooling = 'max', classes = nb_classes)
model_name = "VGG_modified"
for layer in base_model.layers:
    layer.trainable = True
x = base_model.output
x = Dense(1024, activation='relu', name='fc1')(x)
x = Dense(1024, activation='relu', name='fc2')(x)
predictions = Dense(nb_classes, activation='softmax', name='predictions')(x)
model = Model(inputs=base_model.input, outputs=predictions, name=model_name)
print(model.summary())
model.compile(optimizer=chosen_optimizer, loss=loss_function,
              metrics=['categorical_accuracy', 'kullback_leibler_divergence'])
model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=[csvlogger, modelcheckpoint_best_epoch])
print("\n\n---------------------------------------------------------------")
print("------------------ Evaluation of Best Epoch -------------------")
print("---------------------------------------------------------------")
model = load_model(os.path.join(filepath, file_best_checkpoint))
list_of_metrics = model.evaluate(X_val, Y_val, batch_size=batch_size, verbose=1, sample_weight=None)
print('\nMetrics:')
for metric, value in zip(model.metrics_names, list_of_metrics):
    print(metric + ':', str(value))
EDIT 2:
Regarding point 1 of the EDIT: if I also use a generator for the evaluation (i.e. evaluate_generator()), the problem does not arise.
So it really seems that ImageDataGenerator does not work as expected ...
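For completeness, the generator-based evaluation looks like this (a sketch; in Keras 2.0.4, evaluate_generator() takes the number of batches via the steps argument):

# Evaluate through the same ImageDataGenerator pipeline used during training.
list_of_metrics = model.evaluate_generator(validation_generator,
                                           steps=validation_steps)
for metric, value in zip(model.metrics_names, list_of_metrics):
    print(metric + ':', str(value))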