If you use images with data generators, here is one way to do 10-fold cross-validation with Keras and scikit-learn. The strategy is to copy the files into `training`, `validation`, and `test` sub-folders in accordance with each fold.
import numpy as np
import os
import pandas as pd
import shutil
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
def copy_images(df, directory):
    """Rebuild one fold sub-folder and copy the listed images into it.

    The destination ``{path to your data directory}/<directory>`` is deleted
    if it already exists and then recreated with one sub-folder per distinct
    value of ``df['class']``. For every row, ``<filename>.jpg`` is copied
    from the image source path into the sub-folder of the row's class.

    Args:
        df: DataFrame with a ``'filename'`` column (image name without the
            ``.jpg`` extension) and a ``'class'`` column (label).
        directory: Name of the sub-folder to (re)create, e.g. ``'training'``.

    A failure while copying a single image is printed and skipped, so one
    missing file does not abort the whole fold.
    """
    destination_directory = "{path to your data directory}/" + directory
    print("copying {} files to {}...".format(directory, destination_directory))

    # Start from an empty folder so files from the previous fold never leak
    # into this one.
    if os.path.exists(destination_directory):
        shutil.rmtree(destination_directory)
    os.makedirs(destination_directory)

    # One sub-folder per class; built with the same formatting as the copy
    # target below so non-string labels resolve to the same path.
    for c in set(df['class']):
        class_directory = "{}/{}".format(destination_directory, c)
        if not os.path.exists(class_directory):
            os.makedirs(class_directory)

    # Source template is the same for every row, so build it once.
    path_from = "{path to all of your images}" + "{}.jpg"

    for _, row in df.iterrows():
        try:
            path_to = "{}/{}".format(destination_directory, row['class'])
            shutil.copy(path_from.format(row['filename']), path_to)
        except Exception as e:
            # Deliberately best-effort: report the file and keep going.
            print("Error when copying {}: {}".format(row['filename'], str(e)))
# Load the sample table; it must contain a 'class' column holding the labels.
df = pd.read_csv('{path to your data}.csv')

# pop() removes the label column from df and returns it, so df_x (the same
# object as df) ends up holding every column except 'class'.
df_y = df.pop('class')
df_x = df

# 10 stratified folds, split on the class labels.
skf = StratifiedKFold(n_splits=10)

# Accumulators filled in by the cross-validation loop.
total_actual, total_predicted = [], []
total_val_accuracy, total_val_loss = [], []
total_test_accuracy = []
# Run one train / validate / test cycle per stratified fold.
for i, (train_index, test_index) in enumerate(skf.split(df_x, df_y)):
    x_train, x_test = df_x.iloc[train_index], df_x.iloc[test_index]
    y_train, y_test = df_y.iloc[train_index], df_y.iloc[test_index]

    # Re-attach the labels so each frame carries the 'class' column again,
    # which copy_images() needs.
    train = pd.concat([x_train, y_train], axis=1)
    test = pd.concat([x_test, y_test], axis=1)

    # Hold out 20% of this fold's training rows for validation, then remove
    # those rows from the training set so the two do not overlap.
    validation = train.sample(frac=0.2)
    train = train[~train['filename'].isin(validation['filename'])]

    # Lay the images out on disk according to this fold.
    copy_images(train, 'training')
    copy_images(validation, 'validation')
    copy_images(test, 'test')

    print('**** Running fold '+ str(i))

    # create_train_model() is expected to be defined elsewhere and to return
    # the pair (validation accuracy, validation loss) for this fold.
    val_accuracy, val_loss = create_train_model()
    total_val_accuracy.append(val_accuracy)
    total_val_loss.append(val_loss)

    # predict() is expected to be defined elsewhere and to return the actual
    # and predicted classes of the 'test' images, in matching order.
    actual, predicted = predict()
    total_test_accuracy.append(accuracy_score(actual, predicted))

    # extend() accepts any iterable (list, tuple, NumPy array); `list + x`
    # would fail or broadcast for non-list return values.
    total_actual.extend(actual)
    total_predicted.extend(predicted)

    # Running report over all folds processed so far.
    print(classification_report(total_actual, total_predicted))
    print(confusion_matrix(total_actual, total_predicted))
# Final evaluation over the predictions pooled from every fold.
overall_report = classification_report(total_actual, total_predicted)
print(overall_report)
overall_matrix = confusion_matrix(total_actual, total_predicted)
print(overall_matrix)

# Per-fold validation accuracy and its mean, shown as a percentage.
print("Validation accuracy on each fold:")
print(total_val_accuracy)
mean_val_accuracy_pct = np.mean(total_val_accuracy) * 100
print("Mean validation accuracy: {}%".format(mean_val_accuracy_pct))

# Per-fold validation loss and its mean.
print("Validation loss on each fold:")
print(total_val_loss)
mean_val_loss = np.mean(total_val_loss)
print("Mean validation loss: {}".format(mean_val_loss))

# Per-fold test accuracy and its mean, shown as a percentage.
print("Test accuracy on each fold:")
print(total_test_accuracy)
mean_test_accuracy_pct = np.mean(total_test_accuracy) * 100
print("Mean test accuracy: {}%".format(mean_test_accuracy_pct))
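In your `predict()` function, if you are using a data generator, keep the test files in a fixed order by disabling shuffling and using a `batch_size` of 1: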
# Test-set generator reading from the 'test' sub-folder of the current fold.
generator = ImageDataGenerator().flow_from_directory(
    '{path to your data directory}/test',
    target_size=(img_width, img_height),
    # One image per batch, so every step yields exactly one sample.
    batch_size=1,
    color_mode='rgb',
    class_mode='categorical',
    # No shuffling, so the iteration order stays fixed across calls.
    shuffle=False,
)
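With this code you can run 10-fold cross-validation using data generators (so the images do not all have to be held in memory). Copying the files for every fold takes time, and `batch_size = 1` can become a bottleneck if the test set is large, so expect long runs on big datasets.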