TensorFlow multidimensional linear regression does not converge

I am trying to fit a multidimensional linear regression model with regularization using TensorFlow. For some reason, I can't get the training portion of the code below to reduce the error that gradient descent is supposed to be minimizing. Am I doing something wrong in setting up my graph?

import tensorflow as tf
import numpy as np

def normalize_data(matrix):
    averages = np.average(matrix, 0)
    mins = np.min(matrix, 0)
    maxes = np.max(matrix, 0)
    ranges = maxes - mins
    return (matrix - averages) / ranges

def run_regression(X, Y, X_test, Y_test, lambda_value=0.1, normalize=False, batch_size=10):
    x_train = normalize_data(X) if normalize else X
    y_train = Y
    x_test = X_test
    y_test = Y_test
    session = tf.Session()
    # Calculate number of features for X and Y
    x_features_length = len(X[0])
    y_features_length = len(Y[0])
    # Build Tensorflow graph parts
    x = tf.placeholder('float', [None, x_features_length], name="X")
    y = tf.placeholder('float', [None, y_features_length], name="Y")
    theta = tf.Variable(tf.random_normal([x_features_length, y_features_length], stddev=0.01), name="Theta")
    lambda_val = tf.constant(lambda_value)
    # Trying to implement this way http://openclassroom.stanford.edu/MainFolder/DocumentPage.php?course=MachineLearning&doc=exercises/ex5/ex5.html
    y_predicted = tf.matmul(x, theta, name="y_predicted")
    regularization_cost_part = tf.cast(tf.mul(lambda_val, tf.reduce_sum(tf.pow(theta, 2)), name="regularization_param"), 'float')
    polynomial_cost_part = tf.reduce_sum(tf.pow(tf.sub(y_predicted, y), 2), name="polynomial_sum")
    # Set up some summary info to debug
    with tf.name_scope('cost') as scope:
        cost_func = tf.mul(tf.cast(1/(2*batch_size), 'float'), tf.cast(tf.add(polynomial_cost_part, regularization_cost_part), 'float'))
        cost_summary = tf.scalar_summary("cost", cost_func)
    training_func = tf.train.GradientDescentOptimizer(0.03).minimize(cost_func)
    with tf.name_scope("test") as scope:
        correct_prediction = tf.sub(tf.cast(1, 'float'), tf.reduce_mean(tf.sub(y_predicted, y)))
        accuracy = tf.cast(correct_prediction, "float")
        accuracy_summary = tf.scalar_summary("accuracy", accuracy)
    saver = tf.train.Saver()
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter("/tmp/football_logs", session.graph_def)
    init = tf.initialize_all_variables()
    session.run(init)
    for i in range(0, len(x_train) / batch_size):
        session.run(training_func, feed_dict={x: x_train[i*batch_size:i*batch_size+batch_size],
                                              y: y_train[i*batch_size:i*batch_size+batch_size]})
        if i % batch_size == 0:
            result = session.run([merged, accuracy], feed_dict={x: x_test, y: y_test})
            writer.add_summary(result[0], i)
            print "step %d, training accuracy %g" % (i, result[1])
    print "test accuracy %g" % session.run(accuracy, feed_dict={x: x_test, y: y_test})
    save_path = saver.save(session, "/tmp/football.ckpt")
    print "Model saved in file: ", save_path
    session.close()

My output is as follows:

step 0, training accuracy 39.1802
step 10, training accuracy 39.1802
step 20, training accuracy 39.1802
...
step 210, training accuracy 39.1802
test accuracy 39.1802
Model saved in file: /tmp/football.ckpt
1 answer

There seems to be a problem with the learning rate: 0.03 may be too high depending on what your data looks like. Also, you probably want to build your graph separately from the session in a more explicit way, or even use the normal equations to reach the optimal solution without iterating at all, if your dataset has medium/low dimensionality. I posted some examples that you will hopefully find useful! Also, the TF tutorials cover it well (search for "Complete program" on that page).
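For instance, a minimal numpy sketch of that closed-form route (ridge regression via the normal equations, assuming the same setup as your code: a leading all-ones bias column in X, and penalizing all weights including the bias, for simplicity):

import numpy as np

# Closed-form ridge regression: theta = (X^T X + lambda*I)^(-1) X^T Y.
# No learning rate to tune and no iteration; practical as long as the
# number of features stays small enough to factor a d-by-d matrix.
def normal_equations(X, Y, lambda_value=0.1):
    X, Y = np.asarray(X, dtype=float), np.asarray(Y, dtype=float)
    d = X.shape[1]
    return np.linalg.solve(X.T.dot(X) + lambda_value * np.eye(d), X.T.dot(Y))

# e.g. theta = normal_equations(train_data, train_labels)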

As for your code, here is a version that worked for me. I swapped in replacements for some deprecated functions and, mainly, set the learning rate to a much lower value, alpha=1e-8, at which it seems to converge (on a synthetic dataset that is also generated in the code):

test accuracy 2176.11
test accuracy 1898.6
test accuracy 1663.69
test accuracy 1458.53
test accuracy 1287.57
test accuracy 1116.9
test accuracy 969.474
test accuracy 841.028
test accuracy 738.592
test accuracy 649.891
test accuracy 565.188
test accuracy 495.33
test accuracy 438.351
test accuracy 381.161
test accuracy 333.213
test accuracy 289.575
test accuracy 254.394
test accuracy 222.836
test accuracy 197.36
test accuracy 172.788
test accuracy 152.251
test accuracy 132.664
test accuracy 115.982
test accuracy 101.021
final test accuracy 90.2555
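For intuition on why alpha has to be that small: the features are drawn from [0, 1000], so the curvature of the squared-error cost is huge, and gradient descent on a quadratic only converges when the step is below roughly 2/L, where L is the largest eigenvalue of (1/m) X^T X. A back-of-the-envelope estimate (my rough illustration, not part of the fix itself):

import numpy as np

# Features uniform in [0, 1000] plus a bias column, as in the synthetic
# dataset below; m is the batch size, d the total number of columns.
m, d = 10, 6
X = np.hstack([np.ones((m, 1)), np.random.rand(m, d - 1) * 1000])
L = np.linalg.eigvalsh(X.T.dot(X) / m).max()
print("curvature L ~ %g, safe step < %g" % (L, 2.0 / L))
# L comes out on the order of 1e6 here, so the step must be ~1e-6 or
# less: 0.03 overshoots wildly, while 1e-8 converges, slowly but surely.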

CODE:

import tensorflow as tf
import numpy as np

# generate some dataset
DIMENSIONS = 5
DS_SIZE = 5000
TRAIN_RATIO = 0.5  # 50% of the dataset is used for training
_train_size = int(DS_SIZE * TRAIN_RATIO)
_test_size = DS_SIZE - _train_size
f = lambda x: sum(x)  # the "true" function: f = 0 + 1*x1 + 1*x2 + 1*x3 ...
noise = lambda: np.random.normal(0, 10)  # some noise
# training globals
LAMBDA = 1e6  # L2 regularization factor

# generate the dataset, the labels and split into train/test
ds = [[np.random.rand()*1000 for d in range(DIMENSIONS)] for _ in range(DS_SIZE)]
ds = [([1]+x, [f(x)+noise()]) for x in ds]  # add x[0]=1 dimension and labels
np.random.shuffle(ds)
train_data, train_labels = zip(*ds[0:_train_size])
test_data, test_labels = zip(*ds[_train_size:])

def normalize_data(matrix):
    averages = np.average(matrix, 0)
    mins = np.min(matrix, 0)
    maxes = np.max(matrix, 0)
    ranges = maxes - mins
    return (matrix - averages) / ranges

def run_regression(X, Y, X_test, Y_test, lambda_value=0.1, normalize=False, batch_size=10, alpha=1e-8):
    x_train = normalize_data(X) if normalize else X
    y_train = Y
    x_test = X_test
    y_test = Y_test
    session = tf.Session()
    # Calculate number of features for X and Y
    x_features_length = len(X[0])
    y_features_length = len(Y[0])
    # Build Tensorflow graph parts
    x = tf.placeholder('float', [None, x_features_length], name="X")
    y = tf.placeholder('float', [None, y_features_length], name="Y")
    theta = tf.Variable(tf.random_normal([x_features_length, y_features_length], stddev=0.01), name="Theta")
    lambda_val = tf.constant(lambda_value)
    # Trying to implement this way http://openclassroom.stanford.edu/MainFolder/DocumentPage.php?course=MachineLearning&doc=exercises/ex5/ex5.html
    y_predicted = tf.matmul(x, theta, name="y_predicted")
    #regularization_cost_part = tf.cast(tf.multiply(lambda_val, tf.reduce_sum(tf.pow(theta, 2)), name="regularization_param"), 'float')
    #polynomial_cost_part = tf.reduce_sum(tf.pow(tf.subtract(y_predicted, y), 2), name="polynomial_sum")
    # Set up some summary info to debug
    with tf.name_scope('cost') as scope:
        #cost_func = tf.multiply(tf.cast(1/(2*batch_size), 'float'), tf.cast(tf.add(polynomial_cost_part, regularization_cost_part), 'float'))
        cost_func = (tf.nn.l2_loss(y_predicted - y) + lambda_val*tf.nn.l2_loss(theta))/float(batch_size)
        #DEPRECATED*** cost_summary = tf.scalar_summary("cost", cost_func)
        cost_summary = tf.summary.scalar('cost', cost_func)  # Add a scalar summary for the snapshot loss.
    training_func = tf.train.GradientDescentOptimizer(alpha).minimize(cost_func)
    with tf.name_scope("test") as scope:
        correct_prediction = tf.subtract(tf.cast(1, 'float'), tf.reduce_mean(tf.subtract(y_predicted, y)))
        accuracy = tf.cast(correct_prediction, "float")
        #DEPRECATED*** accuracy_summary = tf.scalar_summary("accuracy", accuracy)
        #accuracy_summary = tf.summary.scalar("accuracy", accuracy)
    saver = tf.train.Saver()
    #DEPRECATED*** merged = tf.merge_all_summaries()
    merged = tf.summary.merge_all()
    #DEPRECATED*** writer = tf.train.SummaryWriter("/tmp/football_logs", session.graph_def)
    writer = tf.summary.FileWriter("/tmp/football_logs", session.graph)
    #DEPRECATED*** init = tf.initialize_all_variables()
    init = tf.global_variables_initializer()
    session.run(init)
    for i in range(1, len(x_train)/batch_size):
        session.run(training_func, feed_dict={x: x_train[i*batch_size:i*batch_size+batch_size],
                                              y: y_train[i*batch_size:i*batch_size+batch_size]})
        if i % batch_size == 0:
            print "test accuracy %g" % session.run(accuracy, feed_dict={x: x_test, y: y_test})
            #result = session.run([merged, accuracy], feed_dict={x: x_test, y: y_test})
            #writer.add_summary(result[0], i)
            #print "step %d, training accuracy %g" % (i, result[1])
            #writer.flush()
    print "final test accuracy %g" % session.run(accuracy, feed_dict={x: x_test, y: y_test})
    #save_path = saver.save(session, "/tmp/football.ckpt")
    #print "Model saved in file: ", save_path
    session.close()

run_regression(train_data, train_labels, test_data, test_labels, normalize=False, alpha=1e-8)
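One note on the rewritten cost: tf.nn.l2_loss(t) computes sum(t**2)/2, so the new expression is exactly the original (1/(2*batch_size)) * (polynomial + regularization) cost. Also, in Python 2 the original tf.cast(1/(2*batch_size), 'float') is integer division: it evaluates to 0 and silently zeroes the whole cost and its gradients, which would also explain the constant accuracy in your output; dividing by float(batch_size) sidesteps that. A quick numpy check of the equivalence:

import numpy as np

batch_size, lam = 10, 0.1
rng = np.random.RandomState(0)
X, theta, y = rng.rand(batch_size, 6), rng.rand(6, 1), rng.rand(batch_size, 1)
resid = X.dot(theta) - y

# Original formulation: (1/(2m)) * (sum of squared residuals + lam * sum(theta^2))
original = (np.sum(resid**2) + lam * np.sum(theta**2)) / (2.0 * batch_size)
# Rewritten with l2_loss semantics: l2_loss(t) = sum(t**2) / 2
l2 = lambda t: np.sum(t**2) / 2.0
rewritten = (l2(resid) + lam * l2(theta)) / batch_size
print(abs(original - rewritten) < 1e-12)  # True

print(1/(2*batch_size))  # 0 under Python 2: the silent bug in the original cost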

As I said, you will probably want to restructure this a bit for readability and scalability, but I hope it helps!

Cheers, Andres

