I implemented linear regression with gradient descent in Python. To see how well it works, I compared it against scikit-learn's LinearRegression() class. For some reason, sklearn always outperforms my program by an MSE of about 3 on average (I'm using the Boston Housing dataset for testing). I understand that I am not currently checking the gradient to test for convergence, but I allow many iterations and set the learning rate low enough that it SHOULD converge. Is there a clear mistake in my implementation of the learning algorithm? Here is my code:
import numpy as np
from sklearn.linear_model import LinearRegression
def getWeights(x):
    lenWeights = len(x[1,:]);
    weights = np.random.rand(lenWeights)
    bias = np.random.random();
    return weights,bias
def train(x,y,weights,bias,maxIter):
    converged = False;
    iterations = 1;
    m = len(x);
    alpha = 0.001;
    while not converged:
        # update the parameters one training example at a time
        for i in range(len(x)):
            hypothesis = np.dot(x[i,:], weights) + bias;
            error = hypothesis - y[i];
            grad = (alpha * 1/m) * ( error * x[i,:] );
            weights = weights - grad;
            bias = bias - alpha * error;
            iterations = iterations + 1;
            if iterations > maxIter:
                converged = True;
                break
    return weights, bias
def predict(x, weights, bias):
    return np.dot(x,weights) + bias
if __name__ == '__main__':
    data = np.loadtxt('housing.txt');
    x = data[:,:-1];
    y = data[:,-1];

    # min-max scale each feature column to [0, 1]
    for i in range(len(x[1,:])):
        x[:,i] = ( (x[:,i] - np.min(x[:,i])) / (np.max(x[:,i]) - np.min(x[:,i])) );

    initialWeights,initialBias = getWeights(x);
    weights,bias = train(x,y,initialWeights,initialBias,55000);
    pred = predict(x, weights,bias);

    MSE = np.mean(abs(pred - y));
    print "This Program MSE: " + str(MSE)

    sklearnModel = LinearRegression();
    sklearnModel = sklearnModel.fit(x,y);
    sklearnModel = sklearnModel.predict(x);

    skMSE = np.mean(abs(sklearnModel - y));
    print "Sklearn MSE: " + str(skMSE)