First things first: Ridge is not a single algorithm, but a front-end to several quite different solvers (direct and iterative ones)!
Which one gets used matters a lot for speed and memory (and for sparse-data support!).
Quoting scikit-learn's docs on Ridge's solver parameter:
solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}

Solver to use in the computational routines:

'auto' chooses the solver automatically based on the type of data.

'svd' uses a Singular Value Decomposition of X to compute the Ridge coefficients. More stable for singular matrices than 'cholesky'.

'cholesky' uses the standard scipy.linalg.solve function to obtain a closed-form solution.

'sparse_cg' uses the conjugate gradient solver as found in scipy.sparse.linalg.cg. As an iterative algorithm, this solver is more appropriate than 'cholesky' for large-scale data (possibility to set tol and max_iter).

'lsqr' uses the dedicated regularized least-squares routine scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative procedure.

'sag' uses a Stochastic Average Gradient descent, and 'saga' uses its improved, unbiased version named SAGA. Both methods also use an iterative procedure, and are often faster than other solvers when both n_samples and n_features are large. Note that 'sag' and 'saga' fast convergence is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler from sklearn.preprocessing.

All last five solvers support both dense and sparse data. However, only 'sag' and 'saga' support sparse input when fit_intercept is True.
So, judging from those descriptions alone:

- A robust first pick: sparse_cg, e.g. also when refitting many times during CV. It's iterative (tol / max_iter tunable), a well-tested library routine (in contrast to hand-rolled SGD), and handles large-scale, sparse data.
- If speed is the main concern, lsqr: per the docs the fastest, also iterative.
- For large n_samples and n_features, sag: an sgd-like method, iterative as well, with theoretical merit (remark: strong convergence guarantees, but only on scaled features).
- Even better in theory, saga: an sgd-like method too, namely the improved, unbiased version of sag (sag + unbiasedness). (Remark: both need features on roughly the same scale, see the sketch right after this list!)
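Since sag / saga only converge fast on similarly scaled features, the usual companion is a scaler from sklearn.preprocessing, as the docs recommend. A minimal sketch (the data shape and the artificially de-scaled column are my own invention):

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_regression(n_samples=5000, n_features=100, noise=0.1, random_state=0)
X[:, 0] *= 1e3  # artificially blow up one feature's scale to make the point

# standardize first, then solve with saga; without scaling, saga converges slowly
model = make_pipeline(StandardScaler(), Ridge(alpha=1.0, solver="saga"))
model.fit(X, y)
print('train-score: ', model.score(X, y))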
Code:
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from time import perf_counter
X, y = make_regression(n_samples=38000, n_features=7000, n_informative=500,
                       bias=-2.0, noise=0.1, random_state=0)
print(type(X))
clf = Ridge(alpha=1.0, solver="lsqr")
start = perf_counter()
clf.fit(X, y)
end = perf_counter()
print('LSQR: used secs: ', end-start)
Output:
LSQR: used secs: 8.489622474064486
So even this dense case is feasible with that solver (memory use peaked at ~6-8 GB on my machine).
Now, how does Ridge (with its dedicated solvers) compare against SGDRegressor, which minimizes the same objective (squared loss + L2 penalty, i.e. Ridge regression) with plain stochastic gradient descent? Let's benchmark lsqr, sparse_cg and several SGD variants on a smaller problem:

Remark: yes, SGD has hyperparameters that need tuning, especially eta0, the initial learning rate, and I did not tune them carefully here (see the tuning sketch after the output below!)
Partial code:
from sklearn.linear_model import SGDRegressor  # missing import, needed below

X, y = make_regression(n_samples=3800, n_features=700, n_informative=500,
                       noise=0.1, random_state=0)
print(type(X))
clf = Ridge(alpha=1.0, solver="lsqr", fit_intercept=False)
start = perf_counter()
clf.fit(X, y)
end = perf_counter()
print('LSQR: used secs: ', end-start)
print('train-score: ', clf.score(X, y))
clf = Ridge(alpha=1.0, solver="sparse_cg", fit_intercept=False)
start = perf_counter()
clf.fit(X, y)
end = perf_counter()
print('sparse_cg: used secs: ', end-start)
print('train-score: ', clf.score(X, y))
# plain SGD; loss='squared_loss' / n_iter below are the older sklearn spellings
# (newer versions use loss='squared_error' / max_iter)
clf = SGDRegressor(loss='squared_loss', penalty='l2', alpha=1., fit_intercept=False,
                   random_state=0)
start = perf_counter()
clf.fit(X, y)
end = perf_counter()
print('SGD: used secs: ', end-start)
print('train-score: ', clf.score(X, y))
# SGD with iterate averaging (ASGD)
clf = SGDRegressor(loss='squared_loss', penalty='l2', alpha=1., fit_intercept=False,
                   random_state=0, average=True)
start = perf_counter()
clf.fit(X, y)
end = perf_counter()
print('SGD: used secs: ', end-start)
print('train-score: ', clf.score(X, y))
# SGD with a constant, hand-picked learning rate
clf = SGDRegressor(loss='squared_loss', penalty='l2', alpha=1., fit_intercept=False,
                   random_state=0, learning_rate="constant", eta0=0.001)
start = perf_counter()
clf.fit(X, y)
end = perf_counter()
print('SGD: used secs: ', end-start)
print('train-score: ', clf.score(X, y))
# averaged SGD with more epochs (n_iter=50)
clf = SGDRegressor(loss='squared_loss', penalty='l2', alpha=1., fit_intercept=False,
                   random_state=0, n_iter=50, average=True)
start = perf_counter()
clf.fit(X, y)
end = perf_counter()
print('SGD: used secs: ', end-start)
print('train-score: ', clf.score(X, y))
Output:
LSQR: used secs: 0.08252486090450709
train-score: 0.999999907282
sparse_cg: used secs: 0.13668818702548152
train-score: 0.999999181151
SGD: used secs: 0.04154542095705427
train-score: 0.743448766459
SGD: used secs: 0.05300238587407993
train-score: 0.774611911034
SGD: used secs: 0.038653031605587
train-score: 0.733585661919
SGD: used secs: 0.46313909066321507
train-score: 0.776444474871
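As promised above, here is how one might actually tune eta0 instead of guessing: a minimal GridSearchCV sketch (the grid values are arbitrary picks of mine, not from the benchmark; X, y are the 3800 x 700 data from above):

from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import GridSearchCV

params = {'eta0': [1e-4, 1e-3, 1e-2, 1e-1],           # hypothetical grid
          'learning_rate': ['constant', 'invscaling']}
sgd = SGDRegressor(penalty='l2', alpha=1., fit_intercept=False, random_state=0)
search = GridSearchCV(sgd, params, cv=3)              # 3-fold CV over the grid
search.fit(X, y)
print(search.best_params_, search.best_score_)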
Remark: when your data does not fit into memory, SGDRegressor also supports partial_fit for out-of-core learning by feeding it mini_batches, as sketched below (keep in mind that the β found by SGD will only approximate the exact Ridge β)!
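A minimal out-of-core-style sketch with hand-rolled mini_batches (batch size and epoch count are arbitrary choices; in a real out-of-core setting each batch would be streamed from disk instead of sliced from a NumPy array):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor

X, y = make_regression(n_samples=3800, n_features=700, noise=0.1, random_state=0)
clf = SGDRegressor(penalty='l2', alpha=1., fit_intercept=False, random_state=0)

batch_size = 256
for epoch in range(10):                     # several passes over the data
    idx = np.random.permutation(len(X))     # reshuffle between epochs
    for start in range(0, len(X), batch_size):
        rows = idx[start:start + batch_size]
        clf.partial_fit(X[rows], y[rows])   # one SGD sweep over this mini-batch
print('train-score: ', clf.score(X, y))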