I am running Python 2.7 (64-bit) on a 64-bit Windows 8 system with 24GB of memory. Fitting with the usual sklearn.linear_model.Ridge works fine.
Problem: However, when I use sklearn.linear_model.RidgeCV(alphas=alphas) for the fit, I get the MemoryError shown below. It is raised from the rr.fit(X_train, y_train) line, which performs the fitting procedure.
How can I prevent this error?
Code snippet
def fit(X_train, y_train):
    """Build a RidgeCV estimator, fit it on the training data and return it.

    The estimator is constructed with a fixed list of candidate ``alphas``
    and then fitted on ``X_train`` / ``y_train``.
    """
    # Candidate alpha values handed to RidgeCV.
    alphas = [1e-3, 1e-2, 1e-1, 1e0, 1e1]

    rr = RidgeCV(alphas=alphas)
    rr.fit(X_train, y_train)

    return rr


# Fit Training set
rr = fit(X_train, y_train)
Error
MemoryError Traceback (most recent call last) <ipython-input-41-a433716e7179> in <module>() 1 # Fit Training set ----> 2 rr = fit(X_train, y_train) <ipython-input-35-9650bd58e76c> in fit(X_train, y_train) 3 4 rr = RidgeCV(alphas=alphas) ----> 5 rr.fit(X_train, y_train) 6 7 return rr C:\Python27\lib\site-packages\sklearn\linear_model\ridge.pyc in fit(self, X, y, sample_weight) 696 gcv_mode=self.gcv_mode, 697 store_cv_values=self.store_cv_values) --> 698 estimator.fit(X, y, sample_weight=sample_weight) 699 self.alpha_ = estimator.alpha_ 700 if self.store_cv_values: C:\Python27\lib\site-packages\sklearn\linear_model\ridge.pyc in fit(self, X, y, sample_weight) 608 raise ValueError('bad gcv_mode "%s"' % gcv_mode) 609 --> 610 v, Q, QT_y = _pre_compute(X, y) 611 n_y = 1 if len(y.shape) == 1 else y.shape[1] 612 cv_values = np.zeros((n_samples * n_y, len(self.alphas))) C:\Python27\lib\site-packages\sklearn\linear_model\ridge.pyc in _pre_compute_svd(self, X, y) 531 def _pre_compute_svd(self, X, y): 532 if sparse.issparse(X) and hasattr(X, 'toarray'): --> 533 X = X.toarray() 534 U, s, _ = np.linalg.svd(X, full_matrices=0) 535 v = s ** 2 C:\Python27\lib\site-packages\scipy\sparse\compressed.pyc in toarray(self, order, out) 559 def toarray(self, order=None, out=None): 560 """See the docstring for `spmatrix.toarray`.""" --> 561 return self.tocoo(copy=False).toarray(order=order, out=out) 562 563 ############################################################## C:\Python27\lib\site-packages\scipy\sparse\coo.pyc in toarray(self, order, out) 236 def toarray(self, order=None, out=None): 237 """See the docstring for `spmatrix.toarray`.""" --> 238 B = self._process_toarray_args(order, out) 239 fortran = int(B.flags.f_contiguous) 240 if not fortran and not B.flags.c_contiguous: C:\Python27\lib\site-packages\scipy\sparse\base.pyc in _process_toarray_args(self, order, out) 633 return out 634 else: --> 635 return np.zeros(self.shape, dtype=self.dtype, order=order) 636 637 MemoryError:
code
# Show the container type and the dimensions of the training matrix.
print(type(X_train))
print(X_train.shape)
Result
<class 'scipy.sparse.csr.csr_matrix'> (183576, 101507)