- , .
( , . , cookie.)
. , .
Note that ColumnExtractor inherits from BaseEstimator and TransformerMixin, so it can be used as a step alongside the other sklearn transformers.
.
pd.get_dummies, ,
. , (), y ; - , .
Enjoy.
.
import pandas as pd
import numpy as np
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
# Toy data set: one categorical column ('cat') and two numeric ones.
X = pd.DataFrame(
    data={
        'cat': ['a', 'b', 'c'],
        'n1': [1, 2, 3],
        'n2': [5, 7, 9],
    }
)
cat n1 n2
0 a 1 5
1 b 2 7
2 c 3 9
class ColumnExtractor(BaseEstimator, TransformerMixin):
    """Transformer that selects columns from its input by label.

    Parameters
    ----------
    columns : list of column labels or None, default=None
        Labels used to index the input in ``transform`` (``X[columns]``).
        ``None`` means "no selection": the input is returned unchanged.
    """

    def __init__(self, columns=None):
        # Stored as given; no validation or copying happens here.
        self.columns = columns

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is no state to learn."""
        return self

    def transform(self, X):
        """Return the configured columns of ``X``.

        Parameters
        ----------
        X : object indexable by ``self.columns``
            The usage in this file passes a pandas DataFrame.

        Returns
        -------
        ``X[self.columns]``, or ``X`` itself when ``columns`` is ``None``.
        """
        if self.columns is None:
            # The default must not reach X[None], which is not a column
            # selection; treat it as "keep everything" instead.
            return X
        return X[self.columns]
# A single 'features' step made of two parallel branches:
#   'num'     -> select n1/n2, then expand them with PolynomialFeatures
#   'cat_var' -> select the dummy columns cat_b/cat_c as they are
pipe2nvars = Pipeline(steps=[
    ('features', FeatureUnion(transformer_list=[
        ('num', Pipeline(steps=[
            ('extract', ColumnExtractor(columns=['n1', 'n2'])),
            ('poly', PolynomialFeatures()),
        ])),
        ('cat_var', ColumnExtractor(columns=['cat_b', 'cat_c'])),
    ])),
])
# One-hot encode once up front: the encoded frame does not depend on the
# polynomial degree, so rebuilding it on every iteration is wasted work.
X_dummies = pd.get_dummies(X, drop_first=True)

# Re-run the same pipeline for polynomial degrees 1, 2 and 3.
for p in range(1, 4):
    # Nested parameter path: step 'features' -> branch 'num' -> step 'poly'.
    pipe2nvars.set_params(features__num__poly__degree=p)
    res = pipe2nvars.fit_transform(X_dummies)
    print('polynomial degree: {}; shape: {}'.format(p, res.shape))
    print(res)
polynomial degree: 1; shape: (3, 5)
[[1. 1. 5. 0. 0.]
[1. 2. 7. 1. 0.]
[1. 3. 9. 0. 1.]]
polynomial degree: 2; shape: (3, 8)
[[ 1. 1. 5. 1. 5. 25. 0. 0.]
[ 1. 2. 7. 4. 14. 49. 1. 0.]
[ 1. 3. 9. 9. 27. 81. 0. 1.]]
polynomial degree: 3; shape: (3, 12)
[[ 1. 1. 5. 1. 5. 25. 1. 5. 25. 125. 0. 0.]
[ 1. 2. 7. 4. 14. 49. 8. 28. 98. 343. 1. 0.]
[ 1. 3. 9. 9. 27. 81. 27. 81. 243. 729. 0. 1.]]