, , , , OneVsRestClassifier scikit-multilearn, scikit-learn, , , OneVsRest.
scikit-multilearn . Tsoumakas MLC.
, , , , Label Powerset, - , .
Example using scikit-multilearn:
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from skmultilearn.ensemble import LabelSpacePartitioningClassifier
from skmultilearn.cluster import IGraphLabelCooccurenceClusterer
from skmultilearn.problem_transform import LabelPowerset
# Toy multi-label text-classification example: hash the documents into a
# sparse feature matrix, binarize the label sets, then fit a classifier that
# partitions the label space and trains a Label Powerset model per partition.

# Full label vocabulary; 'c' is declared but never appears in Y below.
categories = ['a', 'b', 'c']
X = ["This is a test", "This is another attempt", "And this is a test too!"]
Y = [['a', 'b'], ['b'], ['a', 'b']]

# Fixing `classes` pins the column order of the binary indicator matrix to
# `categories`, independent of which labels occur in Y.
mlb = MultiLabelBinarizer(classes=categories)

# `non_negative=True` was deprecated in scikit-learn 0.19 and removed in 0.21;
# `alternate_sign=False` is the documented replacement and keeps the hashed
# term counts non-negative, which is what the original option was asking for.
vectorizer = HashingVectorizer(
    decode_error='ignore',
    n_features=2 ** 18,
    alternate_sign=False,
)

X_train = vectorizer.fit_transform(X)
Y_train = mlb.fit_transform(Y)

# Base single-label learner wrapped by the Label Powerset transformation.
base_classifier = MultinomialNB(alpha=0.01)
transformation_classifier = LabelPowerset(base_classifier)

# Clusterer that splits the label space, built with the 'fastgreedy' method.
# NOTE(review): this class name is the one imported at the top of the file;
# newer scikit-multilearn releases appear to have renamed the clusterer API —
# confirm against the installed version.
clusterer = IGraphLabelCooccurenceClusterer(
    'fastgreedy',
    weighted=True,
    include_self_edges=True,
)

clf = LabelSpacePartitioningClassifier(transformation_classifier, clusterer)
clf.fit(X_train, Y_train)