I can easily enter test data from a single file. However, whenever I try to enter data from mulitiple files into a directory, I get the following error: AttributeError: the "NoneType" object does not have the "lower" attribute. Please see my codes below, I will appreciate any help. Thank you
from sklearn.feature_extraction.text import CountVectorizer from sklearn.feature_extraction.text import TfidfTransformer from nltk.corpus import stopwords import numpy as np import numpy.linalg as LA import os path = "C:\zircon" def radfil(): for file in os.listdir(path): current = os.path.join(path, file) if os.path.isfile(current): data = open(current, "rb").read() print data train_set = [radfil()] test_set = ["The sun in the sky is bright."] stopWords = stopwords.words('english') vectorizer = CountVectorizer(stop_words=stopWords, min_df=1)
source share