I am trying to encode some reading information into a machine learning model using the following
import numpy as np import pandas as pd import matplotlib.pyplot as py Dataset = pd.read_csv('filename.csv', sep = ',') X = Dataset.iloc[:,:-1].values Y = Dataset.iloc[:,18].values from sklearn.preprocessing import LabelEncoder, OneHotEncoder labelencoder_X = LabelEncoder() X[:, 0] = labelencoder_X.fit_transform(X[:, 0]) onehotencoder = OneHotEncoder(categorical_features = [0]) X = onehotencoder.fit_transform(X).toarray()
however i get an error
runfile('C:/Users/name/Desktop/Machine Learning/Data preprocessing template.py', wdir='C:/Users/taylorr2/Desktop/Machine Learning') Traceback (most recent call last): File "<ipython-input-141-a5d1cd02c2df>", line 1, in <module> runfile('C:/Users/name/Desktop/Machine Learning/Data preprocessing template.py', wdir='C:/Users/taylorr2/Desktop/Machine Learning') File "C:\Users\name\AppData\Local\Continuum\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile execfile(filename, namespace) File "C:\Users\name\AppData\Local\Continuum\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile exec(compile(scripttext, filename, 'exec'), glob, loc) File "C:/Users/name/Desktop/Machine Learning/Data preprocessing template.py", line 8, in <module> Y = Dataset.iloc[:,18].values File "C:\Users\name\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\indexing.py", line 1310, in __getitem__ return self._getitem_tuple(key) File "C:\Users\name\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\indexing.py", line 1560, in _getitem_tuple self._has_valid_tuple(tup) File "C:\Users\name\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\indexing.py", line 151, in _has_valid_tuple if not self._has_valid_type(k, i): File "C:\Users\name\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\indexing.py", line 1528, in _has_valid_type return self._is_valid_integer(key, axis) File "C:\Users\name\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\indexing.py", line 1542, in _is_valid_integer raise IndexError("single positional indexer is out-of-bounds") IndexError: single positional indexer is out-of-bounds
I read the same error question here and tried
import numpy as np import pandas as pd import matplotlib.pyplot as py Dataset = pd.read_csv('filename.csv', sep = ',') table = Dataset.find(id='AlerId') rows = table.find_all('tr')[1:] data = [[cell.text for cell in row.find_all('td')] for row in rows] Dataset1 = pd.DataFrame(data=data, columns=columns) X = Dataset1.iloc[:,:-1].values Y = Dataset1.iloc[:,18].values from sklearn.preprocessing import LabelEncoder, OneHotEncoder labelencoder_X = LabelEncoder() X[:, 0] = labelencoder_X.fit_transform(X[:, 0]) onehotencoder = OneHotEncoder(categorical_features = [0]) X = onehotencoder.fit_transform(X).toarray()
However, I think that this could just confuse me more and now I am even more so.
Any suggestions?