# Scan the CSV in fixed-size windows of rows and report every window that
# pandas cannot parse with the requested dtypes (for example a non-integer
# value in "DstPort"). `pd` and `filename` must already be defined.
rows = 1000  # number of rows parsed per window

for endrow in range(rows, 4000000, rows):
    startrow = endrow - rows
    try:
        # Re-reading with `skiprows` on every pass (rather than iterating one
        # chunked reader) keeps each window independent, so a parse failure
        # in one window does not prevent checking the following ones.
        chunk = pd.read_csv(
            filename,
            dtype={"DstPort": int},
            skiprows=startrow,
            nrows=rows,
            header=None,
            names=['Time', 'Duration', 'SrcDevice', 'DstDevice', 'Protocol',
                   'SrcPort', 'DstPort', 'SrcPackets', 'DstPackets',
                   'SrcBytes', 'DstBytes'],
            usecols=['Duration', 'SrcDevice', 'DstDevice', 'Protocol',
                     'DstPort', 'SrcPackets', 'DstPackets', 'SrcBytes',
                     'DstBytes'],
        )
    except pd.errors.EmptyDataError:
        # The window starts at or past the end of the file. This handler must
        # precede `ValueError` because EmptyDataError is a subclass of it and
        # would otherwise be reported as a bad range.
        break
    except ValueError:
        # At least one row in this window could not be converted.
        print(f"Error is from row {startrow} to row {endrow}")
    else:
        if chunk.empty:
            # No rows were left to read: end of file reached, stop scanning.
            break
Read the file in windows of 1000 lines, each into its own data frame, to find which range of lines contains the mixed-type value that causes this problem.
source
share