Because the square brackets are balanced, we can use a simple trick: wrap every bracketed group in double quotes so that the CSV parser treats it as a single field:
import io
import re

import pandas as pd

# One bracketed group: '[', any run of characters other than ']', then ']'.
# The pattern has no ^/$ anchors, so re.MULTILINE is not needed.
_BRACKETED = re.compile(r'(\[[^\]]*\])')


def quote_bracketed(text):
    """Return *text* with every ``[...]`` group wrapped in double quotes.

    Commas inside a double-quoted field are not treated as separators by
    the CSV parser, so each bracketed group is later read as one value.
    Text without any bracketed group is returned unchanged.
    """
    return _BRACKETED.sub(r'"\1"', text)


data = """\
Item,Date,Time,Location,junk
1,01/01/2016,13:41,[45.2344:-78.25453],[aaaa,bbb]
2,01/03/2016,19:11,[43.3423:-79.23423,41.2342:-81242],[0,1,2,3]
3,01/10/2016,01:27,[51.2344:-86.24432],[12,13]
4,01/30/2016,05:55,[51.2344:-86.24432,41.2342:-81242,55.5555:-81242],[45,55,65]"""

print('{0:-^70}'.format('original data'))
print(data)

data = quote_bracketed(data)

print('{0:-^70}'.format('quoted data'))
print(data)

# io.StringIO presents the quoted text to pandas as a file-like object.
df = pd.read_csv(io.StringIO(data))

print('{0:-^70}'.format('data frame'))

# Keep wide frames on a single row of output instead of wrapping columns.
pd.set_option('display.expand_frame_repr', False)
print(df)
Output:
----------------------------original data----------------------------- Item,Date,Time,Location,junk 1,01/01/2016,13:41,[45.2344:-78.25453],[aaaa,bbb] 2,01/03/2016,19:11,[43.3423:-79.23423,41.2342:-81242],[0,1,2,3] 3,01/10/2016,01:27,[51.2344:-86.24432],[12,13] 4,01/30/2016,05:55,[51.2344:-86.24432,41.2342:-81242,55.5555:-81242],[45,55,65] -----------------------------quoted data------------------------------ Item,Date,Time,Location,junk 1,01/01/2016,13:41,"[45.2344:-78.25453]","[aaaa,bbb]" 2,01/03/2016,19:11,"[43.3423:-79.23423,41.2342:-81242]","[0,1,2,3]" 3,01/10/2016,01:27,"[51.2344:-86.24432]","[12,13]" 4,01/30/2016,05:55,"[51.2344:-86.24432,41.2342:-81242,55.5555:-81242]","[45,55,65]" ------------------------------data frame------------------------------ Item Date Time Location junk 0 1 01/01/2016 13:41 [45.2344:-78.25453] [aaaa,bbb] 1 2 01/03/2016 19:11 [43.3423:-79.23423,41.2342:-81242] [0,1,2,3] 2 3 01/10/2016 01:27 [51.2344:-86.24432] [12,13] 3 4 01/30/2016 05:55 [51.2344:-86.24432,41.2342:-81242,55.5555:-81242] [45,55,65]
UPDATE: if you are sure that all square brackets are balanced, we do not need to use RegEx:
import io

import pandas as pd

# Map each '[' to '"[' and each ']' to ']"' so that every bracketed group
# ends up enclosed in double quotes.
BRACKET_QUOTES = str.maketrans({'[': '"[', ']': ']"'})

# Read the raw file, quote the bracketed groups, and keep the result in an
# in-memory text buffer positioned at its start.
with open('35948417.csv', 'r') as src:
    quoted = io.StringIO(src.read().translate(BRACKET_QUOTES))

df = pd.read_csv(quoted)
print(df)