If you already know what the data structure should look like, can you simply ignore the first three rows and extract the data from the remaining rows? For example, assuming the table is stored in the text file table_file, then:
# Column-oriented container for the parsed table. 'Name' is itself split into
# three sub-columns; every leaf is a list with one entry per data row.
table_data = {
    'Serial Number': [],
    'Name': {'First': [], 'Middle': [], 'Last': []},
    'Marks': [],
}

with open(table_file, 'r') as table:
    # Skip first 3 rows. next(table) works on both Python 2 and Python 3,
    # whereas the file method .next() exists only on Python 2.
    for _ in range(3):
        next(table)

    for row in table:
        cells = row.strip('\n').split('|')
        # Filter *before* stripping: this drops only the truly empty strings
        # produced by the leading/trailing '|', while a blank cell such as
        # ' ' (e.g. a missing middle name) is kept and becomes ''.
        values = [cell.strip() for cell in cells if cell != '']
        # Each data row is expected to carry exactly five cells.
        assert len(values) == 5

        table_data['Serial Number'].append(int(values[0]))
        table_data['Name']['First'].append(values[1])
        table_data['Name']['Middle'].append(values[2])
        table_data['Name']['Last'].append(values[3])
        # Marks are stored as the stripped text of the cell (not converted).
        table_data['Marks'].append(values[4])
EDIT: To build the table_data dictionary, consider the following sketch. Fair warning: I tested this and it seems to work for your example, and it should work for any table with two header lines. However, it is rough, because I wrote it in about 10 minutes. Still, it may be a reasonable starting point that you can improve and expand. It also assumes that you already have code to extract pos_list and name_list.
from itertools import tee

try:
    # Python 2: the lazy zip lives in itertools.
    from itertools import izip
except ImportError:
    # Python 3: the builtin zip is already lazy.
    izip = zip


def pairwise(iterable):
    """Return an iterator over consecutive, overlapping pairs of *iterable*.

    ``pairwise([s0, s1, s2])`` yields ``(s0, s1), (s1, s2)``. An input with
    fewer than two elements yields nothing.
    """
    a, b = tee(iterable)
    next(b, None)  # advance the second copy by one element
    return izip(a, b)


def create_table_dict(pos_list, name_list):
    """Build an empty, possibly nested, column dictionary from two header rows.

    Args:
        pos_list: one sequence of column-boundary positions per header row;
            consecutive positions delimit one cell.
        name_list: one sequence of cell names per header row, in the same
            order as the cells delimited by ``pos_list``.

    Returns:
        A dict keyed by the first-row names. A first-row cell that lines up
        exactly with one second-row cell maps to an empty list, keyed by the
        two names joined with a space and stripped. A first-row cell that
        spans several second-row cells maps to a dict of
        ``{second_row_name: []}``.

    Raises:
        IndexError: if fewer than two header rows are supplied.

    The resulting dict is also printed, as the original snippet did.
    """
    # Pair every header cell's (start, end) interval with its name.
    header_rows = []
    for positions, row_names in zip(pos_list, name_list):
        header_rows.append(list(zip(pairwise(positions), row_names)))

    # Build the dict directly rather than encoding the nesting in
    # "upper:a,b" strings, so names containing ':' or ',' are handled.
    table = {}
    for (top_start, top_end), top_name in header_rows[0]:
        children = []
        for (sub_start, sub_end), sub_name in header_rows[1]:
            if sub_start < top_start:
                # Second-row cell begins left of this first-row cell.
                continue
            if sub_end < top_end:
                # Lies inside the first-row cell but does not close it yet.
                children.append(sub_name)
            elif sub_end == top_end:
                if sub_start == top_start:
                    # Same extent: a plain, non-nested column.
                    table[' '.join((top_name, sub_name)).strip()] = []
                else:
                    # Last sub-cell of a spanning first-row cell.
                    table[top_name] = {
                        child: [] for child in children + [sub_name]
                    }

    print(table)
    return table