Matching indices in a list of lists - dictionary - python

I have a dictionary containing a list of lists:

d = [('Locus_1',
  [['>Safr02', 'R', '104'],
   ['>Safr03', 'G', '104'],
   ['>Safr04', 'A', '104'],
   ['>Safr10', 'A', '104'],
   ['>Safr02', 'K', '110'],
   ['>Safr03', 'T', '110'],
   ['>Safr04', 'T', '110'],
   ['>Safr10', 'T', '110']]),
 ('Locus_2',
  [['>Safr01', 'C', '15'],
   ['>Safr02', 'Y', '15'],
   ['>Safr04', 'Y', '15'],
   ['>Safr07', 'Y', '15'],
   ['>Safr01', 'Y', '78'],
   ['>Safr02', 'T', '78'],
   ['>Safr04', 'T', '78'],
   ['>Safr07', 'T', '78']])]

The dictionary is created with the following code:

# Group the CSV rows by their first column (the locus name). An OrderedDict
# keeps the loci in the order in which they first appear in the file.
d = OrderedDict()

# The context manager closes the input file even if parsing raises.
with open(sys.argv[2], 'r') as snp_file:
    snps = csv.reader(snp_file, delimiter=',')
    for row in snps:
        key = row[0]
        # Everything after the locus name is stored as one record.
        d.setdefault(key, []).append(row[1:])

Data can be found here: https://www.dropbox.com/sh/3j4i04s2rg6b63h/AADkWG3OcsutTiSsyTl8L2Vda?dl=0

I have (for me) a somewhat difficult task to perform on this data. I want to break it down into several stages, but I'm not sure how to do it:

I need to look at the data in pairs relative to the Locus_X name (I have a list of pairs in another file, but for this question you can simply assume that Locus_1 and Locus_2 are a pair).

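For each pair, e.g. Locus_1:Locus_2, I want to match the entries on the value at index 0 (SafrXX). For entries that match (so Locus_1:Safr02-Locus_2:Safr02), I want to pair up the values at index 1, e.g. R:Y.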

, 2.

, , , :

R:Y
R:T
K:Y
K:T

, , , ? -, , ?

+4
2

, . , , , , .

, , , zip- python

import csv
from collections import OrderedDict

# Group the CSV rows by locus name (first column), keeping file order.
d = OrderedDict()

# The context manager closes the input file even if parsing raises.
with open('data.txt', 'r') as snp_file:
    snps = csv.reader(snp_file, delimiter=',')
    for row in snps:
        key = row[0]
        # Everything after the locus name is stored as one record.
        d.setdefault(key, []).append(row[1:])

# Pair the records of the two loci purely by their position in each list.
for left, right in zip(d['Locus_1'], d['Locus_2']):
    print(left, right)

, , :

['>Safr02', 'R', '104'] ['>Safr01', 'C', '15']
['>Safr03', 'G', '104'] ['>Safr02', 'Y', '15']
['>Safr04', 'A', '104'] ['>Safr04', 'Y', '15']
['>Safr10', 'A', '104'] ['>Safr07', 'Y', '15']
['>Safr02', 'K', '110'] ['>Safr01', 'Y', '78']
['>Safr03', 'T', '110'] ['>Safr02', 'Y', '78']
['>Safr04', 'T', '110'] ['>Safr04', 'T', '78']
['>Safr10', 'T', '110'] ['>Safr07', 'T', '78']

script , , :

# Sort each locus' records by the sample name at index 0. sorted() is
# stable, so records of the same sample keep their original relative order.
# (The trailing ':' after each sorted(...) call was a syntax error.)
loc1 = sorted(d['Locus_1'], key=lambda lst: lst[0])
loc2 = sorted(d['Locus_2'], key=lambda lst: lst[0])

# Pair the sorted records by position.
for left, right in zip(loc1, loc2):
    print(left, right)

Output:

['>Safr02', 'R', '104'] ['>Safr01', 'C', '15']
['>Safr02', 'K', '110'] ['>Safr01', 'Y', '78']
['>Safr03', 'G', '104'] ['>Safr02', 'Y', '15']
['>Safr03', 'T', '110'] ['>Safr02', 'Y', '78']
['>Safr04', 'A', '104'] ['>Safr04', 'Y', '15']
['>Safr04', 'T', '110'] ['>Safr04', 'T', '78']
['>Safr10', 'A', '104'] ['>Safr07', 'Y', '15']
['>Safr10', 'T', '110'] ['>Safr07', 'T', '78']

, , , , :

# For every record of the first locus, print one line per record of the
# second locus that carries the same sample name (index 0).
line_format = '{}-{}({}):{}({})'
for first in loc1:
    sample = first[0]
    same_sample = [second for second in loc2 if second[0] == sample]
    for second in same_sample:
        print(line_format.format(sample, first[1], first[2], second[1], second[2]))

, ( , ):

>Safr02-R(104):Y(15)
>Safr02-R(104):Y(78)
>Safr02-K(110):Y(15)
>Safr02-K(110):Y(78)
>Safr04-A(104):Y(15)
>Safr04-A(104):T(78)
>Safr04-T(110):Y(15)
>Safr04-T(110):T(78)

, , , - , . , , , , , .

1 2 , , 3 , .

, , .

0

. pair , . , .

import csv

pair = (1, 2)  # the chosen pair of locus numbers

# Maps (locus number, sample number) -> list of (letter, number, row index).
dic = {}

# The context manager closes the input file even if parsing raises.
with open('input.txt', 'r') as snp_file:
    snps = csv.reader(snp_file, delimiter=',')
    for index, row in enumerate(snps):
        if not row:
            # An empty row ends the data.
            break
        # Strip the textual prefixes and keep only the numeric ids.
        locus = int(row[0][len('locus_'):])
        safr = int(row[1][len('>safr'):])
        letter = row[2]
        number = row[3]
        dic.setdefault((locus, safr), []).append((letter, number, index))

# For every sample of the first locus that also occurs in the second locus,
# print every combination of their records.
for key in dic:
    if key[0] == pair[0] and (pair[1], key[1]) in dic:
        for e in dic[key]:
            for f in dic[pair[1], key[1]]:
                # print() as a function works on Python 3 and yields the
                # same three-space separator as the old print statement.
                print(e, ' ', f)

Output:

('R', '104', 0)   ('Y', '15', 9)
('R', '104', 0)   ('T', '78', 13)
('K', '110', 4)   ('Y', '15', 9)
('K', '110', 4)   ('T', '78', 13)
('A', '104', 2)   ('Y', '15', 10)
('A', '104', 2)   ('T', '78', 14)
('T', '110', 6)   ('Y', '15', 10)
('T', '110', 6)   ('T', '78', 14)
0

Source: https://habr.com/ru/post/1584483/


All Articles