Not a vocation programmer, please excuse me if this is obvious. I cannot run a loop: / ...
I have 3 lists:
gene_concepts[0] = ['+0|+77|CFTR', '+12|+77|CYP2C19']
genes = ['CFTR', 'CFTR', 'CFTR', 'CFTR', 'CFTR', 'CFTR', 'CFTR',
'CFTR', 'CFTR', 'CFTR', 'CFTR', 'CFTR', 'CYP2C19', 'CYP2C19',
'CYP2C19', 'CYP2C19', 'CYP2C19', 'CYP2C19', 'CYP2C19', 'CYP2C19']
haplotypes = ['CFTR F508del(CTT)', 'CFTR F508del(TCT)', 'CFTR G1244E',
'CFTR G1349D', 'CFTR G178R', 'CFTR G551D', 'CFTR G551S', 'CFTR S1251N',
'CFTR S1255P', 'CFTR S549N', 'CFTR S549R(A>C)', 'CFTR S549R(T>G)',
'CYP2C19 *10', 'CYP2C19 *10', 'CYP2C19 *10', 'CYP2C19 *10', 'CYP2C19
*10', 'CYP2C19 *10', 'CYP2C19 *10', 'CYP2C19 *10']
Note that the haplotypes and genes correspond (ie, the first member of the line in the CFTR haplotype list and corresponds to the first element of the list in the gene list ... so they are ordered)
I want to create a new list or just output a set of lines so that haplotypes having the same gene (so that genes can match each other or a substring of the first part of a haplotype string, depending on which specific code is assigned that is in the gene_concepts list and matches the first member before "|", the separator in the list of strings.
Desired Result:
+21|+0|CFTR F508del(CTT)
+22|+0|CFTR F508del(TCT)
+23|+0|CFTR G1244E
+24|+0|CFTR G1349D
+25|+0|CFTR G178R
+26|+0|CFTR G551D
+27|+0|CFTR G551S
+28|+0|CFTR S1251N
+29|+0|CFTR S1255P
+30|+0|CFTR S549N
+31|+0|CFTR S549R(A>C)
+32|+0|CFTR S549R(T>G)
+33|+12|CYP2C19 *10
+34|+12|CYP2C19 *10
+35|+12|CYP2C19 *10
+36|+12|CYP2C19 *10
+37|+12|CYP2C19 *10
+38|+12|CYP2C19 *10
+39|+12|CYP2C19 *10
+40|+12|CYP2C19 *10
, "+21... + 39 - temp_code_2"... , . - , . 2- .
...
def generate_haplotype_concepts(gene_concepts[0], haplotypes):
temp_code_2 = 20
index = 0
for batch_line in gene_concepts[0]:
gene_parent_code = batch_line.split("|")[0]
gene_parent_medcodes.append(gene_parent_code)
index_gene = 0
index_parent_code = 0
for gene in genes:
if (index_gene == 0):
print("+" + str(temp_code_2) + "|"
+ gene_parent_medcodes[index_parent_code] + "|"
+ haplotypes[index_gene])
index_gene += 1
elif (genes[index_gene] == genes[index_gene-1]):
print("+" + str(temp_code_2) + "|"
+ gene_parent_medcodes[index_parent_code] + "|"
+ haplotypes[index_gene-1])
else:
index_parent_code += 1
print("+" + str(temp_code_2) + "|"
+ gene_parent_medcodes[index_parent_code] + "|"
+ haplotypes[index_gene])
index_gene += 1
temp_code_2 += 1
generate_haplotype_concepts(gene_concepts[0], haplotypes)
:
+21|+0|CFTR F508del(CTT)
+22|+0|CFTR F508del(TCT)
+23|+0|CFTR G1244E
+24|+0|CFTR G1349D
+25|+0|CFTR G178R
+26|+0|CFTR G551D
+27|+0|CFTR G551S
+28|+0|CFTR S1251N
+29|+0|CFTR S1255P
+30|+0|CFTR S549N
+31|+0|CFTR S549R(A>C)
+32|+12|CYP2C19 *10
+33|+12|CYP2C19 *10
+34|+12|CYP2C19 *10
+35|+12|CYP2C19 *10
+36|+12|CYP2C19 *10
+37|+12|CYP2C19 *10
+38|+12|CYP2C19 *10
+39|+12|CYP2C19 *10
2 , ... CFTR (+32 | +0 | CFTR S549R (T > G) ), " " .
IndexError Traceback (most recent call
last)
<ipython-input-16-1410b2513457> in <module>()
55
56
<ipython-input-16-1410b2513457> in
generate_haplotype_concepts(temp_code_2, haplotypes)
30
31 index_gene += 1
33 print("+" + str(temp_code_2) + "|"
34 + gene_parent_medcodes[index_parent_code] +
"|"
IndexError: list index out of range
, ... , , , ... !