How to extract values from one dictionary and combine them with values from another in Python?

sample = [['CGG','ATT'],['GCGC','TAAA']]

#Frequencies of each base in the pair
d1 = [[{'G': 0.66, 'C': 0.33}, {'A': 0.33, 'T': 0.66}], [{'G': 0.5, 'C': 0.5}, {'A': 0.75, 'T': 0.25}]]

#Frequencies of each pair occurring together

d2 = [{('C', 'A'): 0.33, ('G', 'T'): 0.66}, {('G', 'T'): 0.25, ('C', 'A'): 0.5, ('G', 'A'): 0.25}]

Problem:

Consider the first pair: ['CGG', 'ATT']

How to calculate a, where a:

float(a) = (freq of pairs) - ((freq of C in CGG) * (freq of A in ATT))

eg. in CA pairs, float (a) = (freq of CA pairs) - ((freq of C in CGG) * (freq of A in ATT))

Output a = (0.33) - ((0.33) * (0.33)) = 0.222222

Calculation of “a” for any one combination (CA pair or GT pair)

Final Output for sample : a = [0.2222, - 0.125]

How to calculate b, where b:

float(b) = (float(a)^2) / ((freq of C in CGG) * (freq of G in CGG) * (freq of A in ATT) * (freq of T in ATT))

Output b = 1

Do it for the whole list

Final Output for sample : b = [1, 0.3333]

I do not know how to extract the required values from d1 and d2 and perform the mathematical operations on them.

I tried to write the following code for the value

float a = {k: float(d1[k][0]) - d2[k][0] * d2[k][1]for k in d1.viewkeys() & d2.viewkeys()}

But that does not work. In addition, I would prefer a for loop over a comprehension.

My attempt to write a (rather erroneous) for-loop for the above:

float_a = []
for pair,i in enumerate(d2):
    for base,j in enumerate(d1):
        float (a) = pair[i][0] - base[j][] * base[j+1][]
        float_a.append(a)

float_b = []
  for floata in enumerate(float_a):
    for base,j in enumerate(d1):
        float (b) = (float(a) * float(a)) - (base[j] *    base[j+1]*base[j+2]*base[j+3])
        float_b.append(b)
+4
source share
1 answer

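The following code computes the base and pair frequencies directly from the sequences and then derives a and b from them: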

from collections import Counter

def get_base_freq(seq):
    """
    Compute the relative frequency of every base occurring in ``seq``.

    Each distinct character is tallied with a Counter and its tally is divided
    by the sequence length, giving a dictionary that maps base -> fraction of
    the sequence. An empty sequence yields an empty dictionary.
    """
    total = len(seq)
    freqs = {}
    for base, occurrences in Counter(seq).items():
        # float() keeps the division true division on Python 2 as well.
        freqs[base] = float(occurrences) / total
    return freqs

def get_pair_freq(seq1, seq2):
    """
    Return the normalized frequency of each aligned base pair.

    The two sequences are zipped position by position, so every key is a
    ``(base_from_seq1, base_from_seq2)`` tuple and every value is the fraction
    of aligned positions that show that pair.

    zip stops at the shorter sequence, so the counts are normalized by the
    number of pairs actually formed rather than by ``len(seq1)``. This keeps
    the frequencies summing to 1 even when the sequences differ in length;
    for equal-length sequences the result is the same as dividing by
    ``len(seq1)``. If no pairs are formed, an empty dictionary is returned.
    """
    pair_counts = Counter(zip(seq1, seq2))
    n_pairs = sum(pair_counts.values())
    # float() keeps the division true division on Python 2 as well.
    return {pair: float(count) / n_pairs for pair, count in pair_counts.items()}

def calc_a(d1, d2):
    """
    Calculate the a-value from the first pair found in d2.

    d1 is a two-element sequence of base-frequency dictionaries (one per
    sequence of the pair) and d2 maps ``(base1, base2)`` tuples to their pair
    frequency. The result is::

        a = freq(pair) - freq(base1 in first seq) * freq(base2 in second seq)

    The pair used is simply the first one yielded when iterating d2.

    Raises IndexError if d2 is empty.
    """
    # dict.items() is a non-subscriptable view on Python 3, so take the first
    # entry through an iterator instead of indexing with [0].
    try:
        first_pair, pair_freq = next(iter(d2.items()))
    except StopIteration:
        # Keep the exception type callers would have seen from items()[0].
        raise IndexError("d2 is empty: there is no pair to compute a from")
    base1, base2 = first_pair
    return pair_freq - (d1[0][base1] * d1[1][base2])

def calc_b(a, d1):
    """
    Calculate the b-value: a squared divided by the product of all base
    frequencies of both sequences.

    a is the value returned by calc_a; d1 is a two-element sequence of
    base-frequency dictionaries. Every value from d1[0] and d1[1] is
    multiplied into the denominator.
    """
    denom = 1
    # Walk both dictionaries directly: on Python 3, dict.values() returns
    # views that cannot be concatenated with "+".
    for base_freqs in (d1[0], d1[1]):
        for val in base_freqs.values():
            denom *= val
    # float() keeps the division true division on Python 2 as well.
    return a * a / float(denom)

if __name__ == "__main__":
    # Each entry is a pair of sequences that are compared position by position.
    sample = [['CGG','ATT'], ['GCGC','TAAA'], ['ACAA','CAAC']]
    b_result = []
    for seq_pair in sample:
        # Per-sequence base frequencies, then frequencies of the aligned pairs.
        d1 = [get_base_freq(seq) for seq in seq_pair]
        d2 = get_pair_freq(*seq_pair)
        a = calc_a(d1, d2)
        b = calc_b(a, d1)
        b_result.append(b)
    # The call form of print is valid on both Python 2 and Python 3; the
    # statement form "print b_result" is a SyntaxError on Python 3.
    print(b_result)

, - , !

+1

Source: https://habr.com/ru/post/1657943/


All Articles