mata, sorted:
from Bio import SeqIO
import operator
# Read every FASTA record into a dict, then print "<id> <sequence>" for each
# record, ordered by its sequence.
# The context manager closes the file once parsing is done; SeqIO.parse reads
# lazily, so the dict has to be built while the handle is still open.
with open("example.fasta") as input_file:
    my_dict = SeqIO.to_dict(SeqIO.parse(input_file, "fasta"))

for r in sorted(my_dict.values(), key=operator.attrgetter('seq')):
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print("{} {}".format(r.id, str(r.seq)))
Output:
seq3 ABCDEFG
seq0 ABCWYXO
seq2 BCDEFGH
seq1 IJKLMNOP
, . 200 , , . , .
, (, ) , .
. .
def levenshteinDistance(s1, s2):
    """Return the Levenshtein (edit) distance between ``s1`` and ``s2``.

    The distance is the minimum number of single-item insertions, deletions
    and substitutions needed to turn one sequence into the other.

    Args:
        s1: First sequence (e.g. a string); must support ``len`` and iteration.
        s2: Second sequence of the same kind.

    Returns:
        The edit distance as a non-negative integer; 0 for equal inputs.
    """
    # The distance is symmetric, so make s1 the shorter input: each row of the
    # dynamic-programming table then holds only len(shorter) + 1 entries.
    if len(s1) > len(s2):
        s1, s2 = s2, s1

    # Row 0: turning the empty prefix of s2 into s1[:k] costs k insertions.
    previous_row = list(range(len(s1) + 1))
    for i2, c2 in enumerate(s2):
        # First cell: turning s2[:i2 + 1] into the empty prefix of s1.
        current_row = [i2 + 1]
        for i1, c1 in enumerate(s1):
            if c1 == c2:
                # Matching items cost nothing: carry the diagonal value over.
                current_row.append(previous_row[i1])
            else:
                # One edit on top of the cheapest neighbour:
                # substitution (diagonal), or a deletion/insertion
                # (the cell above and the cell to the left).
                current_row.append(
                    1 + min(previous_row[i1], previous_row[i1 + 1], current_row[-1])
                )
        previous_row = current_row
    return previous_row[-1]
, ( //) . FASTA:
from Bio import SeqIO
from itertools import combinations
# Largest edit distance at which two sequences are still reported.
treshold = 4

# Compare every unordered pair of records in the FASTA file and report the
# pairs whose sequences are within the threshold.
# The context manager closes the file when the comparison is finished; the
# loop stays inside it because SeqIO.parse reads from the open handle.
with open("example.fasta") as input_file:
    records = SeqIO.parse(input_file, "fasta")
    for record1, record2 in combinations(records, 2):
        edit_distance = levenshteinDistance(str(record1.seq), str(record2.seq))
        if edit_distance <= treshold:
            # A single pre-formatted argument prints identically on
            # Python 2 and 3.
            print("{} and {} differ in {} characters".format(record1.id, record2.id, edit_distance))
Output:
seq0 and seq3 differ in 4 characters
seq2 and seq3 differ in 2 characters