How to read the next line without advancing the iteration

I have a text file from which I need to extract the chunk of text that follows each header line, but a chunk can span any number of lines (it is a FASTA file, for anyone familiar with bioinformatics). It is basically laid out as follows:

> header, info, info
TEXT-------------------------------------------------------
----------------------------------------------------
>header, info...
TEXT-----------------------------------------------------

... etc.

I am trying to extract the "TEXT" part. Here is the code I have written:

# Scan ffile for '>' header lines and gather the TEXT lines that follow each one.
for line in ffile:
    if line.startswith('>'):

      # do stuff to header line

        try:
            sequence = ""
            seqcheck = ffile.next() # line after the header will always be the beginning of TEXT
            # NOTE: ffile.next() advances the same iterator that the enclosing
            # for loop consumes (assuming ffile is a file-like iterator), so the
            # '>' line that terminates this while loop ends up in seqcheck and is
            # never seen by the `if` test at the top of the for loop.
            while not seqcheck.startswith('>'):
                        sequence += seqcheck
                        seqcheck = ffile.next()

        # Bare except: this also catches the StopIteration that ffile.next()
        # raises once the input is exhausted, and then leaves the for loop.
        except:       # iteration error check
            break

This does not work, because every time I call next() it also advances the for loop, so I end up skipping a lot of lines and losing a lot of data. How can I just peek at the next line without moving the iterator forward?

+4
source share
5 answers

As far as I understand, you just need to collect every line that does not start with '>'.

>>> content = '''> header, info, info
... TEXT-------------------------------------------------------
... ----------------------------------------------------
... >header, info...
... TEXT-----------------------------------------------------'''
>>> 
>>> f = StringIO(content)
>>> 
>>> my_data = []
>>> for line in f:
...   if not line.startswith('>'):
...     my_data.append(line)
... 
>>> ''.join(my_data)
'TEXT-------------------------------------------------------\n----------------------------------------------------\nTEXT-----------------------------------------------------'
>>> 

:

As @tobias_k pointed out, you may want the text grouped per header instead:

>>> def get_content(f):
...   # Generator: yields one list of lines for each '>'-delimited section of f.
...   # Header lines themselves are not included in the yielded lists.
...   # A list is yielded every time a header is met, so input that starts with
...   # a header produces an empty list first.
...   my_data = []
...   for line in f:
...     if line.startswith('>'):
...       yield my_data
...       my_data = []
...     else:
...       my_data.append(line)
...   yield my_data  # the last one
... 
>>> 
>>> f.seek(0)
>>> for i in get_content(f):
...   print i
... 
[]
['TEXT-------------------------------------------------------\n', '----------------------------------------------------\n']
['TEXT-----------------------------------------------------']
>>> 
+3

How about a regular expression?:

# Sample FASTA-like input: each record is a '>' header line followed by one or
# more lines of text.
txt='''\
> header, info, info
TEXT----------------------------------------------------------------
TEXT2-------------------------------------------
>header, info...
TEXT-----------------------------------------------------'''


import re

# Each match is one record:
#   group 1 - the header line (from '>' to the end of that line)
#   group 2 - everything after the header up to the next line that starts with
#             '>' or the end of the text (this includes the newline that
#             follows the header)
# re.M lets ^ and $ match at line boundaries; re.S lets '.' span newlines.
for m in re.finditer(r'^(?:(>.*?$)(.*?)(?=^>|\Z))', txt, re.S | re.M):
    header, data = m.groups()
    # process header
    # process data
    # A single pre-formatted argument prints the same on Python 2 and Python 3.
    print("%s %s" % (header, data))

.

, , .


, mmap, .

+1

. , , ( ), :

# Walk ffile once, alternating between header lines and runs of text lines.
# Assumes ffile is an iterator (e.g. an open file), so the inner loop resumes
# where the outer loop left off instead of starting over.
for line in ffile:
    if not line.startswith('>'):
        # First line of a text run; keep appending until the next header.
        sequence = line
        # The inner loop rebinds `line` and pulls from the same ffile.
        for line in ffile:
            if line.startswith('>'): break
            sequence += line
        print "<text>", sequence
    # Deliberately a second `if`, not an `else`: when the inner loop stopped on
    # a header, `line` now holds that header and it is handled right here.
    if line.startswith('>'):
        print "<header>", line

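First, it uses a nested for loop (over the same ffile) instead of try/except. Second, the first text line is stored in sequence before the inner loop starts. Third, note the second `if line.startswith('>')` check (deliberately not an else), so that the header line which ended the inner loop is still processed.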

0

Another option is to use enumerate:

# Read everything up front so the lines after each header can be addressed by
# position instead of by advancing a shared iterator.
lines = ffile.readlines()
for i, line in enumerate(lines):
    if not line.startswith('>'):
        continue
    # Gather the lines after this header, stopping at the next header.
    pieces = []
    for l in lines[i+1:]:
        if l.startswith('>'):
            break
        pieces.append(l)
    sequence = "".join(pieces)
0

. , , , / ! - BACK, , , !

deque(), . ffile-, , ffile.

, -, , deque .

import cStringIO,collections
# In-memory sample input. The literal starts with a newline, so the first line
# read from it is blank and precedes the first '>' header.
original_ffile=cStringIO.StringIO('''
> header, info, info
TEXT----------------------------------------------------------------
TEXT2-------------------------------------------
>header, info...
TEXT-----------------------------------------------------''')

def peaker(_iter,_buffer):
    """Yield items from ``_iter``, serving anything in ``_buffer`` first.

    Items that the caller appends to ``_buffer`` (a ``collections.deque``)
    while iterating are handed back, oldest first, before the next item is
    taken from ``_iter``. That lets a consumer "push back" an item it read
    too early.

    Parameters:
        _iter: an iterator (must support ``next()``), e.g. an open file.
        _buffer: a deque used as the push-back queue; it is drained from the
            left with ``popleft``.

    The generator finishes when ``_iter`` is exhausted.
    """
    popleft=_buffer.popleft
    while True:
        while _buffer: yield popleft() # this implements FIFO-style
        # A StopIteration escaping from inside a generator body is turned into
        # RuntimeError on Python 3.7+ (PEP 479), so end the generator
        # explicitly instead of letting it propagate.
        try:
            item = next(_iter)
        except StopIteration:
            return
        yield item
# Wrap the input so that a line read too early can be handed back: anything
# appended to buf is served again before the next line of original_ffile.
buf=collections.deque()
push_back=buf.append
ffile=peaker(original_ffile,buf)
for line in ffile:
    if not line.startswith('>'):
        continue
    print("found a header! %s"%line[:-1])
    # do stuff to header line
    chunks = []
    for seqcheck in ffile:
        if seqcheck.startswith('>'):
            # This is the next record's header: return it to the stream so the
            # outer loop picks it up on its next iteration.
            print("oops, we've gone too far, pushing back: %s"%seqcheck[:-1])
            push_back(seqcheck)
            break
        chunks.append(seqcheck)
    sequence = "".join(chunks)

Output:

found a header! > header, info, info
oops, we've gone too far, pushing back: >header, info...
found a header! >header, info...
0

Source: https://habr.com/ru/post/1543270/


All Articles