Extract values between two lines in a text file using Python

Suppose I have a text file with the content below

fdsjhgjhg
fdshkjhk
Start
Good Morning
Hello World
End
dashjkhjk
dsfjkhk

Now I need to write Python code that reads the text file and copies the contents between Start and End to another file.

I wrote the following code.

 # Asker's original attempt, which the question reports as not producing the
 # desired output. Only the line breaks have been restored.
 inFile = open("data.txt")
 outFile = open("result.txt", "w")
 buffer = []  # lines accumulated since the last reset
 keepCurrentSet = True
 for line in inFile:
     # Every line is buffered first, including the marker lines themselves.
     buffer.append(line)
     if line.startswith("Start"):
         #---- starts a new data set
         if keepCurrentSet:
             # Writes everything buffered so far, up to and including this
             # "Start" line.
             outFile.write("".join(buffer))
         #now reset our state
         keepCurrentSet = False
         buffer = []
     elif line.startswith("End"):
         keepCurrentSet = True
 inFile.close()
 outFile.close()

I am not getting the desired result. I am a beginner. I want to get all the lines between Start and End, excluding the Start and End lines themselves.

+12
source share
7 answers
 # Stream the input once, writing only the lines that sit between a "Start"
 # line and the following "End" line. The marker lines are never written.
 with open('path/to/input') as src, open('path/to/output', 'w') as dst:
     inside_block = False
     for raw_line in src:
         marker = raw_line.strip()
         if marker == "Start":
             inside_block = True
         elif marker == "End":
             inside_block = False
         elif inside_block:
             dst.write(raw_line)
+34
source

If the text files are not too large, you can read the entire content of the file and then use a regular expression:

 # Read data.txt in one go and copy the text of every Start/End block into
 # result.txt. The Start and End marker lines themselves are excluded.
 import re

 with open('data.txt') as myfile:
     content = myfile.read()

 # MULTILINE anchors the markers to whole lines, so "Start"/"End" embedded in
 # other words are ignored. DOTALL lets the lazy group span several lines.
 # The capture group holds only the lines between the markers.
 blocks = re.findall(
     r'^Start[ \t]*\n(.*?)^End[ \t]*$',
     content,
     re.DOTALL | re.MULTILINE,
 )

 # findall returns an empty list when nothing matches, so a file without
 # markers yields an empty result file instead of an AttributeError.
 with open("result.txt", "w") as myfile2:
     myfile2.writelines(blocks)
+4
source

Using itertools.dropwhile , itertools.takewhile , itertools.islice :

 # Copy the first Start/End block from data.txt to result.txt by chaining
 # lazy iterators over the input file.
 import itertools


 def not_start(line):
     return line.strip() != 'Start'


 def not_end(line):
     return line.strip() != 'End'


 with open('data.txt') as f, open('result.txt', 'w') as fout:
     # Skip everything before the first "Start" line...
     from_start = itertools.dropwhile(not_start, f)
     # ...drop the "Start" line itself...
     after_start = itertools.islice(from_start, 1, None)
     # ...and stop just before the next "End" line.
     fout.writelines(itertools.takewhile(not_end, after_start))

UPDATE: As inspectorG4dget commented, the code above copies only the first block. To copy multiple blocks, use the following code:

 # Copy every Start/End block from data.txt to result.txt. The marker lines
 # are consumed but never written.
 import itertools


 def not_start(line):
     return line.strip() != 'Start'


 def not_end(line):
     return line.strip() != 'End'


 with open('data.txt', 'r') as f, open('result.txt', 'w') as fout:
     while True:
         remaining = itertools.dropwhile(not_start, f)
         # The first item yielded is the "Start" line; None means the file
         # ran out before another block began.
         start_line = next(remaining, None)
         if start_line is None:
             break
         block_lines = itertools.takewhile(not_end, remaining)
         fout.writelines(block_lines)
+4
source

I am not a Python expert, but this code should do the job.

 # Copy the lines found between a line starting with "Start" and the next
 # line starting with "End" from data.txt into result.txt. The marker lines
 # themselves are not copied.
 # The with-statement closes both files even if reading or writing fails.
 with open("data.txt") as in_file, open("result.txt", "w") as out_file:
     keep_current_set = False
     for line in in_file:
         # Check "End" before writing so the End line is not copied.
         if line.startswith("End"):
             keep_current_set = False
         if keep_current_set:
             out_file.write(line)
         # Check "Start" after writing so the Start line is not copied.
         if line.startswith("Start"):
             keep_current_set = True
+3
source

Move the call to outFile.write to the second if :

 # Collect the lines of each Start/End block and write them to result.txt
 # only once the closing "End" line is seen. A block with no "End" line is
 # therefore never written.
 # The with-statement closes both files even if an error occurs.
 with open("data.txt") as in_file, open("result.txt", "w") as out_file:
     # None means "outside a block"; a list (possibly empty) means "inside".
     buffer = None
     for line in in_file:
         if line.startswith("Start"):
             buffer = []
         elif line.startswith("End"):
             if buffer is not None:
                 out_file.writelines(buffer)
             buffer = None
         elif buffer is not None:
             buffer.append(line)
+2
source
 # Read data.txt in one go and write the lines of every Start/End block to
 # result.txt, without the marker lines.
 import re

 with open("data.txt") as in_file:
     # A single read replaces building the text with repeated concatenation.
     content = in_file.read()

 # MULTILINE ties the markers to whole lines and DOTALL lets the lazy group
 # span several lines. The lookahead leaves the "End" line out of the match.
 blocks = re.findall(
     r"^Start\n(.*?)(?=^End$)",
     content,
     re.DOTALL | re.MULTILINE,
 )

 with open("result.txt", "w") as out_file:
     out_file.write("".join(blocks))
+1
source

I would handle this as follows:

 # Copy the lines between the first "Start" line and the first "End" line
 # after it from data.txt into result.txt. If either marker is missing,
 # result.txt is left empty.
 with open("data.txt") as in_file:
     data = in_file.readlines()

 # Compare without line endings so a final "End" line that has no trailing
 # newline is still recognised.
 markers = [line.rstrip("\r\n") for line in data]
 try:
     first = markers.index("Start") + 1
     # Search for "End" only after "Start" so an earlier stray "End" is ignored.
     last = markers.index("End", first)
 except ValueError:
     selected = []
 else:
     selected = data[first:last]

 with open("result.txt", "w") as out_file:
     out_file.writelines(selected)
0
source

Source: https://habr.com/ru/post/954038/


All Articles