We have a large raw data file that we would like to trim to a specified size.
How would I go about getting the first N lines of a text file in Python? Will the OS being used have any effect on the implementation?
Python 3:
with open(path_to_file) as input_file:
    head = [next(input_file) for _ in range(lines_number)]
print(head)
Python 2:
with open(path_to_file) as input_file:
    head = [next(input_file) for _ in xrange(lines_number)]
print head
Here's another way (both Python 2 & 3):
from itertools import islice
with open(path_to_file) as input_file:
    head = list(islice(input_file, lines_number))
print(head)
N = 10
with open("file.txt", "r") as file:  # "r" opens it in read mode ("a" append mode cannot be read from)
    for i in range(N):
        line = next(file).strip()
        print(line)
If you want a quick way to read the first lines and you don't care too much about performance, you can use .readlines(), which returns a list object, and then slice the list.
E.g. for the first 5 lines:
with open("pathofmyfileandfileandname") as myfile:
firstNlines=myfile.readlines()[0:5] #put here the interval you want
Note: the whole file is read so is not the best from the performance point of view but it
is easy to use, fast to write and easy to remember so if you want just perform
some one-time calculation is very convenient
print firstNlines
One advantage compared to the other answers is the possibility to easily select a range of lines, e.g. skipping the first 10 lines ([10:30]), skipping the last 10 ([:-10]), or taking only even lines ([::2]).
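For instance, the slice variants mentioned above look like this (a short sketch; the filename is the placeholder used above):
with open("pathofmyfileandfileandname") as myfile:
    all_lines = myfile.readlines()
print(all_lines[10:30])  # lines 11 through 30 (the first 10 are skipped)
print(all_lines[:-10])   # everything except the last 10 lines
print(all_lines[::2])    # every second line (the 1st, 3rd, 5th, ...)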
What I do is read the first N lines using pandas. I think the performance is not the best, but for example if N=1000:
import pandas as pd
yourfile = pd.read_csv('path/to/your/file.csv', nrows=1000)
There is no specific method for reading a given number of lines exposed by the file object.
I guess the easiest way would be the following:
lines = []
with open(file_name) as f:
    lines.extend(f.readline() for i in xrange(N))
The two most intuitive ways of doing this would be:
Iterate on the file line-by-line, and break after N lines.
Iterate on the file line-by-line using the next() method N times. (This is essentially just a different syntax for what the top answer does.)
Here is the code:
# Method 1:
with open("fileName", "r") as f:
    counter = 0
    for line in f:
        print line
        counter += 1
        if counter == N: break

# Method 2:
with open("fileName", "r") as f:
    for i in xrange(N):
        line = f.next()
        print line
The bottom line is, as long as you don't use readlines() or otherwise read the entire file into memory, you have plenty of options.
Based on gnibbler's top-voted answer (Nov 20 '09 at 0:27): this class adds head() and tail() methods to the file object.
class File(file):
    def head(self, lines_2find=1):
        self.seek(0)  # rewind file
        return [self.next() for x in xrange(lines_2find)]

    def tail(self, lines_2find=1):
        self.seek(0, 2)  # go to end of file
        bytes_in_file = self.tell()
        lines_found, total_bytes_scanned = 0, 0
        while (lines_2find + 1 > lines_found and
               bytes_in_file > total_bytes_scanned):
            byte_block = min(1024, bytes_in_file - total_bytes_scanned)
            self.seek(-(byte_block + total_bytes_scanned), 2)
            total_bytes_scanned += byte_block
            lines_found += self.read(byte_block).count('\n')  # count only within this block
        self.seek(-total_bytes_scanned, 2)
        line_list = list(self.readlines())
        return line_list[-lines_2find:]
Usage:
f = File('path/to/file', 'r')
f.head(3)
f.tail(3)
The most convenient way, in my opinion:
LINE_COUNT = 3
print [s for (i, s) in enumerate(open('test.txt')) if i < LINE_COUNT]
A solution based on a list comprehension:
The function open() supports an iteration interface. enumerate() wraps the open file and returns (index, line) tuples; we then check that we're inside the accepted range (if i < LINE_COUNT) and simply print the result.
Enjoy the Python. ;)
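Note that the comprehension above still reads the whole file even though it only keeps LINE_COUNT lines; a variant that stops reading as soon as it has enough (a sketch using itertools.islice, not part of the original answer):
from itertools import islice
LINE_COUNT = 3
with open('test.txt') as f:
    print(list(islice(f, LINE_COUNT)))  # stops after LINE_COUNT lines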
For the first 5 lines, simply do:
N = 5
with open("data_file", "r") as file:
    for i in range(N):
        print file.next()
If you want something that obviously works (without looking up esoteric stuff in manuals), without imports and try/except, and that works on a fair range of Python 2.x versions (2.2 to 2.6):
def headn(file_name, n):
    """Like *x head -N command"""
    result = []
    nlines = 0
    assert n >= 1
    for line in open(file_name):
        result.append(line)
        nlines += 1
        if nlines >= n:
            break
    return result

if __name__ == "__main__":
    import sys
    rval = headn(sys.argv[1], int(sys.argv[2]))
    print rval
    print len(rval)
If you have a really big file, and assuming you want the output to be a numpy array, using np.genfromtxt will freeze your computer. This is so much better in my experience:
import numpy as np

def load_big_file(fname, maxrows):
    '''only works for a well-formed text file of space-separated doubles'''
    rows = []  # unknown number of lines, so use a list
    with open(fname) as f:
        j = 0
        for line in f:
            if j == maxrows:
                break
            else:
                line = [float(s) for s in line.split()]
                rows.append(np.array(line, dtype=np.double))
                j += 1
    return np.vstack(rows)  # convert list of vectors to array
This worked for me:
f = open("history_export.csv", "r")
line = 5
for x in range(line):
    a = f.readline()
    print(a)
I would like to handle files with fewer than n lines by reading the whole file:
def head(filename: str, n: int):
    try:
        with open(filename) as f:
            head_lines = [next(f).rstrip() for x in range(n)]
    except StopIteration:
        with open(filename) as f:
            head_lines = f.read().splitlines()
    return head_lines
Credit goes to John La Rooy and Ilian Iliev. Use the function for the best performance with exception handling.
Revision 1: Thanks FrankM for the feedback; to handle file existence and read permission we can further add:
import errno
import os

def head(filename: str, n: int):
    if not os.path.isfile(filename):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filename)
    if not os.access(filename, os.R_OK):
        raise PermissionError(errno.EACCES, os.strerror(errno.EACCES), filename)
    try:
        with open(filename) as f:
            head_lines = [next(f).rstrip() for x in range(n)]
    except StopIteration:
        with open(filename) as f:
            head_lines = f.read().splitlines()
    return head_lines
You can either go with the second version, or go with the first one and handle the file exceptions later. The check is quick and mostly free from a performance standpoint.
Starting with Python 2.6, you can take advantage of more sophisticated functions in the IO base class. So the top-rated answer above can be rewritten as:
with open("datafile") as myfile:
    head = myfile.readlines(N)
print head
(You don't have to worry about your file having fewer than N lines, since no StopIteration exception is thrown. Note, however, that the argument to readlines() is a size hint in bytes, not a line count: it returns complete lines whose total size is roughly N bytes, not exactly N lines.)
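A quick illustration of the size-hint behavior (a sketch; the hint value is illustrative):
with open("datafile") as myfile:
    head = myfile.readlines(1)  # a 1-byte hint: the io module still returns at least one complete line
print head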
This works for Python 2 & 3:
from itertools import islice
with open('/tmp/filename.txt') as inf:
    for line in islice(inf, N, N+M):
        print(line)
fname = input("Enter file name: ")
num_lines = 0
with open(fname, 'r') as f:  # count the lines
    for line in f:
        num_lines += 1
num_lines_input = int(input("Enter number of lines: "))
if num_lines_input <= num_lines:
    f = open(fname, "r")
    for x in range(num_lines_input):
        a = f.readline()
        print(a)
    f.close()
else:
    f = open(fname, "r")
    for x in range(num_lines):  # print as many lines as the file actually has
        a = f.readline()
        print(a)
    print("Don't have", num_lines_input, "lines; printed as much as possible")
    print("Total lines in the text:", num_lines)
    f.close()
Here's another decent solution with a list comprehension:
file = open('file.txt', 'r')
lines = [next(file) for x in range(3)] # first 3 lines will be in this list
file.close()
An easy way to get the first 10 lines:
with open('fileName.txt', mode='r') as file:
    lines = [line.rstrip('\n') for line in file][:10]
    print(lines)
#!/usr/bin/python
import subprocess
p = subprocess.Popen(["tail", "-n 3", "passlist"], stdout=subprocess.PIPE)
output, err = p.communicate()
print output
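For the first N lines, the same approach works with head instead of tail (a sketch, assuming a Unix-like OS where the head command is available; this is one case where the OS in use really does matter):
import subprocess
p = subprocess.Popen(["head", "-n", "3", "passlist"], stdout=subprocess.PIPE)
output, err = p.communicate()
print(output)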
This method worked for me.
Simply convert your CSV file object to a list using list(file_data):
import csv
with open('your_csv_file.csv') as file_obj:
    file_data = csv.reader(file_obj)
    file_list = list(file_data)
    for row in file_list[:4]:
        print(row)
I have to access the Nth line in a CSV file.
Here's what I did:
import csv
the_file = open('path', 'r')
reader = csv.reader(the_file)
N = int(input('What line do you need? > '))
i = 0
for row in reader:
    if i == N:
        print("This is the line.")
        print(row)
        break
    i += 1
the_file.close()
...but this does not feel optimal. Edit for precision: If the file is huge, I do not want to go through all the lines and I do not want to have to load the whole file into memory.
I do hope something like reader[N] exists, but I have not found it.
Edit for answer: This line (coming from chosen answer) is what I was looking for:
next(itertools.islice(csv.reader(f), N, None))
You can use enumerate to iterate through the rows until you find the right one:
for i, row in enumerate(reader):
    if i == line_number:
        print("This is the line.")
        print(row)
        break
You can also use itertools.islice which is designed for this type of scenario - accessing a particular slice of an iterable without reading the whole thing into memory. It should be a bit more efficient than looping through the unwanted rows.
import csv
import itertools

def get_csv_line(path, line_number):
    with open(path) as f:
        return next(itertools.islice(csv.reader(f), line_number, None))
But if your CSV file is small, just read the entire thing into a list, which you can then access with an index in the normal way. This also has the advantage that you can access several different rows in random order without having to reset the csv reader.
with open(path) as f:
    my_csv_data = list(csv.reader(f))
print(my_csv_data[line_number])
Your solution is actually not that bad. Advancing the file iterator to the line you want is a good approach and is used in many situations like this.
If you want it more concise though, you can use next and enumerate with a generator expression:
import csv
the_file = open('path', 'r')
reader = csv.reader(the_file)
N = int(input('What line do you need? > '))
line = next((x for i, x in enumerate(reader) if i == N), None)
print(line)
the_file.close()
The None in there is what will be returned if the line is not found (N is too large). You can pick any other value though.
You could also open the file with a with-statement to have it be automatically closed:
import csv
with open('path', 'r') as the_file:
    reader = csv.reader(the_file)
    N = int(input('What line do you need? > '))
    line = next((x for i, x in enumerate(reader) if i == N), None)
    print(line)
If you really want to cut down on size, you could do:
from csv import reader
N = int(input('What line do you need? > '))
with open('path') as f:
    print(next((x for i, x in enumerate(reader(f)) if i == N), None))
The itertools module has a number of functions for creating specialized iterators, and its islice() function can be used to easily solve this problem:
import csv
import itertools
N = 5 # desired line number
with open('path.csv', newline='') as the_file:
    row = next(csv.reader(itertools.islice(the_file, N, N+1)))
    print("This is the line.")
    print(row)
P.S. For the curious, my initial response, which also works (arguably better), was:
row = next(itertools.islice(csv.reader(the_file), N, N+1))
You can simply do:
n = 2 # line to print
fd = open('foo.csv', 'r')
lines = fd.readlines()
print lines[n-1] # prints 2nd line
fd.close()
Or, more conveniently, use the linecache module (note that despite appearances it still reads and caches the entire file internally):
import linecache
n = 2
linecache.getline('foo.csv', n)
You could minimize your for loop into a comprehension expression, e.g.
row = [row for i,row in enumerate(reader) if i == N][0]
# or even nicer as seen in iCodez code with next and generator expression
row = next(row for i,row in enumerate(reader) if i == N)
import csv

with open('cvs_file.csv', 'r') as inFile:
    reader = csv.reader(inFile)
    my_content = list(reader)
line_no = int(input('What line do you need (line number begins from 0)? > '))
if line_no < len(my_content):
    print(my_content[line_no])
else:
    print('This line does not exist')
As a result now you can get any line by its index directly:
What line do you need? > 2
['101', '0.19', '1']
What line do you need? > 100
This line does not exist
I want to open a file, and simply return the contents of said file with each line beginning with the line number.
So hypothetically if the contents of a is
a
b
c
I would like the result to be
1: a
2: b
3: c
I'm kind of stuck; I tried enumerate but it doesn't give me the desired format.
It's for uni, but only a practice test.
A couple bits of trial code to prove I have no idea what I'm doing / where to start:
def print_numbered_lines(filename):
    """returns the infile data with a line number in front of the contents"""
    in_file = open(filename, 'r').readlines()
    list_1 = []
    for line in in_file:
        for item in line:
            item.index(item)
            list_1.append(item)
    return list_1

def print_numbered_lines(filename):
    """returns the infile data with a line number in front of the contents"""
    in_file = open(filename, 'r').readlines()
    result = []
    for i in in_file:
        result.append(enumerate(i))
    return result
A file handle can be treated as an iterable.
with open('tree_game2.txt') as f:
    for i, line in enumerate(f):
        print("{0}: {1}".format(i+1, line))
There seems no need to write a python script, awk would solve your problem.
awk '{print NR": "$1}' your_file > new_file
What about using an OrderedDict
from collections import OrderedDict

c = OrderedDict()
n = 1
with open('file.txt', 'r') as f:
    for line in f:
        c.update({n: line})
        # if you just want to print it, skip the dict part and just do:
        print n, line
        n += 1
Then you can print it out with:
for n, line in c.iteritems():  # .items() in Python 3
    print n, line
The simple way to do it: first, open the file; second, use a counter. For example:
with open('file.txt') as f:
    data = f.read()
lines = data.split("\n")
count = 0
for line in lines:
    print("line " + str(count) + "> " + line)
    count += 1
I was trying to extract the even lines from a text file and output them to a new file, but with my code Python warns me "list index out of range". Can anyone help me? Thanks!
Code:
f = open('input.txt', 'r')
i = 0
j = 0
num_lines = sum(1 for line in f)
newline = [0] * num_lines
print (num_lines)
for i in range(1, num_lines):
    if i % 2 == 0:
        newline[i] = f.readlines()[i]
        print i, newline[i]
    i = i + 1
f.close()

f = open('output.txt', 'w')
for j in range(0, num_lines):
    if j % 2 == 0:
        f.write(newline[j] + '\n')
    j = j + 1
f.close()
Output:
17
Traceback (most recent call last):
File "./5", line 10, in <module>
a = f.readlines()[1]
IndexError: list index out of range
After
num_lines = sum(1 for line in f)
the file pointer in f is at the end of the file, so any subsequent call of f.readlines() gives an empty list. The minimal fix is to use f.seek(0) to return to the start of the file.
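Applied to the original script, the minimal fix would look like this (a sketch; only the seek(0) call is new):
f = open('input.txt', 'r')
num_lines = sum(1 for line in f)
f.seek(0)  # rewind so subsequent reads start from the beginning again
all_lines = f.readlines()  # now returns the actual lines instead of an empty list
f.close()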
However, a better solution would be to read through the file only once, e.g. using enumerate to get the line and its index i:
newline = []
for i, line in enumerate(f):
    if i % 2 == 0:
        newline.append(line)
In your original script you read the file once to count the lines, then you (try to) read the lines into memory again; you needlessly create a list of the full size instead of just extending it with list.append, and you initialize the list with zeroes, which does not make sense for a list that will contain strings.
Thus, this script does what your original idea was, but better, simpler, and faster:
with open('input.txt', 'r') as inf, open('output.txt', 'w') as outf:
    for lineno, line in enumerate(inf, 1):
        if lineno % 2 == 0:
            outf.write(line)
Specifically: the files are opened with a with statement so that they are automatically closed when the block is exited; lines are written as they are read; and since lines are numbered 1-based, enumerate is used with a start value of 1 so that you truly get the even-numbered lines.
You've also got the itertools.islice approach available:
from itertools import islice
with open('input') as fin, open('output', 'w') as fout:
    # start at index 1 so the 2nd, 4th, ... lines are written
    fout.writelines(islice(fin, 1, None, 2))
This saves the modulus operation and hands the line writing over to writelines in one call.
I need to read a big file by reading at most N lines at a time, until EOF. What is the most effective way of doing it in Python? Something like:
with open(filename, 'r') as infile:
    while not EOF:
        lines = [get next N lines]
        process(lines)
One solution would be a list comprehension and the slice operator:
with open(filename, 'r') as infile:
    lines = [line for line in infile][:N]
After this, lines is a list of lines. However, this would load the complete file into memory. If you don't want this (i.e. if the file could be really large), there is another solution using a generator expression and islice from the itertools package:
from itertools import islice
with open(filename, 'r') as infile:
    lines_gen = islice(infile, N)
lines_gen is a generator object, that gives you each line of the file and can be used in a loop like this:
for line in lines_gen:
    print line
Both solutions give you up to N lines (or fewer, if the file doesn't have that much).
A file object is an iterator over lines in Python. To iterate over the file N lines at a time, you could use the grouper() function from the Itertools Recipes section of the documentation. (Also see What is the most “pythonic” way to iterate over a list in chunks?):
try:
    from itertools import izip_longest
except ImportError:  # Python 3
    from itertools import zip_longest as izip_longest

def grouper(iterable, n, fillvalue=None):
    args = [iter(iterable)] * n
    return izip_longest(*args, fillvalue=fillvalue)
Example
with open(filename) as f:
    for lines in grouper(f, N, ''):
        assert len(lines) == N
        # process N lines here
This code will work with any number of lines in the file and any N. If you have 1100 lines in the file and N = 200, you will get 5 chunks of 200 lines and one chunk of 100 lines.
with open(filename, 'r') as infile:
    lines = []
    for line in infile:
        lines.append(line)
        if len(lines) >= N:
            process(lines)
            lines = []
    if len(lines) > 0:
        process(lines)
maybe:
for x in range(N):
    lines.append(f.readline())
I think you should be using chunks instead of specifying the number of lines to read. It makes your code more robust and generic. Even if the lines are big, using chunks will load only the assigned amount of data into memory.
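A minimal sketch of the chunk-based idea (the helper name and chunk size here are illustrative, not from the original answer):
def read_in_chunks(file_obj, chunk_size=64 * 1024):
    # yield successive blocks of at most chunk_size bytes
    while True:
        data = file_obj.read(chunk_size)
        if not data:
            break
        yield data

with open('big.txt') as f:
    for chunk in read_in_chunks(f):
        print(len(chunk))  # stand-in for real per-chunk processing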
I needed to read n lines at a time from extremely large files (~1TB) and wrote a simple package to do this. If you pip install bigread, you can do:
from bigread import Reader
stream = Reader(file='large.txt', block_size=10)
for i in stream:
    print(i)
block_size is the number of lines to read at a time.
This package is no longer maintained. I now find it best to use:
with open('big.txt') as f:
    for line_idx, line in enumerate(f):
        print(line)
If you need a memory of previous lines, just store them in a list; if you need to know future lines to decide what to do with the current line, store the current line in a list until you get to that future line.
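For example, keeping a bounded memory of previous lines could look like this (a sketch using collections.deque; the window size is illustrative):
from collections import deque

window = deque(maxlen=5)  # holds the current line plus up to 4 previous ones
with open('big.txt') as f:
    for line in f:
        window.append(line)
        # look at the earlier entries of `window` here when processing `line`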
How about a for loop?
with open(filename, 'r') as infile:
    while True:
        lines = []
        for i in range(N):
            line = infile.readline()
            if not line:  # EOF reached
                break
            lines.append(line)
        if not lines:
            break
        process(lines)
You may have to do something as simple as:
lines = [infile.readline() for _ in range(N)]
Update after comments:
lines = [line for line in [infile.readline() for _ in range(N)] if len(line) ]
def get_lines_iterator(filename, n=10):
    with open(filename) as fp:
        lines = []
        for i, line in enumerate(fp):
            if i % n == 0 and i != 0:
                yield lines
                lines = []
            lines.append(line)
        if lines:
            yield lines

for lines in get_lines_iterator(filename):
    print(lines)
It is simpler with islice:
from itertools import islice
def get_lines_iterator(filename, n=10):
    with open(filename) as fp:
        while True:
            lines = list(islice(fp, n))
            if lines:
                yield lines
            else:
                break

for lines in get_lines_iterator(filename):
    print(lines)
Another way to do this:
from itertools import islice
def get_lines_iterator(filename, n=10):
    with open(filename) as fp:
        for line in fp:
            yield [line] + list(islice(fp, n-1))

for lines in get_lines_iterator(filename):
    print(lines)
If you can read the full file in ahead of time:
infile = open(filename, 'r').readlines()
my_block = [line.strip() for line in infile[:N]]
cur_pos = 0
while my_block:
    print(my_block)
    cur_pos += 1
    my_block = [line.strip() for line in infile[cur_pos*N:(cur_pos + 1)*N]]
I was looking for an answer to the same question, but did not really like any of the proposals above, so I ended up writing this slightly ugly thing that does exactly what I wanted without using strange libraries.
def test(filename, N):
    with open(filename, 'r') as infile:
        lines = []
        for line in infile:
            line = line.strip()
            if len(lines) < N - 1:
                lines.append(line)
            else:
                lines.append(line)
                res = lines
                lines = []
                yield res
        else:
            if len(lines) != 0:
                yield lines