Python: reading N number from file, M at time - python

My file is this one:
14
3
21
37
48
12
4
6
22
4
How can I read M numbers at a time, for example 4 at a time? Is it necessary to use two for loops?
My goal is to create (N/M)+1 lists with M numbers inside every list, except the final list (which holds the remainder of the division N/M).

You can use python list slice operator to fetch the number of required elements from a file by reading a file using readlines() where each element of list will be one line of file.
# Read every line of the file into a list, then slice out the M lines that
# start at index N (N and M must be defined by the caller).
with open("filename") as myfile:
    firstNtoMlines = myfile.readlines()[N:N+M]  # the interval you want to read
# Parenthesised form works on both Python 2 and Python 3.
print(firstNtoMlines)

Use itertools.islice,
import itertools
import math
filename = 'test.dat'
N = 9  # total number of lines to read
M = 4  # number of lines per chunk
num_rest_lines = N
# Number of chunks, rounding up so a short final chunk is included.
nrof_lists = int(math.ceil(N * 1.0 / M))
with open(filename, 'r') as f:
    for i in range(nrof_lists):
        # The last chunk holds only what is left, which may be fewer than M.
        num_lines = min(num_rest_lines, M)
        # islice pulls the next num_lines lines from the open file iterator.
        lines_gen = itertools.islice(f, num_lines)
        l = [int(line.rstrip()) for line in lines_gen]
        num_rest_lines = num_rest_lines - M
        print(l)
# Output
[14, 3, 21, 37]
[48, 12, 4, 6]
[22]
Previous answer: Iterate over a file (N lines) in chunks (every M lines), forming a list of N/M+1 lists.
import itertools
def grouper(iterable, n, fillvalue=None):
    """Iterate over ``iterable`` in tuples of ``n`` consecutive items.

    The last tuple is padded with ``fillvalue`` when the number of items is
    not a multiple of ``n``. Returns a lazy iterator of tuples.
    """
    # izip_longest was renamed zip_longest in Python 3; support both.
    zip_longest = getattr(itertools, 'zip_longest', None)
    if zip_longest is None:
        zip_longest = itertools.izip_longest
    # n references to the SAME iterator, so each tuple takes n successive items.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
# Test
filename = 'test.dat'
m = 4
# Padding is a string because every item is passed through int(item.rstrip()).
fillvalue = '0'
with open(filename, 'r') as f:
    lists = [[int(item.rstrip()) for item in chunk]
             for chunk in grouper(f, m, fillvalue=fillvalue)]
print(lists)
# Output
[[14, 3, 21, 37], [48, 12, 4, 6], [22, 4, 0, 0]]

Now my code is this one:
# Read the file once up front; re-opening and re-reading it on every pass
# repeats the full read for each chunk.
with open("/Users/Lorenzo/Desktop/X", "r") as myFile:
    lines = myFile.readlines()
N = 4  # exclusive end index of the current chunk
M = 0  # start index of the current chunk
while (M < 633):
    res = lines[M:N]
    print(res)
    M += 4
    N += 4
so, It should work. My file's got 633 numbers

This has been asked before.
from itertools import izip_longest
izip_longest(*(iter(yourlist),) * yourgroupsize)
For the case of grouping lines in a file into lists of size 4:
with open("file.txt", "r") as f:
    # The same iterator repeated 4 times: each tuple takes 4 consecutive lines.
    # Materialise the result while the file is still open, because
    # izip_longest is lazy.
    res = list(izip_longest(*(iter(f),) * 4))
print(res)
Alternative way to split a list into groups of n

Related

Python Extract Numbers from a file

So, I have a txt file with some integers which are between 0 and 50. I want to extract them and to use their values.
The txt file looks like:
1 2 40 23
2 34 12
3 12 1
I have tried something like:
with open(input_file, "r") as file:
    lines = file.readlines()
for i in range(len(lines)):
    l = lines[i].strip()
    # Scans character by character, comparing each one with its successor.
    for c in range(1, len(l)-1):
        if(l[c] >= '0' and l[c] <= '9' and (l[c+1] < '0' or l[c+1] > '9')):
            # digit followed by a non-digit
            # other code with those numbers
            pass
        elif(l[c] >= '0' and l[c] <= '9' and (l[c+1] >= '0' and l[c+1] <= '9')):
            # digit followed by another digit
            # other code with those numbers
            pass
The problem is that I extract the two-digit numbers, but I also extract single digits out of those two-digit numbers.
Any solution?
Or this way:
import io

my_array = []
with io.open(inputfile, mode="r", encoding="utf-8") as f:
    for line in f:
        # split() with no argument splits on any run of whitespace.
        my_array.extend(line.split())
results = list(map(int, my_array))  # convert to int
# Print the converted values; my_array still holds the original strings.
print(results)
Output:
[1, 2, 40, 23, 2, 34, 12, 3, 12, 1]
You can gather all the numbers in the file into a list like this:
import re
with open(input_file) as f:
    # Raw string so that \d is passed to the regex engine untouched instead of
    # being parsed as an (invalid) string escape sequence.
    print(list(map(int, re.findall(r'\d+', f.read()))))
Output:
[1, 2, 40, 23, 2, 34, 12, 3, 12, 1]
Note:
Use of re may be unnecessary in OP's case but included here because it allows for potential garbage in the input file

Iterate through a file reading first N values

I am reading 3 lines at a time from a file which has numbers 1,2,3...100
I want the output to look something like this
1
2
3
2
3
4
3
4
5
However with the following code, it is printing continuous numbers
with open("/home/osboxes/num", "r+") as f:
    for line in f:
        print(line)
        # Each next() call advances the same file iterator, so one pass of
        # the loop consumes three lines and the next pass starts after them.
        line2 = next(f)
        print(line2)
        line3 = next(f)
        print(line3)
Is there a way to go back to the iteration and skip the file line and display the output as shown above
Let's assume that instead of your file object we have an iterator like iter(range(100)) in order to produce our expected result using next you can copy the iterator using itertools.tee as many times as you want and create a zip from your iterators based on your expected output:
In [3]: r = iter(range(100))
In [4]: from itertools import tee
In [5]: r, n, m = tee(r, 3) # copy the iterator 3 times
In [6]: next(n) # consume the first item of n
Out[6]: 0
In [7]: next(m);next(m) # consume the first 2 items of m
Out[7]: 1
In [8]: list(zip(r, n, m))
#Out[8]:
#[(0, 1, 2),
# (1, 2, 3),
# (2, 3, 4),
# (3, 4, 5),
# (4, 5, 6),
# (5, 6, 7),
# ...
Now you can do the same thing with file object:
from itertools import tee
with open("/home/osboxes/num", "r+") as f:
    # Three independent iterators over the same lines.
    f, n, m = tee(f, 3)
    # Offset the second iterator by one line and the third by two.
    next(n); next(m); next(m)
    # Zip the three tee'd iterators (f, n, m) to get the sliding windows.
    for i, j, k in zip(f, n, m):
        print(i, j, k)  # or do something else with i,j,k
If it's a smaller file, as you mentioned, then you can use the following code; but if it's much bigger, then prefer using the seek() method:
with open("abc.txt", "r+") as f:
    data = f.readlines()
# Start at index 2 so that data[i-2] and data[i-1] always exist.
for i in range(2, len(data)):
    print("%s %s %s" % (data[i-2].rstrip(), data[i-1].rstrip(), data[i].rstrip()), end = " ")
Output:
1 2 3 2 3 4 3 4 5
If storing the whole file in a variable isn't a problem, an easy solution would be:
with open("num", "r+") as f:
    # splitlines() drops the line terminators.
    lines = f.read().splitlines()
# Stop two short of the end so lines[i + 2] is always valid.
for i in range(len(lines) - 2):
    print(lines[i])
    print(lines[i + 1])
    print(lines[i + 2])
For a more efficient solution, see @Kasramvd's solution using iterators.
As an alternative without iterators, you can store the last 2 values:
with open("num", "r+") as f:
    # The two most recently read lines; None until enough lines were seen.
    prev1, prev2 = None, None
    for line in f:
        if prev1 is not None and prev2 is not None:
            print(prev1)
            print(prev2)
            print(line)
        # Slide the window forward by one line.
        prev1, prev2 = prev2, line

Concatenating rows in CSV file Using islice

Basically, I have a list which indicates the line number of the CSV file, and I want to concatenate rows that follows the list.
For instance, my list is [0, 7, 10, 11, 27, 31]
This means I want to concatenate my rows from line1 to line 7 into a single row.
Line 8 to 10 to a single row.
Line 11 to 11 (same line so it will simply do nothing)
Line 12 to 27
Line 28 to 31
I have tried using a while loop and islice from itertools. However, I only get the output of Line 1 to Line 7.
Here is my code.
import csv
from itertools import islice
with open('csvtest.csv', 'rb') as f:
    reader = csv.reader(f)
    #row1 = next(reader)
    merged = []
    list = [0, 7, 10, 11, 27, 31]  # NOTE: this name shadows the built-in list type
    x=0
    # NOTE: on the last pass x == len(list) - 1, so list[x+1] is out of range.
    while x < len(list):
        # NOTE: islice counts start/stop from the iterator's CURRENT position,
        # not from the beginning of the file.
        for line in islice(f, list[x], list[x+1]):
            #print line1
            line = line.rstrip()
            merged.append(line)
        x += 1
    print(merged) #gives ['fsfs', 'sf', '1231', 'afsa', '', '', 'asfasfsaf;0'] which is lines 1 to 7
Would anyone let me know what happened to my while loop? Or is it a problem with the append list part?
I have fixed the code, basically you need to change how to implement islice.
Updating the answer on the basis of new information.
import csv
from itertools import islice
with open('output2.csv','wb') as w:
    writer = csv.writer(w)
    delimiter_list = []
    merged = []
    # First pass: record the 1-based line number of every row ending in ';0'.
    with open('csvtest.csv', 'rb') as f:
        reader = csv.reader(f)
        for num, line in enumerate(reader, 1):
            line = (" ".join(line))
            if line.endswith(';0'):
                #print 'found at line:', num
                delimiter_list.append(num)
    # Second pass: group the raw lines between consecutive delimiters.
    with open('csvtest.csv', 'rb') as f:
        x=0
        while x < len(delimiter_list)-1:
            row = []
            # islice(f,N) returns next N lines
            for line in islice(f, delimiter_list[x+1]-delimiter_list[x]):
                line = line.rstrip()
                row.append(line)
            x += 1
            # add each row to final list
            merged.append(row)
    print(merged)
    writer.writerows(merged)

File handling in Python

Im a python noob and I'm stuck on a problem.
filehandler = open("data.txt", "r")
alist = filehandler.readlines()
def insertionSort(alist):
    """Print each line of ``alist`` as ints before and after sorting it.

    Each element of ``alist`` is a string of whitespace-separated integers.
    The first value of a line is left in place; the rest are insertion-sorted.
    Only the local name ``line`` is rebound, so ``alist`` itself is unchanged.
    """
    for line in alist:
        line = list(map(int, line.split()))
        print(line)
        # Start at index 2: index 0 is skipped and index 1 is trivially sorted.
        for index in range(2, len(line)):
            currentvalue = line[index]
            position = index
            # position > 1 keeps line[0] out of the sorted region.
            while position>1 and line[position-1]>currentvalue:
                line[position]=line[position-1]
                position = position-1
            line[position]=currentvalue
        print(line)
insertionSort(alist)
for line in alist:
    print(line)
Output:
[4, 19, 2, 5, 11]
[4, 2, 5, 11, 19]
[8, 1, 2, 3, 4, 5, 6, 1, 2]
[8, 1, 1, 2, 2, 3, 4, 5, 6]
4 19 2 5 11
8 1 2 3 4 5 6 1 2
I am supposed to sort lines of values from a file. The first value in the line represents the number of values to be sorted. I am supposed to display the values in the file in sorted order.
The print calls in insertionSort are just for debugging purposes.
The top four lines of output show that the insertion sort seems to be working. I can't figure out why when I print the lists after calling insertionSort the values are not sorted.
I am new to Stack Overflow and Python so please let me know if this question is misplaced.
for line in alist:
line = list(map(int, line.split()))
line starts out as eg "4 19 2 5 11". You split it and convert to int, ie [4, 19, 2, 5, 11].
You then assign this new value to list - but list is a local variable, the new value never gets stored back into alist.
Also, list is a terrible variable name because there is already a list data-type (and the variable name will keep you from being able to use the data-type).
Let's reorganize your program:
def load_file(fname):
    """Read ``fname`` and return its contents as a list of lists of int.

    Each line becomes one inner list, built from its whitespace-separated
    tokens converted with ``int``.
    """
    with open(fname) as inf:
        # -> list of list of int
        data = [[int(i) for i in line.split()] for line in inf]
    return data
def insertion_sort(row):
    """Sort ``row`` (a list of int) in place in ascending order and return it."""
    for index in range(1, len(row)):
        current = row[index]
        position = index
        # Shift larger elements one slot right to open a gap for `current`.
        while position > 0 and row[position - 1] > current:
            row[position] = row[position - 1]
            position -= 1
        row[position] = current
    return row
def save_file(fname, data):
    """Write ``data`` (a list of lists of int) to ``fname``, one row per line.

    Values in a row are separated by single spaces; rows are joined with
    newlines, so no trailing newline is written.
    """
    with open(fname, "w") as outf:
        # list of list of int -> list of str
        lines = [" ".join(str(i) for i in row) for row in data]
        outf.write("\n".join(lines))
def main():
    """Load data.txt, sort every row, and write the result to sorted_data.txt."""
    data = load_file("data.txt")
    data = [insertion_sort(row) for row in data]
    save_file("sorted_data.txt", data)


if __name__ == "__main__":
    main()
Actually, with your data - where the first number in each row isn't actually data to sort - you would be better to do
data = [row[:1] + insertion_sort(row[1:]) for row in data]
so that the logic of insertion_sort is cleaner.
As @Barmar mentioned above, you are not modifying the input to the function. You could do the following:
def insertionSort(alist):
    """Return a new list holding each line of ``alist`` parsed and sorted.

    Each element of ``alist`` is a string of whitespace-separated integers.
    The first value of every line stays in place; the remaining values are
    insertion-sorted in ascending order. ``alist`` is not modified.
    """
    blist = []
    for line in alist:
        line = list(map(int, line.split()))
        # Start at index 2: index 0 is skipped and index 1 is trivially sorted.
        for index in range(2, len(line)):
            currentvalue = line[index]
            position = index
            # position > 1 keeps line[0] out of the sorted region.
            while position > 1 and line[position-1] > currentvalue:
                line[position] = line[position-1]
                position = position - 1
            line[position] = currentvalue
        blist.append(line)
    return blist
blist = insertionSort(alist)
print(blist)
Alternatively, modify alist "in-place":
def insertionSort(alist):
    """Replace each string in ``alist`` with its parsed, sorted list of ints.

    Each element of ``alist`` is a string of whitespace-separated integers.
    The first value of every line stays in place; the remaining values are
    insertion-sorted in ascending order. ``alist`` is modified in place and
    nothing is returned.
    """
    for k, line in enumerate(alist):
        line = list(map(int, line.split()))
        # Start at index 2: index 0 is skipped and index 1 is trivially sorted.
        for index in range(2, len(line)):
            currentvalue = line[index]
            position = index
            # position > 1 keeps line[0] out of the sorted region.
            while position > 1 and line[position-1] > currentvalue:
                line[position] = line[position-1]
                position = position - 1
            line[position] = currentvalue
        # Store the sorted list back into the caller's list.
        alist[k] = line
insertionSort(alist)
print(alist)

Permutations in python 2.5.2

I have a list of numbers for input, e.g.
671.00
1,636.00
436.00
9,224.00
and I want to generate all possible sums with a way to id it for output, e.g.:
671.00 + 1,636.00 = 2,307.00
671.00 + 436.00 = 1,107.00
671.00 + 9,224.00 = 9,895.00
671.00 + 1,636.00 + 436.00 = 2,743.00
...
and I would like to do it in Python
My current constrains are:
a) I'm just learning python now (that's part of the idea)
b) I will have to use Python 2.5.2 (no itertools)
I think I have found a piece of code that may help:
def all_perms(str):
    """Yield every permutation of ``str`` (a string or a list).

    Each permutation has the same type as the input. The parameter name
    shadows the built-in ``str`` inside this function.
    """
    if len(str) <= 1:
        yield str
    else:
        # Insert the first element at every position of each permutation
        # of the remaining elements.
        for perm in all_perms(str[1:]):
            for i in range(len(perm)+1):
                #nb str[0:1] works in both string and list contexts
                yield perm[:i] + str[0:1] + perm[i:]
( from these guys )
But I'm not sure how to use it for my purpose.
Could someone throw me some tips and pieces of code to help?
cheers,
f.
Permutations are about taking an ordered set of things and moving these things around (i.e. changing order). Your question is about combinations of things from your list.
Now, an easy way of enumerating combinations is by mapping entries from your list to bits in a number. For example, lets assume that if bit #0 is set (i.e. 1), then number lst[0] participates in the combination, if bit #1 is set, then lst[1] participates in the combination, etc. This way, numbers in range 0 <= n < 2**(len(lst)) identify all possible combinations of lst members, including an empty one (n = 0) and the whole lst (n = 2**(len(lst)) - 1).
You need only combinations of 2 items or more, i.e. only those combination IDs that have at least two nonzero bits in their binary representation. Here is how to identify these:
def HasAtLeastTwoBitsSet(x):
    """Return True if ``x`` has two or more 1-bits in its binary form."""
    # x & (x-1) clears the lowest set bit; anything left means a second bit.
    return (x & (x-1)) != 0
# Testing:
>>> [x for x in range(33) if HasAtLeastTwoBitsSet(x)]
[3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
Next step is to extract a combination of list members identified by a combination id. This is easy, thanks to the power of list comprehensions:
def GetSublistByCombination(lst, combination_id):
    """Return the items of ``lst`` selected by the bits of ``combination_id``.

    Item ``lst[i]`` is included when bit ``i`` of ``combination_id`` is set;
    the original order of the items is kept.
    """
    res = [x for (i, x) in enumerate(lst) if combination_id & (1 << i)]
    return res
# Testing:
>>> GetSublistByCombination([0,1,2,3], 1)
[0]
>>> GetSublistByCombination([0,1,2,3], 3)
[0, 1]
>>> GetSublistByCombination([0,1,2,3], 12)
[2, 3]
>>> GetSublistByCombination([0,1,2,3], 15)
[0, 1, 2, 3]
Now let's make a generator that produces all sums, together with their string representations:
def IterAllSums(lst):
    """Yield ``(sum_str, sum_val)`` for every combination of 2+ items of ``lst``.

    ``sum_str`` is the chosen items joined with '+', ``sum_val`` their sum.
    Combinations are produced in increasing order of combination id.
    """
    # Walk the ids lazily instead of building the full list up front.
    for comb in range(1 << len(lst)):
        if not HasAtLeastTwoBitsSet(comb):
            continue
        sublist = GetSublistByCombination(lst, comb)
        sum_str = '+'.join(map(str, sublist))
        sum_val = sum(sublist)
        yield (sum_str, sum_val)
And, finally, let's use it:
>>> for sum_str, sum_val in IterAllSums([1,2,3,4]) : print sum_str, sum_val
1+2 3
1+3 4
2+3 5
1+2+3 6
1+4 5
2+4 6
1+2+4 7
3+4 7
1+3+4 8
2+3+4 9
1+2+3+4 10
The code below generates all "subsets" of a given list (except the empty set), i.e. it returns a list of lists.
def all_sums(l):
    """Return every non-empty sub-list of ``l`` that keeps the original order.

    The result is a list of lists (the items, not their sums); an empty
    input gives an empty result.
    """
    if len(l) == 0:
        return []
    if len(l) == 1:
        return [[l[0]]]
    result = []
    for i in range(len(l)):
        # l[i] on its own, then l[i] followed by every sub-list of the tail.
        result.append([l[i]])
        for p in all_sums(l[i+1:]):
            result.append([l[i]] + p)
    return result
Now you could just write a short function doit for output also:
def doit(l):
    """Print all sub-lists of ``l``, then each sub-list with its sum."""
    mylist = all_sums(l)
    print(mylist)
    for i in mylist:
        print(str(i) + " = " + str(sum(i)))
doit([1,2,3,4])
With itertools (Python >=2.6) would be:
from itertools import *
a = [1, 2, 3, 4]
# For each size i >= 2: a tuple with the sums of all i-item combinations of a.
# The builtin map works on both Python 2 and 3 (imap exists only on Python 2).
sumVal = [tuple(map(sum, combinations(a, i))) for i in range(2, len(a) + 1)]

Categories