Concatenating rows in CSV file Using islice - python

Basically, I have a list which indicates the line number of the CSV file, and I want to concatenate rows that follows the list.
For instance, my list is [0, 7, 10, 11, 27, 31]
This means I want to concatenate my rows from line1 to line 7 into a single row.
Line 8 to 10 to a single row.
Line 11 to 11 (same line so it will simply do nothing)
Line 12 to 27
Line 28 to 31
I have tried using a while loop and islice from itertools. However, I only get the output of Line 1 to Line 7.
Here is my code.
import csv
from itertools import islice
with open('csvtest.csv', 'rb') as f:
reader = csv.reader(f)
#row1 = next(reader)
merged = []
list = [0, 7, 10, 11, 27, 31]
x=0
while x < len(list):
for line in islice(f, list[x], list[x+1]):
#print line1
line = line.rstrip()
merged.append(line)
x += 1
print merged #gives ['fsfs', 'sf', '1231', 'afsa', '', '', 'asfasfsaf;0'] which is lines 1 to 7
Would anyone let me know what happened to my while loop? Or is it a problem with the append list part?

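I have fixed the code. Basically, you need to change how islice is used: islice(f, N) consumes the next N lines from the file's current position, so the argument must be the number of lines in each group rather than absolute line numbers.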
Updating the answer on the basis of new information.
import csv
from itertools import islice
# NOTE: 'rb'/'wb' are the Python 2 csv file modes; on Python 3 open the files
# with 'r'/'w' and newline='' instead.

# Line numbers (1-based) of every line that terminates a group, i.e. ends
# with ';0'.  Seeded with 0 so that the first group spans line 1 up to the
# first delimiter line; without the seed the first slice would use the gap
# between the first two delimiters and every group would be misaligned.
delimiter_list = [0]
merged = []

# First pass: locate the delimiter lines.
with open('csvtest.csv', 'rb') as f:
    reader = csv.reader(f)
    for num, line in enumerate(reader, 1):
        line = " ".join(line)
        if line.endswith(';0'):
            delimiter_list.append(num)

# Second pass: cut the file into groups of consecutive lines.
with open('csvtest.csv', 'rb') as f:
    for start, end in zip(delimiter_list, delimiter_list[1:]):
        # islice(f, N) returns the next N lines from the current position
        row = [line.rstrip() for line in islice(f, end - start)]
        # add each row to final list
        merged.append(row)

print(merged)

# Each group becomes one row of the output file.
with open('output2.csv', 'wb') as w:
    writer = csv.writer(w)
    writer.writerows(merged)

Related

using while loop to read files

I have 200 files, and I want to extract the second column from each of them. I want to store the second column of each file in a list called "colv", so that colv[0] is the second column of the first file, colv[1] is the second column of the second file, and so on.
I wrote this code, but it does not work: with it, colv[0] is just the first number of the second column of the first file. Can anyone help me fix this issue?
colv = []
i = 1
colvar = "step7_1.colvar"
while os.path.isfile(colvar):
with open(colvar, "r") as f_in:
line = next(f_in)
for line in f_in:
a = line.split()[1]
colv.append(a)
i+=1
colvar = "step7_%d.colvar" %i
How about using Pandas' read_csv() since you mention that the data has a table-like structure. In particular, you could use
import pandas as pd
colv = []
i = 1
colvar = "step7_1.colvar"
while os.path.isfile(colvar):
df = pd.read_csv(colvar, sep=',')
colv.append(list(df[df.columns[1]]))
i+=1
colvar = "step7_%d.colvar" %i
It returned
>colv
[[5, 6, 7, 8], [8, 9, 10, 11], [12, 13, 14, 15]]
for my vanilla sample files step7_%d.colvar.
You might need to adjust the separator character with sep.
Use a list comprehension to get the 2nd element of all the lines into a list.
import os

# colv[k] will hold the second column of the (k+1)-th file.
colv = []
i = 1
colvar = "step7_1.colvar"
# Walk step7_1.colvar, step7_2.colvar, ... and stop at the first missing file.
while os.path.isfile(colvar):
    with open(colvar, "r") as f_in:
        # Skip the 1st line; the default keeps an empty file from raising
        # StopIteration.
        next(f_in, None)
        # Second whitespace-separated field of every remaining line.  Lines
        # with fewer than two fields (e.g. a trailing blank line) are skipped
        # instead of raising IndexError.
        data = [fields[1] for fields in map(str.split, f_in) if len(fields) > 1]
    colv.append(data)
    i += 1
    colvar = "step7_%d.colvar" % i

How to write several commas in a CSV file in python

I'm converting a .txt file with annotations into another annotation format in a .csv file. The annotation format is as follows: filepath,x1,y1,x2,y2,classname. For pictures which haven't an instance of any class in them, annotation is like this: filepath,,,,,.
The problem is that the .writerow method of the csv.writer class doesn't write more than one comma in a row.
My code is like this:
with open(annotation_file, 'r') as file:
lines = file.readlines()
splitted_lines = [line.split(' ') for line in lines]
with open(out_file, 'w', newline = '') as out:
csv_writer = csv.writer(out,delimiter= ';' )
for l in splitted_lines:
if len(l) == 1:
# indicate empty images
csv_writer.writerow([l[0] + ',,,,,'])
l is a list that contains a single string, so by l[0] + ',,,,,' I want to concatenate l with five commas.
Thank you in advance
set missing values as empty strings and fill the list
with open(annotation_file, 'r') as file:
    lines = file.readlines()
# Strip the line terminator before splitting: otherwise the last field of
# every line keeps its '\n' and the csv writer quotes it instead of writing
# it as a plain value.
splitted_lines = [line.rstrip('\n').split(' ') for line in lines]
with open(out_file, 'w', newline='') as out:
    csv_writer = csv.writer(out, delimiter=';')
    for l in splitted_lines:
        if len(l) == 1:
            # indicate empty images: pad with five empty fields so the
            # writer emits the five trailing delimiters itself
            csv_writer.writerow(l + [''] * 5)
        else:
            csv_writer.writerow(l)
Given sample data:
data = [
[1, 2, 3, 4, 5, 6],
[1, 2, 3, 4, 5, 6],
[1, 2, 3, 4, 5, 6],
[1],
]
it outputs:
1;2;3;4;5;6
1;2;3;4;5;6
1;2;3;4;5;6
1;;;;;
which is inline with what you want
I discovered my problem: the string in l contained a '\n' at the end. Because of this, the writer wasn't able to write the five commas after the string. I changed the code as shown below, which fixed the problem.
with open(annotation_file, 'r') as file:
lines = file.readlines()
splitted_lines = [line.split(' ') for line in lines]
with open(out_file, 'w', newline = '') as out:
csv_writer = csv.writer(out,delimiter= ';' )
for l in splitted_lines:
if len(l) == 1:
# indicate empty images
l[0] = l[0].replace('\n', '')
csv_writer.writerow([l[0] + ',,,,,'])
else:
csv_writer.writerow(['something else'])
Thanks anyway, @DelphiX

Appending numbers from a file into a list by it's value

I have a file containing numerical values:
1
4
6
10
12
and I'm trying to append these values into its respective position in an array where i would obtain:
[None,1,None,None,4,None,6,None,None,None,10,None,None,12]
Since 1 from the file would be at index 1 in the list, 4 from the file would be at index 4 in the list and so on.
I begin by first reading in the file:
filename = open("numbers.txt", "r", encoding = "utf-8")
numfile = filename
lst = [None] * 12
for line in numfile:
line = line.strip() #strip new line
line = int(line) #making the values in integer form
vlist.append(line) #was thinking of line[val] where value is the number itself.
print(vlist)
but I'm getting the output:
[None,None,None,None,None,None,None,None,None,1,4,6,10,12]
Where the numbers are appended to the far right of the array. Would appreciate some help on this.
Assuming you have your number as integers in a list called numbers (which you have no problem doing as it seems), you can do:
lst = [None if i not in numbers else i for i in range(max(numbers)+1)]
if numbers can be a big list, I would cast it to set first to make the in comparisons faster.
numbers = set(numbers)
lst = [None if i not in numbers else i for i in range(max(numbers)+1)]
Example
>>> numbers = [1, 4, 6, 10, 12]
>>> [None if i not in numbers else i for i in range(max(numbers) + 1)]
[None, 1, None, None, 4, None, 6, None, None, None, 10, None, 12]
Appending to a list adds the numbers at the end of the list. You instead want to assign the value of line to the list at the index of line
# Read every non-blank line as an integer; the with-block closes the file.
with open("numbers.txt", "r", encoding="utf-8") as numfile:
    numbers = [int(line) for line in numfile if line.strip()]

# Size the list from the data (largest value + 1) rather than a hard-coded
# length, so a larger number in the file cannot raise IndexError.
lst = [None] * (max(numbers) + 1) if numbers else []
for number in numbers:
    # each value is stored at the index equal to itself
    lst[number] = number
print(lst)
# [None, 1, None, None, 4, None, 6, None, None, None, 10, None, 12]
You can use an indexer and compare its value with the actual value of the line, and replace the value at the index rather than appending it to the end of the list:
filename = open("numbers.txt", "r", encoding = "utf-8")
numfile = filename
lst = [None] * 12
i = 0
for line in numfile:
line = line.strip() #strip new line
line = int(line) #making the values in integer form
value = None
if (i == line):
value = line
if (len(vlist) < line): #replace or append if there's more numbers in the file than the size of the list
vlist[i] = value
else:
vlist.append(value)
i += 1
print(vlist)
Appending to a list in a loop is expensive. I advise you to construct a list of None items first and then iterate over your numbers to update the elements.
Below is a demo with itertools and csv.reader:
from io import StringIO
from itertools import chain
import csv
mystr = StringIO("""1
4
6
10
12""")

# replace mystr with open('numbers.txt', 'r')
with mystr as f:
    reader = csv.reader(f)
    # each csv row is a list of fields; flatten them and convert to int
    num_list = list(map(int, chain.from_iterable(reader)))

# Size from the largest value rather than the last one, so the input does
# not have to be sorted; an empty input yields an empty list.
res = [None] * (max(num_list) + 1) if num_list else []
for i in num_list:
    res[i] = i
print(res)
[None, 1, None, None, 4, None, 6, None, None, None, 10, None, 12]
Benchmarking example:
def appender1(n):
return [None]*int(n)
def appender2(n):
lst = []
for i in range(int(n)):
lst.append(None)
return lst
%timeit appender1(1e7) # 90.4 ms per loop
%timeit appender2(1e7) # 1.77 s per loop

File handling in Python

Im a python noob and I'm stuck on a problem.
filehandler = open("data.txt", "r")
alist = filehandler.readlines()
def insertionSort(alist):
for line in alist:
line = list(map(int, line.split()))
print(line)
for index in range(2, len(line)):
currentvalue = line[index]
position = index
while position>1 and line[position-1]>currentvalue:
line[position]=line[position-1]
position = position-1
line[position]=currentvalue
print(line)
insertionSort(alist)
for line in alist:
print line
Output:
[4, 19, 2, 5, 11]
[4, 2, 5, 11, 19]
[8, 1, 2, 3, 4, 5, 6, 1, 2]
[8, 1, 1, 2, 2, 3, 4, 5, 6]
4 19 2 5 11
8 1 2 3 4 5 6 1 2
I am supposed to sort lines of values from a file. The first value in the line represents the number of values to be sorted. I am supposed to display the values in the file in sorted order.
The print calls in insertionSort are just for debugging purposes.
The top four lines of output show that the insertion sort seems to be working. I can't figure out why when I print the lists after calling insertionSort the values are not sorted.
I am new to Stack Overflow and Python so please let me know if this question is misplaced.
for line in alist:
line = list(map(int, line.split()))
line starts out as eg "4 19 2 5 11". You split it and convert to int, ie [4, 19, 2, 5, 11].
You then assign this new value to list - but list is a local variable, the new value never gets stored back into alist.
Also, list is a terrible variable name because there is already a list data-type (and the variable name will keep you from being able to use the data-type).
Let's reorganize your program:
def load_file(fname):
with open(fname) as inf:
# -> list of list of int
data = [[int(i) for i in line.split()] for line in inf]
return data
def insertion_sort(row):
# `row` is a list of int
#
# your sorting code goes here
#
return row
def save_file(fname, data):
with open(fname, "w") as outf:
# list of list of int -> list of str
lines = [" ".join(str(i) for i in row) for row in data]
outf.write("\n".join(lines))
def main():
data = load_file("data.txt")
data = [insertion_sort(row) for row in data]
save_file("sorted_data.txt", data)
if __name__ == "__main__":
main()
Actually, with your data - where the first number in each row isn't actually data to sort - you would be better to do
data = [row[:1] + insertion_sort(row[1:]) for row in data]
so that the logic of insertion_sort is cleaner.
As @Barmar mentioned above, you are not modifying the input to the function. You could do the following:
def insertionSort(alist):
    """Parse each line of alist into integers and insertion-sort it.

    alist is an iterable of strings of whitespace-separated integers.  The
    first number of every line is left in place (the sort starts at index 1);
    only the numbers after it are sorted in ascending order.

    Returns a new list with one sorted list of ints per input line; alist
    itself is not modified.
    """
    blist = []
    for line in alist:
        row = list(map(int, line.split()))
        # index 0 is skipped and index 1 is trivially sorted, so start at 2
        for index in range(2, len(row)):
            currentvalue = row[index]
            position = index
            # shift larger values right, never moving past index 1
            while position > 1 and row[position - 1] > currentvalue:
                row[position] = row[position - 1]
                position = position - 1
            row[position] = currentvalue
        blist.append(row)
    return blist
blist = insertionSort(alist)
print(blist)
Alternatively, modify alist "in-place":
def insertionSort(alist):
    """Replace each string in alist with its sorted list of integers, in place.

    Every element of alist is a string of whitespace-separated integers.  The
    first number of each line stays where it is (the sort starts at index 1);
    the numbers after it are insertion-sorted in ascending order.  The parsed,
    sorted list is written back to the same position of alist.

    Returns None.
    """
    for k, line in enumerate(alist):
        numbers = [int(token) for token in line.split()]
        # index 0 is skipped and index 1 is trivially sorted, so start at 2
        for index in range(2, len(numbers)):
            currentvalue = numbers[index]
            position = index
            # shift larger values right, never moving past index 1
            while position > 1 and numbers[position - 1] > currentvalue:
                numbers[position] = numbers[position - 1]
                position -= 1
            numbers[position] = currentvalue
        # store the result back so the caller sees the sorted data
        alist[k] = numbers
insertionSort(alist)
print(alist)

Python: reading N number from file, M at time

My file is this one:
14
3
21
37
48
12
4
6
22
4
How can I read M numbers at a time, for example 4 at a time? Is it necessary to use two for loops?
My goal is to create (N/M)+1 lists with M numbers in each list, except the final list, which holds the remainder of the division N/M.
You can use python list slice operator to fetch the number of required elements from a file by reading a file using readlines() where each element of list will be one line of file.
with open("filename") as myfile:
firstNtoMlines = myfile.readlines()[N:N+M] # the interval you want to read
print firstNtoMlines
Use itertools.islice,
import itertools
import math
filename = 'test.dat'
N = 9
M = 4
num_rest_lines = N
nrof_lists = int(math.ceil(N*1.0/M))
with open(filename, 'r') as f:
for i in range(nrof_lists):
num_lines = min(num_rest_lines, M)
lines_gen = itertools.islice(f, num_lines)
l = [int(line.rstrip()) for line in lines_gen]
num_rest_lines = num_rest_lines - M
print(l)
# Output
[14, 3, 21, 37]
[48, 12, 4, 6]
[22]
Previous answer: Iterate over a file (N lines) in chunks (every M lines), forming a list of N/M+1 lists.
import itertools
def grouper(iterable, n, fillvalue=None):
    """Iterate over iterable in chunks of n items.

    Returns an iterator of n-tuples holding consecutive items of iterable.
    If the items run out before the last tuple is full, the missing slots
    are padded with fillvalue.
    """
    # izip_longest was renamed zip_longest in Python 3; pick whichever exists.
    zip_longest = getattr(itertools, 'zip_longest', None) or itertools.izip_longest
    # n references to ONE shared iterator: filling each tuple therefore
    # consumes n consecutive items.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
# Test
filename = 'test.dat'
m = 4
fillvalue = '0'
with open(filename, 'r') as f:
lists = [[int(item.rstrip()) for item in chuck] for chuck in grouper(f, m, fillvalue=fillvalue)]
print(lists)
# Output
[[14, 3, 21, 37], [48, 12, 4, 6], [22, 4, 0, 0]]
Now my code is this one:
N = 4
M = 0
while (M < 633):
with open("/Users/Lorenzo/Desktop/X","r") as myFile:
res = myFile.readlines()[M:N]
print(res)
M+=4
N+=4
so, It should work. My file's got 633 numbers
This has been asked before.
from itertools import izip_longest
izip_longest(*(iter(yourlist),) * yourgroupsize)
For the case of grouping lines in a file into lists of size 4:
with open("file.txt", "r") as f:
    # (iter(f),) * 4 is four references to the SAME line iterator, so each
    # output tuple takes 4 consecutive lines; a short last group is padded
    # with None.  Build the list inside the with-block: the lazy iterator
    # could not be consumed once the file is closed.
    res = list(izip_longest(*(iter(f),) * 4))
print(res)
Alternative way to split a list into groups of n

Categories