Read Textfile Write new CSV file - python

Currently I have the following code which prints my desired lines from a .KAP file.
f = open('120301.KAP')
for line in f:
if line.startswith('PLY'):
print line
This results in the following output
PLY/1,48.107478621032,-69.733975000000
PLY/2,48.163516399836,-70.032838888053
PLY/3,48.270000002883,-70.032838888053
PLY/4,48.270000002883,-69.712824977522
PLY/5,48.192379262383,-69.711801581207
PLY/6,48.191666671083,-69.532840015422
PLY/7,48.033358898628,-69.532840015422
PLY/8,48.033359033880,-69.733975000000
PLY/9,48.107478621032,-69.733975000000
My goal is not to have it just print these lines. I'd like to have a CSV file created named 120301.csv with the coordinates in their own columns (leaving the PLY/# behind). Simple enough? I've been trying different import CSV functions for a while now. I can't seem to get anywhere.

Step by step, since it looks like you're struggling with some basics:
f_in = open("120301.KAP")
f_out = open("outfile.csv", "w")
for line in f_in:
    if line.startswith("PLY"):  # copy this line to the new file
        # strip the trailing newline first: otherwise col2 keeps it and the
        # "\n" added below leaves a blank line after every row.
        # then split it into columns, ignoring the first one ("PLY/x")
        _, col1, col2 = line.strip().split(",")
        # format your output
        outstring = col1 + "," + col2 + "\n"
        # and write it out
        f_out.write(outstring)
f_in.close()
f_out.close()  # really bad practice, but I'll get to that
Of course this is really not the best way to do this. There's a reason we have things like the csv module.
import csv
# NOTE: "wb" is the Python 2 idiom for csv output; on Python 3 open the
# output with open("outfile.csv", "w", newline="") instead.
with open("120301.KAP") as inf, open("outfile.csv", "wb") as outf:
    reader = csv.reader(inf)
    writer = csv.writer(outf)
    for row in reader:
        # csv.reader yields an empty list for a blank line, so make sure the
        # row has a first cell before checking whether it starts with "PLY"
        if row and row[0].startswith("PLY"):
            # write out the row, ignoring the first column
            writer.writerow(row[1:])
# opening the files using the "with" context managers means you don't have
# to remember to close them when you're done with them.

Related

How to read a csv file and create a new csv file after every nth number of rows?

I'm trying to write a function that reads a sheet of an existing .csv file and copies every 20 rows to a newly created csv file. Therefore, it needs to be designed like a file counter "file_01, file_02, file_03, ...", where the first 20 rows are copied to file_01.csv, the next 20 to file_02.csv, and so on.
Currently I have this code, which hasn't worked for me so far.
import csv
import os.path
from itertools import islice
N = 20
new_filename = ""
filename = ""
with open(filename, "rb") as file: # the a opens it in append mode
reader = csv.reader(file)
for i in range(N):
line = next(file).strip()
#print(line)
with open(new_filename, 'wb') as outfh:
writer = csv.writer(outfh)
writer.writerow(line)
writer.writerows(islice(reader, 2))
I have attached a file for testing.
https://1drv.ms/u/s!AhdJmaLEPcR8htYqFooEoYUwDzdZbg
32.01,18.42,58.98,33.02,55.37,63.25,12.82,-32.42,33.99,179.53,
41.11,33.94,67.85,57.61,59.23,94.69,19.43,-19.15,21.71,-161.13,
49.80,54.12,72.78,100.74,56.97,128.84,26.95,-6.76,10.07,-142.62,
55.49,81.02,68.93,148.17,49.25,157.32,34.94,5.39,0.44,-123.32,
56.01,112.81,59.27,177.87,38.50,179.63,43.43,18.42,-5.81,-102.24,
50.79,142.87,48.06,-162.32,26.60,-161.21,52.38,34.37,-7.42,-79.64,
41.54,167.36,37.12,-145.93,15.01,-142.84,60.90,57.05,-4.47,-56.54,
30.28,-172.09,27.36,-130.24,5.11,-123.66,66.24,91.12,-0.76,-35.44,
18.64,-153.20,19.52,-114.09,-1.54,-102.96,64.77,131.32,5.12,-21.68,
7.92,-134.07,14.24,-96.93,-3.79,-80.91,57.10,162.35,12.51,-9.21,
-0.34,-113.74,11.80,-78.73,-2.49,-58.46,46.75,-175.86,20.81,2.87,
-4.81,-91.85,11.78,-60.28,0.59,-39.26,35.75,-158.12,29.79,15.71,
-4.76,-68.67,13.79,-43.84,6.82,-24.69,25.27,-141.56,39.05,30.71,
-1.33,-46.42,18.44,-30.23,14.53,-11.95,16.21,-124.45,47.91,50.25,
4.14,-29.61,24.89,-18.02,23.01,0.10,9.59,-106.05,54.46,77.07,
11.04,-15.39,32.33,-6.66,31.92,12.48,6.24,-86.34,55.72,110.53,
18.69,-2.32,40.46,4.57,41.11,26.87,6.07,-65.68,50.25,142.78,
26.94,10.56,49.18,16.67,49.92,45.39,8.06,-46.86,40.13,168.29,
35.80,24.58,58.45,31.99,56.83,70.92,12.96,-31.90,28.10,-171.07,
44.90,41.72,67.41,55.89,59.21,103.94,19.63,-18.67,15.97,-152.40,
-5.41,-77.62,11.40,-63.21,4.80,-29.06,31.33,-151.44,43.00,37.25,
-2.88,-54.38,13.08,-46.00,12.16,-15.86,21.21,-134.62,51.25,59.16,
1.69,-35.73,17.44,-32.01,20.37,-3.78,13.06,-117.10,56.18,88.98,
8.15,-20.80,23.70,-19.66,29.11,8.29,7.74,-98.22,54.91,123.30,
15.52,-7.45,31.04,-8.22,38.22,21.78,5.76,-77.99,47.34,153.31,
23.53,5.38,39.07,2.98,47.29,38.71,6.58,-57.45,36.18,176.74,
32.16,18.76,47.71,14.88,55.08,61.71,9.76,-40.52,23.99,-163.75,
41.27,34.36,56.93,29.53,59.23,92.75,15.53,-26.40,12.16,-145.27,
49.92,54.65,66.04,51.59,57.34,126.97,22.59,-13.65,2.14,-126.20,
55.50,81.56,72.21,90.19,49.88,155.84,30.32,-1.48,-4.71,-105.49,
55.92,113.45,70.26,139.40,39.23,178.48,38.55,10.92,-7.09,-83.11,
50.58,143.40,61.40,172.50,27.38,-162.27,47.25,24.86,-4.77,-60.15,
41.30,167.74,50.34,-166.33,15.74,-143.93,56.21,43.14,-0.54,-38.22,
30.03,-171.78,39.24,-149.48,5.71,-124.87,63.77,70.19,4.75,-24.15,
18.40,-152.91,29.17,-133.78,-1.18,-104.31,66.51,108.81,11.86,-11.51,
7.69,-133.71,20.84,-117.74,-3.72,-82.28,61.95,146.15,20.05,0.65,
-0.52,-113.33,14.97,-100.79,-2.58,-59.75,52.78,172.46,28.91,13.29,
-4.91,-91.36,11.92,-82.84,0.34,-40.12,41.93,-167.91,38.21,27.90,
These are some of the problems with your current solution.
You created a csv.reader object but then you did not use it
You read each line but then you did not store them anywhere
You are not keeping track of 20 rows which was supposed to be your requirement
You created the output file in a separate with block which does not have access anymore to the read lines or the csv.reader object
Here's a working solution:
import csv
inp_file = "input.csv"
out_file_pattern = "file_{:{fill}2}.csv"
max_rows = 20


def _write_chunk(rows, file_number):
    """Write rows to the numbered output file (file_01.csv, file_02.csv, ...)."""
    out_name = out_file_pattern.format(file_number, fill="0")
    # newline="" lets the csv module control the line endings itself
    # (otherwise Windows output gets an extra blank line after every row)
    with open(out_name, "w", newline="") as out_f:
        csv.writer(out_f).writerows(rows)


with open(inp_file, "r", newline="") as inp_f:
    reader = csv.reader(inp_f)
    all_rows = []
    cur_file = 1
    for row in reader:
        all_rows.append(row)
        if len(all_rows) == max_rows:
            _write_chunk(all_rows, cur_file)
            all_rows = []
            cur_file += 1
    # the input rarely ends on an exact multiple of max_rows; flush whatever
    # is left so the trailing rows are not silently dropped
    if all_rows:
        _write_chunk(all_rows, cur_file)
The flow is as follows:
Read each row of the CSV using a csv.reader
Store each row in an all_rows list
Once that list gets 20 rows, open a file and write all the rows to it
Use the csv.writer's writerows method
Use a cur_file counter to format the filename
Every time 20 rows are dumped to a file, empty out the list and increment the file counter
This solution includes the blank lines as part of the 20 rows. Your test file has actually 19 rows of CSV data and 1 row for a blank line. If you need to skip the blank line, just add a simple check of
if not row:
continue
Also, as I mentioned in a comment, I assume that the input file is an actual CSV file, meaning it's a plain text file with CSV formatted data. If the input is actually an Excel file, then solutions like this won't work, because you'll need some special libraries to read Excel files, even if the contents visually looks like CSV or even if you rename the file to .csv.
Without using any special CSV libraries (e.g. csv — you could use it, but I don't know how to, and I don't think it is necessary for this case), you could:
# Split the input into numbered files, starting a new file after every blank
# line. Check proper encoding for your file.
with open(r"<file_name>", "r", encoding="utf-8") as excel_csv_fp:
    file_counter = 0
    new_file_name = ""
    new_fp = None  # current output file; None until the next data line shows up
    try:
        for line in excel_csv_fp:
            # lines read from a file keep their "\n", so a blank line is never
            # equal to ""; strip before comparing
            if line.strip() == "":
                # a blank line ends the current chunk
                if new_fp is not None:
                    new_fp.close()
                    new_fp = None
                continue
            if new_fp is None:
                # first data line of a chunk (including the very first line of
                # the input): open the next numbered file
                file_counter += 1
                new_file_name = "file_" + "{:02d}".format(file_counter)  # 1 turns into 01 and 10 remains 10
                new_fp = open("<some_path>/" + new_file_name + ".csv", "w", encoding="utf-8")
            new_fp.write(line)
    finally:
        # make sure the last output file is flushed and closed
        if new_fp is not None:
            new_fp.close()
If you have any questions on any of the code (how it works, why I choose what etc.), just ask in the comments and I'll try to reply as soon as possible.

How to copy multiple rows and one column from one CSV file to another CSV Excel?

I am extremely new to python(coding, for that matter).
Could I please get some help as to how can I achieve this. I have gone through numerous threads but nothing helped.
My input file looks like this:
I want my output file to look like this:
Just replication of the first column, twice in the second excel sheet. With a line after every 5 rows.
A .csv file can be opened with a normal text editor, do this and you'll see that the entries for each column are comma-separated (csv = comma separated values). Most likely it's semicolons ;, though.
Since you're new to coding, I recommend trying it manually with a text editor first until you have the desired output, and then try to replicate it with python.
Also, you should post code examples here and ask specific questions about why it doesn't work like you expected it to work.
Below is the solution. Don't forget to configure input/output files and the delimiter:
# raw strings, so the backslashes in the Windows paths are never read as
# escape sequences
input_file = r'c:\Temp\input.csv'
output_file = r'c:\Temp\output.csv'
delimiter = ';'
output_lines = []
with open(input_file) as f:
    for i, line in enumerate(f, start=1):
        # strip once and reuse, so both columns are identical and the row
        # still ends with a newline when the last input line has none
        value = line.strip()
        output_lines.append(value + delimiter + value + '\n')
        if i % 5 == 0:
            # empty line after every 5 rows
            output_lines.append('\n')
with open(output_file, 'w') as file_:
    file_.writelines(output_lines)
Python has a csv module for doing this. It is able to automatically read each row into a list of columns. It is then possible to simply take the first element and replicate it into the second column in an output file.
import csv
with open('input.csv', 'rb') as f_input:
csv_input = csv.reader(f_input)
input_rows = list(csv_input)
with open('output.csv', 'wb') as f_output:
csv_output = csv.writer(f_output)
for line, row in enumerate(input_rows, start=1):
csv_output.writerow([row[0], row[0]])
if line % 5 == 0:
csv_output.writerow([])
Note, it is not advisable to write the updated data directly over the input file as if there was a problem you would lose your original file.
If your input file has multiple columns, this script will remove them and simply duplicate the first column.
By default, the csv format separates each column using a comma, this can be modified by specifying a desired delimiter as follows:
csv_output = csv.writer(f_output, delimiter=';')

Python csv to dictionary with first line as title

I have a file file.csv with some data:
fn,ln,tel
john,doe,023322
jul,dap,024322
jab,sac,0485
I would like to have an array that I can access like this:
file = 'file.csv'
with open(file,'rU') as f:
reader = csv.DictReader(f)
print reader[0].fn
So I would like that it prints the first name from the first record. Unfortunately, I get this error:
ValueError: I/O operation on closed file
How can I get it done so that I don't need to keep the file opened and that I can play with my array. Btw, I don't need to write back in the csv file, I just need to use the data and for that, an array that I can modify would be best.
You need to access the reader within the with block, not outside of it:
file = 'file.csv'
with open(file,'rU') as f:
reader = csv.DictReader(f)
first_row = next(reader)
print first_row['fn']
As soon as you move code outside the block, the f file object is closed and you cannot obtain rows from the reader anymore. This is kind of the point of the with statement.
If you want to have random access to all rows in the file, convert the reader to a list first:
file = 'file.csv'
with open(file,'rU') as f:
reader = csv.DictReader(f)
all_rows = list(reader)
print all_rows[0]['fn']
The list() call will iterate over the reader, adding each result yielded to the list object until all rows are read. Make sure you have enough memory to hold all those rows.

Writing a filtered CSV file to a new file and iterating through a folder

I have been trying initially to create a program to go through one file and select certain columns that will then be moved to a new text file. So far I have
import os, sys, csv
os.chdir("C://Users//nelsonj//Desktop//Master_Project")
with open('CHS_2009_test.txt', "rb") as sitefile:
reader = csv.reader(sitefile, delimiter=',')
pref_cols = [0,1,2,4,6,8,10,12,14,18,20,22,24,26,30,34,36,40]
for row in reader:
new_cols = list(row[i] for i in pref_cols)
print new_cols
I have been trying to use the csv functions to write the new file but I am continuously getting errors. I will eventually need to do this over a folder of files, but thought I would try to do it on one before tackling that.
Code I attempted to use to write this data to a new file
for row in reader:
with open("CHS_2009_edit.txt", 'w') as file:
new_cols = list(row[i] for i in pref_cols)
newfile = csv.writer(file)
newfile.writerows(new_cols)
This kind of works in that I get a new file, but it only prints the second row of values from my csv, i.e., not the header values, and places commas in between each individual character instead of just copying over the original columns as they were.
I am using PythonWin with Python 2.6(from ArcGIS)
Thanks for the help!
NEW UPDATED CODE
import os, sys, csv
path = ('C://Users//nelsonj//Desktop//Master_Project')
for filename in os.listdir(path):
pref_cols = [0,1,2,4,6,8,10,12,14,18,20,22,24,26,30,34,36,40]
with open(filename, "rb") as sitefile:
with open(filename.rsplit('.',1)[0] + "_Master.txt", 'w') as output_file:
reader = csv.reader(sitefile, delimiter=',')
writer = csv.writer(output_file)
for row in reader:
new_row = list(row[i] for i in pref_cols)
writer.writerow(new_row)
print new_row
Getting list index out of range for the new_row, but it seems to still be processing the file. Only thing I can't get it to do now is loop through all files in my directory. Here's a hyperlink to Screenshot of data text file
Try this:
# `i in row` would ask whether the integer i is one of the cell values (never
# true for a row of strings); what we need is that index i exists in this row
new_header = [row[i] for i in pref_cols if i < len(row)]
That should avoid the error, but it may not avoid the underlying problem. Would you paste your CSV file somewhere that I can access, and I'll fix this for you?
For your purpose of filtering, you don't have to treat the header differently from the rest of the data. You can go ahead remove the following block:
headers = reader.next()
for row in headers:
new_header = list(row[i] for i in pref_cols)
print new_header
Your code did not work because you treated headers as a list of rows, but headers is just one row.
Update
This update deals with writing the CSV data to a new file. You should move the open statement above the for row...
with open("CHS_2009_edit.txt", 'w') as output_file:
    writer = csv.writer(output_file)
    for row in reader:
        new_cols = list(row[i] for i in pref_cols)
        # writerow() writes one row; writerows() would treat every string in
        # new_cols as a row of its own and split it into single characters
        writer.writerow(new_cols)
Update 2
This update deals with the header output problem. If you followed my suggestions, you should not have this problem. I don't know what your current code looks like, but it looks like you supplied a string where the code expects a list. Here is the code that I tried on my system (using my made-up data) and it seems to work:
pref_cols = [...] # <<=== Should be set before entering the loop
with open('CHS_2009_test.txt', "rb") as sitefile:
with open('CHS_2009_edit.txt', 'w') as output_file:
reader = csv.reader(sitefile, delimiter=',')
writer = csv.writer(output_file)
for row in reader:
new_row = list(row[i] for i in pref_cols)
writer.writerow(new_row)
One thing to notice: I use writerow() to write a single row, where you use writerows() -- that makes a difference.

How to multiply one column by another in a csv file and re-write

I have some data in csv format and I need to perform a multiplication of the 3rd column by the 6th column and append the results to the end. My data is as follows:
TITLE,TITLE,T,T,T,T
data,data,5,data,data,98.7,data
data,data,2,data,data,97,data
data,data,5,data,data,98,data
data,data,4,data,data,8.7,data
data,data,5,data,data,9.7,data
data,data,12.5,data,data,198.7,data
I am really new to coding, but my attempt was as follows:
import csv
import datetime
import copy
from collections import defaultdict
class_col = 2
data_col = 5
with open('minitest.csv', 'r') as f:
data = [line.strip().split(',') for line in f]
for row in data:
class_col*data_col
with open('minitest_edit.csv', 'w') as nf:
nf.write('\n'.join(','.join(row) for row in data))
print "done"
I didn't get any errors come up, any suggestions? Thanks OS"
Use csv.reader and csv.writer:
import csv
with open('minitest.csv', 'rb') as f:
reader = csv.reader(f)
data = [next(reader)] # title row
for row in reader:
data.append(row + [float(row[2]) * float(row[5])])
with open('minitest.csv', 'wb') as nf:
writer = csv.writer(nf)
writer.writerows(data)
produces:
TITLE,TITLE,T,T,T,T
data,data,5,data,data,98.7,data,493.5
data,data,2,data,data,97,data,194.0
data,data,5,data,data,98,data,490.0
data,data,4,data,data,8.7,data,34.8
data,data,5,data,data,9.7,data,48.5
data,data,12.5,data,data,198.7,data,2483.75
You want the following to multiply your columns and append them to the original ones:
# keep the title row untouched: its cells are text, so float() would raise
# ValueError on it
new_data = data[:1]
for row in data[1:]:
    # NOTE: the appended product is a float; wrap it in str() if the rows are
    # later written with ','.join(row)
    new_data.append(row + [float(row[class_col]) * float(row[data_col])])
This is one way to do it more efficiently, i.e. by not reading all the data into memory, which will matter with larger data sets:
import csv
import tempfile
import shutil
input_file = 'minitest.csv'
with open(input_file, 'rb') as f, \
tempfile.NamedTemporaryFile(delete=False) as out_f:
# in order to be able to not have to read everything in memory, we have to
# write every processed row to disk immediately; for that, we need a temporary
# file because we can't read and write a single file at the same time:
reader = csv.reader(f)
writer = csv.writer(out_f)
# header row
writer.writerow(next(reader))
# note that this uses a generator not a list, so that writerows will lazily
# evaluate each row as it writes them to disk
writer.writerows(row + [float(row[2]) * float(row[5])] for row in reader)
# once everything's done, overwrite the original file with the new contents.
shutil.move(out_f.name, input_file)
P.S. it would actually be simpler if you just didn't write back to the same file—you can always do the moving manually after the processing code has finished, to keep the processing code simpler.
If you are lucky enough to be programming on Linux, you can use awk:
awk -F "\"*,\"*" '{print $0 "," $3*$6 }' test.csv > result.csv
And you can call it with Python http://docs.python.org/2/library/subprocess.html.

Categories