Skip header when writing to an open CSV in Python

I am compiling a load of CSVs into one. The first CSV contains the headers, and I am opening it in write mode (maincsv). I am then making a list of all the others, which live in a different folder, and attempting to append them to the main one.
It works; however, it just writes over the headings. I just want to start appending from line 2 of each file. I'm sure it's pretty simple, but all the next() etc. approaches I try just throw errors. The headings and data are aligned, if that helps.
import os, csv

maincsv = open(r"C:\Data\OSdata\codepo_gb\CodepointUK.csv", 'w', newline='')
maincsvwriter = csv.writer(maincsv)

curdir = os.chdir(r"C:\Data\OSdata\codepo_gb\Data\CSV")
csvlist = os.listdir()

csvfiles = []
for file in csvlist:
    path = os.path.abspath(file)
    csvfiles.append(path)

for incsv in csvfiles:
    opencsv = open(incsv)
    csvreader = csv.reader(opencsv)
    for row in csvreader:
        maincsvwriter.writerow(row)
maincsv.close()

To simplify things, I have the code load all the files in the directory the Python code is run in. It will take the first line of the first .csv file and use it as the header.
import os

count = 0
collection = open('collection.csv', 'a')
files = [f for f in os.listdir('.') if os.path.isfile(f)]
for f in files:
    if '.csv' in f:
        solecsv = open(f, 'r')
        if count == 0:
            # assuming header is 1 line
            header = solecsv.readline()
            collection.write(header)
            count += 1  # without this, every file's first line would be written as a header
        for x in solecsv:
            if not (header in x):
                collection.write(x)
        solecsv.close()
collection.close()
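If you would rather stay with the csv module, the next() call mentioned in the question does work when applied to a csv.reader: consume each file's header row with next() and write it only once. A minimal sketch, assuming every input has a one-line header (the file names are placeholders):

import csv
import glob

inputs = sorted(f for f in glob.glob('*.csv') if f != 'collection.csv')
with open('collection.csv', 'w', newline='') as out_f:
    writer = csv.writer(out_f)
    for i, path in enumerate(inputs):
        with open(path, newline='') as in_f:
            reader = csv.reader(in_f)
            header = next(reader)        # consume the header row
            if i == 0:
                writer.writerow(header)  # keep only the first file's header
            writer.writerows(reader)     # append the remaining rows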

How to replace the header of all CSV files in a directory?

I have a folder of CSV files, and I need to simply replace the current header (first row) of each CSV with a different header. As an example, every CSV has A, B, C, D, E as its first-row header, but I need to be able to change that to whatever I want, e.g. Apple, Orange, Lemon, Pear, Peach or 1, j, er, fd, j5.
All the data in each CSV needs to be retained besides the header, and the replacement header will make the headers of all CSVs in the folder identical, per what is indicated in the code.
import shutil
import glob

files = glob.glob("/home/robert/Testing/D1/*.csv")
for i in range(len(files)):
    from_file = open(files[i])
    to_file = open(files[i], mode="w")
    to_file.write("id,t,s,p,date,e")
    shutil.copyfileobj(from_file, to_file)
I used this code; however, it deleted all of the other data in the CSV files, which I needed to keep, and only left/created the headers.
from pathlib import Path

def update_folder(folder: Path):
    for fname in folder.glob('*.csv'):
        with open(fname) as fin:
            lines = fin.readlines()  # element 0 is the old header, A,B,C...
        lines[0] = 'Apple,Orange,Lemon\n'
        with open(fname, 'w') as fout:
            fout.write(''.join(lines))
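The function is defined but never invoked in the snippet; a hypothetical call, using the questioner's example directory:

update_folder(Path('/home/robert/Testing/D1'))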
I would suggest using Python's tempfile module to create a temporary file with the changes in it; after they're made, it can simply be renamed to replace the original file. I would also use the csv module to read the original and write the updated version, because it is fast, debugged, and can handle many varieties of CSV.
Using the two in combination makes the task relatively easy:
import csv
import os
from pathlib import Path
from tempfile import NamedTemporaryFile

CSV_FOLDER = Path('/home/robert/Testing/D1')
NEW_HEADER = 'id,t,s,p,date,e'.split(',')

for filepath in CSV_FOLDER.glob('*.csv'):
    # dir=filepath.parent keeps the temporary file on the same filesystem,
    # so the final os.replace() is an atomic rename.
    with open(filepath, 'r', newline='') as csv_file, \
         NamedTemporaryFile('w', newline='', dir=filepath.parent,
                            delete=False) as tmp_file:
        reader = csv.reader(csv_file)
        writer = csv.writer(tmp_file)
        next(reader)                 # Skip the old header.
        writer.writerow(NEW_HEADER)  # Write the replacement header.
        writer.writerows(reader)     # Copy the remaining rows of the original file.
    os.replace(tmp_file.name, filepath)  # Replace the original file with the updated version.

print('CSV files updated')

Read a file from a folder, extract a specific key from the file, and save it as a CSV file

I'm new to Python, and the task I am performing is to extract specific key values from a list of .iris files (each containing data in a nested-dictionary format) in a specific directory.
I want to extract those values, save them as a new .csv file, and repeat this for all the other files.
Below is a sample of a .iris file, from which I should extract only these keys: 'uid', 'enabled', 'login', 'name'.
{"streamType":"user",
"uid":17182,
"enabled":true,
"login":"xyz",
"name":"abcdef",
"comment":"",
"authSms":"",
"email":"",
"phone":"",
"location":"",
"extraLdapOu":"",
"mand":997,
"global":{
"userAccount":"View",
"uid":"",
"retention":"No",
"enabled":"",
"messages":"Change"},
"grants":[{"mand":997,"role":1051,"passOnToSubMand":true}],
I am trying to convert the .iris files to .json and read the files one by one, but unfortunately I am not getting the output I want.
Please, could anyone help me?
My code (added from comments):
import os
import csv

path = ''
os.chdir(path)

# Read an iris file
def read_iris_file(file_path):
    with open(file_path, 'r') as f:
        print(f.read())

# Iterate through all files
for file in os.listdir():
    # Check whether the file is in iris format
    if file.endswith(".iris"):
        file_path = f"{path}\{file}"
        # Call the read-iris-file function
        print(read_iris_file(file_path))
Your files contain data in JSON format, so we can use the built-in json module to parse them. To iterate over files with a certain extension you can use pathlib.Path.glob() with the pattern "*.iris". Then we can use csv.DictWriter() and pass "ignore" to the extrasaction argument, which makes the DictWriter ignore keys we don't need and write only those passed to the fieldnames argument.
Code:
import csv
import json
from pathlib import Path

path = Path(r"path/to/folder")
keys = "uid", "enabled", "login", "name"

with open(path / "result.csv", "w", newline="") as out_f:
    writer = csv.DictWriter(out_f, fieldnames=keys, extrasaction='ignore')
    writer.writeheader()
    for file in path.glob("*.iris"):
        with open(file) as inp_f:
            data = json.load(inp_f)
        writer.writerow(data)
Try the below (the key point here is loading the .iris file with ast):

import ast

fields = ('uid', 'enabled', 'login', 'name')

with open('my.iris') as f1:
    # Note: ast.literal_eval() parses Python literals; JSON-style lowercase
    # true/false (as in the sample above) would raise ValueError, in which
    # case json.loads() is the safer choice.
    data = ast.literal_eval(f1.read())

with open('my.csv', 'w') as f2:
    f2.write(','.join(fields) + '\n')
    f2.write(','.join(str(data[f]) for f in fields) + '\n')  # str() handles non-string values such as uid
my.csv
uid,enabled,login,name
17182,true,xyz,abcdef

filtering images named in a csv

I have a CSV file with a list of image names, and I want to filter the corresponding images into a new folder. Here is what I hoped would work, but it doesn't. I get no error message, so I guess it iterates through the for loops but never gets a True in the if check, and I can't figure out why.
I already tried str(), but it still doesn't work.
Any ideas?
Thank you!
with open(csvFilePath, 'r', encoding='utf-8') as inp:
    # run through every row of the file
    for row in csv.reader(inp):
        # search for the filename in the folder
        for file in os.walk(imgFilePath):
            if file == row[column]:
                shutil.copy2(file, newImgPath)
Found a solution, with a slightly different approach. (The original check never matches because os.walk() yields (root, dirs, files) tuples, not individual file names, so file == row[column] is always False.) First we generate a list with all the items of the column we are interested in. Then we check whether each file name is in that list; if it is, we copy the file to the new folder.
import os
import shutil
import csv

def test(csvFilePath, imgFilePath, newImgPath):
    img_list = []
    with open(csvFilePath, "r") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for rows in csv_reader:
            img_list.append(rows[0])
    for root, dirs, files in os.walk(imgFilePath):
        for file in files:
            if file in img_list:
                shutil.copy(os.path.join(root, file), newImgPath)
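For large CSVs, a set makes the membership test constant-time instead of a linear scan of the list. A minimal sketch of the same idea (the function name and paths are illustrative, not from the original):

import csv
import os
import shutil

def copy_listed_images(csv_path, img_dir, out_dir):
    # Collect the names from the first CSV column into a set for O(1) lookups.
    with open(csv_path, newline='') as f:
        wanted = {row[0] for row in csv.reader(f) if row}
    # os.walk() yields (root, dirs, files) tuples; compare individual names.
    for root, _dirs, files in os.walk(img_dir):
        for name in files:
            if name in wanted:
                shutil.copy2(os.path.join(root, name), out_dir)

copy_listed_images('images.csv', '/data/images', '/data/filtered')  # hypothetical paths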

Python Create and edit file in each subdirectory

I have a primary folder filled with subfolders, and each contains files with a particular naming scheme. I have unit-tested a function for creating and editing a text document within a single directory based on information in these files, but am now running into issues trying to get this function to iterate over every subdirectory.
Problem:
I am getting a "KeyError" for line 38 if (row["r_id"]) in filters:. This is because the file br_ids.csv is not being created. In the unit test this was functioning fine, so I can only assume it is some issue with how I am using os.walk.
Code:
import csv
import os

with open('hasf.txt', 'w') as hf:
    for root, subFolders, files in os.walk('/path/to/topdir/'):
        # if the folder contains 'f_r.csv', list the path in 'hasf.txt'
        if 'f_r.csv' in files:
            hf.write("%s\n" % root)
        if 'r.csv' in files:
            with open(os.path.join(root, "r.csv")) as inf, \
                 open(os.path.join(root, "br_ids.csv"), "w") as output:
                reader = csv.DictReader(inf, quotechar='"')
                headers = ["r_id"]
                writer_br = csv.DictWriter(output, headers, extrasaction='ignore')
                writer_br.writeheader()
                for row in reader:
                    if int(row["r_type"]) == 3:
                        writer_br.writerow(row)
            # End creating br_ids

            # parse the data you're about to filter with
            with open(os.path.join(root, 'br_ids.csv'), 'r') as f:
                filters = {row["r_id"] for row in csv.DictReader(f, delimiter=',', quotechar='"')}

            with open(os.path.join(root, 'bt_ids.csv'), 'w') as out_f:
                headers = ["t_id"]
                out = csv.DictWriter(out_f, headers, extrasaction='ignore')
                out.writeheader()
                # go through the rows and see if the matching row["r_id"] is
                # found in the previously parsed set of filters; if so, keep the row
                with open(os.path.join(root, 't.csv'), 'r') as f:
                    for row in csv.DictReader(f, delimiter=','):
                        if row["r_id"] in filters:
                            out.writerow(row)
I have gone through a few similar questions here, but none of them directly address creating, editing, and using a file inside each location of an os.walk. This is my first time using Python, and I am at somewhat of a loss. Also, if there is any way to make my other code more Pythonic, I am all ears.
Thanks!
It turns out the issue was exactly that KeyError: in some of the folders, br_ids.csv had zero entries, which raised a KeyError. The way I solved it was with try, like so:
# parse the data you're about to filter with
with open(os.path.join(root, 'br_ids.csv'), 'r') as f:
    filters = {row["r_id"] for row in csv.DictReader(f, delimiter=',', quotechar='"')}

with open(os.path.join(root, 'bt_ids.csv'), 'w') as out_f:
    headers = ["t_id"]
    out = csv.DictWriter(out_f, headers, extrasaction='ignore')
    out.writeheader()
    # go through the rows and see if the matching row["r_id"] is
    # found in the previously parsed set of filters; if so, keep the row
    with open(os.path.join(root, 't.csv'), 'r') as f:
        for row in csv.DictReader(f, delimiter=','):
            try:
                if row["r_id"] in filters:
                    out.writerow(row)
            except KeyError:
                continue
In another case I had if row["r_id"] not in filters: and bypassed the error using the same method, except that when it raised a KeyError it went ahead and did out.writerow(row).
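An alternative to the try/except guard is dict.get(), which returns None for a missing key instead of raising. A sketch of the same filter wrapped in a hypothetical helper, with root, filters, and out meaning what they do in the snippet above:

import csv
import os

def filter_t_rows(root, filters, out):
    with open(os.path.join(root, 't.csv'), 'r') as f:
        for row in csv.DictReader(f, delimiter=','):
            # .get() returns None when the column is absent, so no KeyError
            if row.get("r_id") in filters:
                out.writerow(row)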

Add file name as last column of CSV file

I have a Python script which modifies a CSV file to add the filename as the last column:
import sys
import glob

for filename in glob.glob(sys.argv[1]):
    file = open(filename)
    data = [line.rstrip() + "," + filename for line in file]
    file.close()
    file = open(filename, "w")
    file.write("\n".join(data))
    file.close()
Unfortunately, it also adds the filename to the header (first) row of the file. I would like the string "ID" added to the header instead. Can anybody suggest how I could do this?
Have a look at the official csv module.
Here are a few minor notes on your current code:
It's a bad idea to use file as a variable name, since that shadows the built-in type.
You can close the file objects automatically by using the with syntax.
Don't you want to add an extra column in the header line, called something like Filename, rather than just omitting a column in the first row?
If your filenames have commas (or, less probably, newlines) in them, you'll need to make sure that the filename is quoted - just appending it won't do.
That last consideration would incline me to use the csv module instead, which will deal with the quoting and unquoting for you. For example, you could try something like the following code:
import glob
import csv
import sys

for filename in glob.glob(sys.argv[1]):
    data = []
    with open(filename) as finput:
        for i, row in enumerate(csv.reader(finput)):
            to_append = "Filename" if i == 0 else filename
            data.append(row + [to_append])
    with open(filename, 'wb') as foutput:
        writer = csv.writer(foutput)
        for row in data:
            writer.writerow(row)
That may quote the data slightly differently from your input file, so you might want to play with the quoting options for csv.reader and csv.writer described in the documentation for the csv module.
As a further point, you might have good reasons for taking a glob as a parameter rather than just the files on the command line, but it's a bit surprising - you'll have to call your script as ./whatever.py '*.csv' rather than just ./whatever.py *.csv. Instead, you could just do:
for filename in sys.argv[1:]:
... and let the shell expand your glob before the script knows anything about it.
One last thing - the current approach you're taking is slightly dangerous, in that if anything fails when writing back to the same filename, you'll lose data. The standard way of avoiding this is to instead write to a temporary file, and, if that was successful, rename the temporary file over the original. So, you might rewrite the whole thing as:
import csv
import sys
import tempfile
import shutil

for filename in sys.argv[1:]:
    tmp = tempfile.NamedTemporaryFile(delete=False)
    with open(filename) as finput:
        with open(tmp.name, 'wb') as ftmp:
            writer = csv.writer(ftmp)
            for i, row in enumerate(csv.reader(finput)):
                to_append = "Filename" if i == 0 else filename
                writer.writerow(row + [to_append])
    shutil.move(tmp.name, filename)
You can try:
data = [file.readline().rstrip() + ",ID"]
data += [line.rstrip() + "," + filename for line in file]
You can try changing your code, but using the csv module is recommended. This should give you the result you want:
import sys
import glob
import csv

filename = glob.glob(sys.argv[1])[0]
yourfile = csv.reader(open(filename, 'r'))

csv_output = []
for row in yourfile:
    if len(csv_output) == 0:  # header row: append "ID"
        row.append("ID")
    else:                     # data row: append the filename
        row.append(filename)
    csv_output.append(row)

yourfile = csv.writer(open(filename, 'w'), delimiter=',')
yourfile.writerows(csv_output)
Use the CSV module that comes with Python.
import csv
import sys

def process_file(filename):
    # Read the contents of the file into a list of lines.
    f = open(filename, 'r')
    contents = f.readlines()
    f.close()

    # Use a CSV reader to parse the contents.
    reader = csv.reader(contents)

    # Open the output and create a CSV writer for it.
    f = open(filename, 'wb')
    writer = csv.writer(f)

    # Process the header.
    header = reader.next()
    header.append('ID')
    writer.writerow(header)

    # Process each row of the body.
    for row in reader:
        row.append(filename)
        writer.writerow(row)

    # Close the file and we're done.
    f.close()

# Run the function on all command-line arguments. Note that this does no
# checking for things such as file existence or permissions.
map(process_file, sys.argv[1:])
You can run this as follows:
blair#blair-eeepc:~$ python csv_add_filename.py file1.csv file2.csv
You can use fileinput to do in-place editing:
import sys
import glob
import fileinput

for filename in glob.glob(sys.argv[1]):
    # with inplace=1, anything printed inside the loop is written back to the file
    fi = fileinput.FileInput(filename, inplace=1)
    for line in fi:
        if fi.isfirstline():
            print line.rstrip() + ",ID"
        else:
            print line.rstrip() + "," + filename
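For Python 3, the same in-place edit works with print as a function and FileInput as a context manager; a minimal sketch:

import sys
import glob
import fileinput

for filename in glob.glob(sys.argv[1]):
    with fileinput.FileInput(filename, inplace=True) as fi:
        for line in fi:
            if fi.isfirstline():
                print(line.rstrip() + ",ID")  # header gets the ID column
            else:
                print(line.rstrip() + "," + filename)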
