Add file name to first column at start of .csv file

Add file name to first column at start of .csv file - python

I'm trying to add a new "filename" column at the start of a .csv file then add the name of the file to each line
Example input.csv
Date,Day,Server
2018-5-20,Su,ASA
2018-5-21,Su,ASA
Example Output
Filename,Date,Day,Server
input,2018-5-20,Su,ASA
input,2018-5-21,Su,ASA
It looks easy to add the filename to the end of each row using the csv module, but adding a new column at the start looks trickier.
Any help is appreciated
So far I have the code below, pieced together from similar questions online:
# Asker's attempt: copy every row of each matched input file into output.csv,
# putting the value of `fname` in a new first column.
with open(r'C:\Python\sys_status\output.csv', 'w', newline='') as file_output:
    csv_output = csv.writer(file_output)
    for fname in glob.glob(r'C:\Python\sys_status\input.csv'):
        with open(fname, newline='') as f_input:
            csv_input = csv.reader(f_input)
            for row in csv_input:
                # `fname` is the full path returned by glob, and it is inserted
                # into the header row as well as into the data rows.
                row.insert(0, fname)
                csv_output.writerow(row)
Using the above, I'm unable to set a header name; instead it adds the whole path of the file to the first column of every row, including the header:
C:\Python\input.csv,Date,Day,Server
C:\Python\input.csv,2018-5 -20,Su,ASA
C:\Python\input.csv,2018-5 -21,Su,ASA

The first row of the CSV file contains the header. When writing the new CSV file, the first writerow call should be used to write the header.
import csv
import glob
import os

with open(r'C:\Python\sys_status\output.csv', 'w', newline='') as file_output:
    csv_output = csv.writer(file_output)
    header_written = False
    for fname in glob.glob(r'C:\Python\sys_status\input.csv'):
        # Reduce the full path to the bare name: ...\input.csv -> input
        label = os.path.splitext(os.path.basename(fname))[0]
        with open(fname, newline='') as f_input:
            csv_input = csv.reader(f_input)
            # Header processing: the first row of each input is its header.
            header = next(csv_input, None)
            if header is None:
                continue  # empty input file, nothing to copy
            if not header_written:
                # Emit the header once, even if the glob matches several files.
                csv_output.writerow(["Filename"] + header)
                header_written = True
            for row in csv_input:
                csv_output.writerow([label] + row)
Note: I suggest using csv.DictReader and csv.DictWriter, which let you specify the field names to read/write and provide a writeheader() method. This doesn't give any significant advantage, but it makes the code cleaner.

this should work
import os.path
import csv
import glob

with open(r'C:\Python\sys_status\output.csv', 'w', newline='') as file_output:
    csv_output = csv.writer(file_output)
    for fname in glob.glob(r'C:\Python\sys_status\input.csv'):
        # os.path.split() returns a (head, tail) tuple, which has no .pop();
        # basename() gives the tail directly and splitext() drops ".csv",
        # so ...\input.csv becomes "input".
        label = os.path.splitext(os.path.basename(fname))[0]
        with open(fname, newline='') as f_input:
            csv_input = csv.reader(f_input)
            for i, row in enumerate(csv_input):
                if i == 0:
                    # First row of the file is its header.
                    row.insert(0, 'Filename')
                else:
                    row.insert(0, label)
                csv_output.writerow(row)

Related

Data is adjacent to headers in csv

how can I put my first row of data in the csv under the header and not in the same row as header?
This is the results.
And down here is my coding.
import os
# ...
filename = 'C:/Desktop/GPS_Trial/Trial6/' + str(d1) + '_' + str(file_counter) +'.csv'
#check whether the file exist or not
rows_to_be_written = []
if not os.path.exists(filename):
    rows_to_be_written.append(header1)
    rows_to_be_written.append(header2)
    rows_to_be_written.append(header3)
rows_to_be_written.append(gps)
rows_to_be_written.append(gps2)
rows_to_be_written.append(gps3)
#write the data into csv
with open(filename, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f, delimiter=',')
    # rows_to_be_written is one flat list, so header names and values are
    # emitted together as a single CSV row.
    writer.writerow(rows_to_be_written)
print(gps, gps2, gps3)

When the file does not exist, you write the header and the values together in one row.
You should write the header as a separate row:
# The header is only needed when the file is being created.
header = None
if not os.path.exists(filename):
    header = [header1, header2, header3]
# One data row holding the three readings.
rows_to_be_written = [gps, gps2, gps3]
# write the data into csv
with open(filename, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f, delimiter=',')
    if header is not None:
        # Header goes on its own row, before the data.
        writer.writerow(header)
    writer.writerow(rows_to_be_written)
print(gps, gps2, gps3)
Alternatively, if you intended to write several rows but ended up writing a single row with the header in it, change the code like this:
# Build a list of rows (a list of lists): an optional header row followed by
# the data row.
rows_to_be_written = []
if not os.path.exists(filename):
    rows_to_be_written.append([header1, header2, header3])
rows_to_be_written.append([gps, gps2, gps3])
# write the data into csv
with open(filename, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f, delimiter=',')
    # writerows() writes each inner list as its own CSV row.
    writer.writerows(rows_to_be_written)
print(gps, gps2, gps3)

You need to add the headings separately, and only if they are not there already:
# check whether the file exist or not
if not os.path.exists(filename):
    headings = [header1, header2, header3]
else:
    headings = None
rows_to_be_written = [gps, gps2, gps3]
# write the data into csv
with open(filename, 'a', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    # Write headings if exist
    if headings is not None:
        writer.writerow(headings)
    # Write rows
    writer.writerow(rows_to_be_written)
print(gps, gps2, gps3)
I suggest you consider this approach
# Open file to see if there are headings
try:
    with open(filename, "r", newline="") as f:
        # Sniffer.has_header() is a heuristic based on the first rows.
        has_headings = csv.Sniffer().has_header(f.read(1024))
except (FileNotFoundError, csv.Error):
    # The file does not exist yet, or it seems to be empty
    has_headings = False
# Open to write. In append mode ("a")
with open(filename, "a", newline="") as f:
    writer = csv.writer(f)
    if not has_headings:
        # Only add the headings when the file does not have them already
        writer.writerow(headings_list)
    # Use writerows if you have a 2D list, else use a for loop of writer.writerow
    writer.writerows(lists_of_rows)

Copying Specific Rows between CSV Files Python

I have an image folder with images saved with their ids. I would like to search the ids in the rows of a CSV file, then copy the rows into a new CSV file. Code as follows:
import os
import csv
folder = os.listdir("[image folder]")
file_in = "[csv containing info rows]"
file_out = "[new csv to create]"
with open(file_in, 'r', newline='') as f_input, open(file_out, 'w', newline='') as f_output:
    writer = csv.writer(f_output)
    reader = csv.reader(f_input)
    writer.writerow(["ImageID", "LabelName", "XMin", "XMax", "YMin", "YMax", "IsGroupOf"]) #writes header of new csv
    for filename in folder:
        idx = os.path.splitext(filename)[0]
        # NOTE: `reader` is a one-pass iterator over f_input; the inner loop
        # consumes it completely during the first iteration of the outer loop.
        for row in reader:
            if idx == row[0]:
                print(row)
                writer.writerow(row)
It writes only the rows for the first matching id into the new CSV, instead of the few thousand matching rows I expect. Sorry for the simple question, but I've been stumped for quite a while.
e.g.
Sample CSV:
ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
0001eeaf4aed83f9,/m/0cmf2,0.022673031,0.9642005,0.07103825,0.80054647,0
00075905539074f2,/m/04yx4,0.020477816,0.32935154,0.0956023,0.665392,0
00075905539074f2,/m/04yx4,0.3208191,0.63993174,0,0.6596558,0
00075905539074f2,/m/04yx4,0.6757679,0.9914676,0.17208412,0.94837475,0
0007cebe1b2ba653,/m/07mhn,0.7359882,0.9262537,0.022123894,0.40265486,0
0007cebe1b2ba653,/m/0bt9lr,0.42035398,0.7935103,0.18141593,0.7212389,0
0007cebe1b2ba653,/m/01g317,0.7345133,0.9321534,0,0.36946902,0
0007d6cf88afaa4a,/m/0bt9lr,0.17342657,0.9020979,0.21678321,0.94172496,0
0008e425fb49a2bf,/m/0bt9lr,0.22610295,0.7150735,0.11170213,0.93439716,0
0009bad4d8539bb4,/m/0cmf2,0.2945508,0.70544916,0.34070796,0.5154867,0
3 sample images in folder: 0001eeaf4aed83f9.jpg, 0007cebe1b2ba653.jpg, 0009bad4d8539bb4.jpg
Expected output CSV:
ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
0001eeaf4aed83f9,/m/0cmf2,0.022673031,0.9642005,0.07103825,0.80054647,0
0007cebe1b2ba653,/m/07mhn,0.7359882,0.9262537,0.022123894,0.40265486,0
0007cebe1b2ba653,/m/0bt9lr,0.42035398,0.7935103,0.18141593,0.7212389,0
0007cebe1b2ba653,/m/01g317,0.7345133,0.9321534,0,0.36946902,0
0009bad4d8539bb4,/m/0cmf2,0.2945508,0.70544916,0.34070796,0.5154867,0

You could use the following approach. Firstly, use glob.glob() to only get the jpg files. A set can be used to hold all of the filenames that are found (without their extensions).
Now you can just read the sample CSV file in a row at a time and use a simple in check to test if the ImageID is one of the file names in the set.
For example:
import glob
import csv
import os

# Image ids = names of the .jpg files in the current directory, minus extension.
files = {os.path.splitext(filename)[0] for filename in glob.glob("*.jpg")}
file_in = "sample.csv"
file_out = "output.csv"
with open(file_in, 'r', newline='') as f_input, open(file_out, 'w', newline='') as f_output:
    csv_input = csv.reader(f_input)
    # Copy the header row across unchanged.
    header = next(csv_input)
    csv_output = csv.writer(f_output)
    csv_output.writerow(header)
    for row in csv_input:
        # Blank lines are parsed as empty lists, so check `row` before row[0].
        if row and row[0] in files:  # Is ImageID one of the filenames found?
            print(row)
            csv_output.writerow(row)
This would give you an output.csv file as follows:
ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
0001eeaf4aed83f9,/m/0cmf2,0.022673031,0.9642005,0.07103825,0.80054647,0
0007cebe1b2ba653,/m/07mhn,0.7359882,0.9262537,0.022123894,0.40265486,0
0007cebe1b2ba653,/m/0bt9lr,0.42035398,0.7935103,0.18141593,0.7212389,0
0007cebe1b2ba653,/m/01g317,0.7345133,0.9321534,0,0.36946902,0
0009bad4d8539bb4,/m/0cmf2,0.2945508,0.70544916,0.34070796,0.5154867,0

Python csv register_dialect delimiter is not working

I have written the code below to read in a large csv file with many variables and then print just one variable for every row to the outfile. It is working, except that the delimiter is not being picked up.
import csv
fieldnames = ['tag']
outfile = open('ActiveTags.txt', 'w')
# NOTE: the delimiter is only written BETWEEN fields of the same row. Each row
# here has a single field ('tag') and lineterminator is '', so the output is
# the quoted values run together with nothing separating them.
csv.register_dialect('me', delimiter=',', quotechar="'", quoting=csv.QUOTE_ALL, lineterminator='')
writer = csv.DictWriter(outfile, fieldnames=fieldnames, dialect='me')
with open('ActiveList_16.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        Tag = row['Tag']
        writer.writerow({'tag': Tag})
outfile.close()
What am I missing here? I do not understand why the delimiter is not working on the outfile.

how to delete entire row in csv file and save changes on same file?

I'm new to Python and am trying to modify a csv file so that I can delete specific rows whose fields match a given list.
With my current code I can find the rows I want to delete, but I can't delete them and save the changes to the same file (replace it).
import os, sys, glob
import time ,csv
# Open a file
path = 'C:\\Users\\tzahi.k\\Desktop\\netzer\\'
dirs = os.listdir( path )
fileslst = []
alertsCode = ("42001", "42003", "42006","51001" , "51002" ,"61001" ,"61002","71001",
"71002","71003","71004","71005","71006","72001","72002","72003","72004",
"82001","82002","82003","82004","82005","82006","82007","83001","84001")
# This would print the unnesscery codes
for file in dirs:
if "ALERTS" in file.upper() :
fileslst.append(file)
fileslst.sort()
with open(fileslst[-1], 'rb') as csvfile:
csvReader = csv.reader(csvfile)
for row in csvReader:
for alert in alertsCode:
if any(alert in row[2] for s in alertsCode) :
print row
any help?

Read all the rows into a list using a list comprehension and excluding the unwanted rows. Then rewrite the rows to the file in mode w (write mode) which overwrites or replaces the content of the file:
# os.listdir() returns bare names, so join with `path` to open the right file
# regardless of the current working directory.
target = os.path.join(path, fileslst[-1])
# Python 2 style binary modes, matching the question; on Python 3 use
# open(target, 'r', newline='') and open(target, 'w', newline='') instead.
with open(target, 'rb') as csvfile:
    csvReader = csv.reader(csvfile)
    # Keep a row unless its third column contains one of the alert codes.
    # Rows with fewer than three columns cannot match and are kept.
    clean_rows = [
        row for row in csvReader
        if len(row) < 3 or not any(alert in row[2] for alert in alertsCode)
    ]
# Re-opening in write mode truncates the file before the kept rows are written.
with open(target, 'wb') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerows(clean_rows)

Python CSV: Can I do this with one 'with open' instead of two?

I am a noobie.
I have written a couple of scripts to modify CSV files I work with.
The scripts:
1.) Change the headers of a CSV file, then save that to a new CSV file.
2.) Load that CSV File, and change the order of select columns using DictWriter.
from tkinter import *
from tkinter import filedialog
import os
import csv
root = Tk()
fileName = filedialog.askopenfilename(filetypes=(("Nimble CSV files", "*.csv"),("All files", "*.*")))
outputFileName = os.path.splitext(fileName)[0] + "_deleteme.csv" #my temp file
forUpload = os.path.splitext(fileName)[0] + "_forupload.csv"
#Open the file - change the header then save the file
with open(fileName, 'r', newline='') as infile, open(outputFileName, 'w', newline='') as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile, delimiter=',', lineterminator='\n')
    row1 = next(reader)
    #new header names
    row1[0] = 'firstname'
    row1[1] = 'lastname'
    row1[4] = 'phone'
    row1[5] = 'email'
    row1[11] = 'address'
    row1[21] = 'website'
    #write the temporary CSV file
    writer.writerow(row1)
    for row in reader:
        writer.writerow(row)
#Open the temporary CSV file - rearrange some columns
with open(outputFileName, 'r', newline='') as dInFile, open(forUpload, 'w', newline='') as dOutFile:
    fieldnames = ['email', 'title', 'firstname', 'lastname', 'company', 'phone', 'website', 'address', 'twitter']
    # extrasaction='ignore' drops columns not listed in fieldnames;
    # restval='' fills listed columns that are missing from a row.
    dWriter = csv.DictWriter(dOutFile, restval='', extrasaction='ignore', fieldnames=fieldnames, lineterminator='\n')
    dWriter.writeheader()
    for row in csv.DictReader(dInFile):
        dWriter.writerow(row)
My question is: Is there a more efficient way to do this?
It seems like I shouldn't have to make a temporary CSV file ("_deleteme.csv") I then delete.
I assume making the temporary CSV file is a rookie move -- is there a way to do this all with one 'With open' statement?
Thanks for any help, it is greatly appreciated.
--Luke

csvfile can be any object with a write() method. You could craft a custom element, or use StringIO. You'd have to verify efficiency yourself.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Add file name to first column at start of .csv file - python

Related

Data is adjacent to headers in csv

Copying Specific Rows between CSV Files Python

Python csv register_dialect delimiter is not working

how to delete entire row in csv file and save changes on same file?

Python CSV: Can I do this with one 'with open' instead of two?

Categories

Resources