Copying Specific Rows between CSV Files Python - python

I have an image folder with images saved with their ids. I would like to search the ids in the rows of a CSV file, then copy the rows into a new CSV file. Code as follows:
import os
import csv
folder = os.listdir("[image folder]")
file_in = "[csv containing info rows]"
file_out = "[new csv to create]"
with open(file_in, 'r', newline='') as f_input, open(file_out, 'w', newline='') as f_output:
writer = csv.writer(f_output)
reader = csv.reader(f_input)
writer.writerow(["ImageID", "LabelName", "XMin", "XMax", "YMin", "YMax", "IsGroupOf"]) #writes header of new csv
for filename in folder:
idx = os.path.splitext(filename)[0]
for row in reader:
if idx == row[0]:
print(row)
writer.writerow(row)
It outputs only the first matching id into the new CSV, instead of the actual number which is a few thousand. Sry for the simple task but I've been stumped for quite a while.
e.g.
Sample CSV:
ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
0001eeaf4aed83f9,/m/0cmf2,0.022673031,0.9642005,0.07103825,0.80054647,0
00075905539074f2,/m/04yx4,0.020477816,0.32935154,0.0956023,0.665392,0
00075905539074f2,/m/04yx4,0.3208191,0.63993174,0,0.6596558,0
00075905539074f2,/m/04yx4,0.6757679,0.9914676,0.17208412,0.94837475,0
0007cebe1b2ba653,/m/07mhn,0.7359882,0.9262537,0.022123894,0.40265486,0
0007cebe1b2ba653,/m/0bt9lr,0.42035398,0.7935103,0.18141593,0.7212389,0
0007cebe1b2ba653,/m/01g317,0.7345133,0.9321534,0,0.36946902,0
0007d6cf88afaa4a,/m/0bt9lr,0.17342657,0.9020979,0.21678321,0.94172496,0
0008e425fb49a2bf,/m/0bt9lr,0.22610295,0.7150735,0.11170213,0.93439716,0
0009bad4d8539bb4,/m/0cmf2,0.2945508,0.70544916,0.34070796,0.5154867,0
3 sample images in folder: 0001eeaf4aed83f9.jpg, 0007cebe1b2ba653.jpg, 0009bad4d8539bb4.jpg
Expected output CSV:
ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
0001eeaf4aed83f9,/m/0cmf2,0.022673031,0.9642005,0.07103825,0.80054647,0
0007cebe1b2ba653,/m/07mhn,0.7359882,0.9262537,0.022123894,0.40265486,0
0007cebe1b2ba653,/m/0bt9lr,0.42035398,0.7935103,0.18141593,0.7212389,0
0007cebe1b2ba653,/m/01g317,0.7345133,0.9321534,0,0.36946902,0
0009bad4d8539bb4,/m/0cmf2,0.2945508,0.70544916,0.34070796,0.5154867,0

You could use the following approach. Firstly, use glob.glob() to only get the jpg files. A set can be used to hold all of the filenames that are found (without their extensions).
Now you can just read the sample CSV file in a row at a time and use a simple in check to test if the ImageID is one of the file names in the set.
For example:
import glob
import csv
import os
files = {os.path.splitext(filename)[0] for filename in glob.glob("*.jpg")}
file_in = "sample.csv"
file_out = "output.csv"
with open(file_in, 'r', newline='') as f_input, open(file_out, 'w', newline='') as f_output:
csv_input = csv.reader(f_input)
header = next(csv_input)
csv_output = csv.writer(f_output)
csv_output.writerow(header)
for row in csv_input:
if row[0] in files: # Is ImageID one of the filenames found?
print(row)
csv_output.writerow(row)
This would give you an output.csv file as follows:
ImageID,LabelName,XMin,XMax,YMin,YMax,IsGroupOf
0001eeaf4aed83f9,/m/0cmf2,0.022673031,0.9642005,0.07103825,0.80054647,0
0007cebe1b2ba653,/m/07mhn,0.7359882,0.9262537,0.022123894,0.40265486,0
0007cebe1b2ba653,/m/0bt9lr,0.42035398,0.7935103,0.18141593,0.7212389,0
0007cebe1b2ba653,/m/01g317,0.7345133,0.9321534,0,0.36946902,0
0009bad4d8539bb4,/m/0cmf2,0.2945508,0.70544916,0.34070796,0.5154867,0

Related

Read, then Write CSV with "Non-ISO extended-ASCII" text Encoding

My csv has strings like:
TîezÑnmidnan
I'm trying to use the following below to set up a reader/writer
import csv
# File that will be written to
csv_output_file = open(file, 'w', encoding='utf-8')
# File that will be read in
csv_file = open(filename, encoding='utf-8', errors='ignore')
# Define reader
csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"')
# Define writer
csv_writer = csv.writer(csv_output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
Then iterating over the information read in
# Iterate over the rows in the csv
for idx, row in enumerate(csv_reader):
csv_writer.writerow(row[0:30])
Problem is in my output file I can't get it to show up with that same string. According to my mac, the csv file type has the encoding "Non-ISO extended-ASCII"
I tried various encodings and some would just remove the special characters while others just wouldn't work.
It's weird because I can hard code that string above into a variable and use it without problems, so I assume it's something to do with how I'm reading in the file. If I breakpoint before it writes it shows up as the following in the debugger.
T�ez�nmidnan
I can't convert the file before running it, so the python code has to handle any conversions itself.
The expected output I want would be for it to remain in the output file looking like
TîezÑnmidnan
Adding a link to a sample csv that shows the issue along with a complete version of my code (with some details removed)
Example file to run with this
import tkinter as tk
from tkinter.filedialog import askopenfilename
import csv
import os
root = tk.Tk()
root.withdraw()
# Ask for file
filename = os.path.abspath(askopenfilename(initialdir="/", title="Select csv file", filetypes=(("CSV Files", "*.csv"),)))
# Set output file name
output_name = filename.rsplit('.')
del output_name[len(output_name) - 1]
output_name = "".join(output_name)
output_name += "_processed.csv"
# Using the file that will be written to
csv_output_file = open(os.path.abspath(output_name), 'w', encoding='utf-8')
# Using the file is be read in
csv_file = open(filename, encoding='utf-8', errors='ignore')
# Define reader with , delimiter
csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"')
# Define writer to put quotes around input values with a comma in them
csv_writer = csv.writer(csv_output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
header_row = []
# Iterate over the rows in the csv
for idx, row in enumerate(csv_reader):
if idx != 0:
csv_writer.writerow(row)
else:
header_row = row
csv_writer.writerow(header_row)
csv_file.flush()
csv_output_file.flush()
csv_file.close()
csv_output_file.close()
Expected results
Header1,Header2
Value1,TîezÑnmidnan
Actual results
Header1,Header2
Value1,Teznmidnan
Edit:
chardetect gave me "utf-8 with confidence 0.99"

Add file name to first column at start of .csv file

I'm trying to add a new "filename" column at the start of a .csv file then add the name of the file to each line
Example input.csv
Date,Day,Server
2018-5-20,Su,ASA
2018-5-21,Su,ASA
Example Output
Filename,Date,Day,Server
input,2018-5-20,Su,ASA
input,2018-5-21,Su,ASA
It look's easy to add the filename to the end of the .csv using the csv module, but adding a new column at the start is looking more tricky.
Any help is appreciated
So far i have the below from searching silmilar questions online
with open(r'C:\Python\sys_status\output.csv', 'w', newline='') as file_output:
csv_output = csv.writer(file_output)
for fname in glob.glob(r'C:\Python\sys_status\input.csv'):
with open(fname, newline='') as f_input:
csv_input = csv.reader(f_input)
for row in csv_input:
row.insert(0, fname)
csv_output.writerow(row)
Using the above i'm unable to set a header name, instead it adds the whole path to the file name to the first column
C:\Python\input.csv,Date,Day,Server
C:\Python\input.csv,2018-5 -20,Su,ASA
C:\Python\input.csv,2018-5 -21,Su,ASA
The first row contains the header of CSV file. While writing a new csv file the first writerow should be used to write the header.
import csv, glob, os
with open(r'C:\Python\sys_status\output.csv', 'w', newline='') as file_output:
csv_output = csv.writer(file_output)
for fname in glob.glob(r'C:\Python\sys_status\input.csv'):
with open(fname, newline='') as f_input:
csv_input = csv.reader(f_input)
# Header processing
header = csv_input.__next__()
header.insert(0, "filename")
csv_output.writerow(header)
for row in csv_input:
print(row)
row.insert(0, fname)
csv_output.writerow(row)
Note I suggest using csv.DictWriter and csv.DictReader which can be used to specify the field names to write/read and has a writeheader function. This doesn't give any significant advantage but make our code cleaner.
this should work
import os.path
import csv
import glob
with open(r'C:\Python\sys_status\output.csv', 'w', newline='') as file_output:
csv_output = csv.writer(file_output)
for fname in glob.glob(r'C:\Python\sys_status\input.csv'):
with open(fname, newline='') as f_input:
csv_input = csv.reader(f_input)
fname = os.path.split(fname).pop()
for i, row in enumerate(csv_input):
if i == 0:
row.insert(0, 'Filename')
else:
row.insert(0, fname)
csv_output.writerow(row)

Python csv register_dialect delimiter is not working

I have the written the code below to read in a large csv file with many variables and then just print 1 variable for every row in the outfile. It is working except that the delimiter is not being picked up.
import csv
fieldnames = ['tag']
outfile = open('ActiveTags.txt', 'w')
csv.register_dialect('me', delimiter=',', quotechar="'", quoting=csv.QUOTE_ALL, lineterminator='')
writer = csv.DictWriter(outfile, fieldnames=fieldnames, dialect='me')
with open('ActiveList_16.csv', 'r', newline='') as f:
reader = csv.DictReader(f)
for row in reader:
Tag = row['Tag']
writer.writerow({'tag': Tag})
outfile.close()
What am I missing here? I do not understand why the delimiter is not working on the outfile.

how to delete entire row in csv file and save changes on same file?

i'm new with python and try to modify csv file so i will able to delete specific rows with specific fields according to given list.
in my current code i get the rows which i want to delete but i can't delete it and save the changes on same file (replace).
import os, sys, glob
import time ,csv
# Open a file
path = 'C:\\Users\\tzahi.k\\Desktop\\netzer\\'
dirs = os.listdir( path )
fileslst = []
alertsCode = ("42001", "42003", "42006","51001" , "51002" ,"61001" ,"61002","71001",
"71002","71003","71004","71005","71006","72001","72002","72003","72004",
"82001","82002","82003","82004","82005","82006","82007","83001","84001")
# This would print the unnesscery codes
for file in dirs:
if "ALERTS" in file.upper() :
fileslst.append(file)
fileslst.sort()
with open(fileslst[-1], 'rb') as csvfile:
csvReader = csv.reader(csvfile)
for row in csvReader:
for alert in alertsCode:
if any(alert in row[2] for s in alertsCode) :
print row
any help?
Read all the rows into a list using a list comprehension and excluding the unwanted rows. Then rewrite the rows to the file in mode w (write mode) which overwrites or replaces the content of the file:
with open(fileslst[-1], 'rb') as csvfile:
csvReader = csv.reader(csvfile)
clean_rows = [row for row in csvReader if not any(alert in row[2] for alert in alertsCode)]
# csvfile.truncate()
with open(fileslst[-1], 'wb') as csvfile:
csv_writer = csv.writer(csvfile)
csv_writer.writerows(clean_rows)

Python CSV: Can I do this with one 'with open' instead of two?

I am a noobie.
I have written a couple of scripts to modify CSV files I work with.
The scripts:
1.) change the headers of a CSV file then save that to a new CSV file,.
2.) Load that CSV File, and change the order of select columns using DictWriter.
from tkinter import *
from tkinter import filedialog
import os
import csv
root = Tk()
fileName = filedialog.askopenfilename(filetypes=(("Nimble CSV files", "*.csv"),("All files", "*.*")))
outputFileName = os.path.splitext(fileName)[0] + "_deleteme.csv" #my temp file
forUpload = os.path.splitext(fileName)[0] + "_forupload.csv"
#Open the file - change the header then save the file
with open(fileName, 'r', newline='') as infile, open(outputFileName, 'w', newline='') as outfile:
reader = csv.reader(infile)
writer = csv.writer(outfile, delimiter=',', lineterminator='\n')
row1 = next(reader)
#new header names
row1[0] = 'firstname'
row1[1] = 'lastname'
row1[4] = 'phone'
row1[5] = 'email'
row1[11] = 'address'
row1[21] = 'website'
#write the temporary CSV file
writer.writerow(row1)
for row in reader:
writer.writerow(row)
#Open the temporary CSV file - rearrange some columns
with open(outputFileName, 'r', newline='') as dInFile, open(forUpload, 'w', newline='') as dOutFile:
fieldnames = ['email', 'title', 'firstname', 'lastname', 'company', 'phone', 'website', 'address', 'twitter']
dWriter = csv.DictWriter(dOutFile, restval='', extrasaction='ignore', fieldnames=fieldnames, lineterminator='\n')
dWriter.writeheader()
for row in csv.DictReader(dInFile):
dWriter.writerow(row)
My question is: Is there a more efficient way to do this?
It seems like I shouldn't have to make a temporary CSV file ("_deleteme.csv") I then delete.
I assume making the temporary CSV file is a rookie move -- is there a way to do this all with one 'With open' statement?
Thanks for any help, it is greatly appreciated.
--Luke
csvfile can be any object with a write() method. You could craft a custom element, or use StringIO. You'd have to verify efficiency yourself.

Categories