Move files listed in csv file? - python

I have been trying to use the following code to move files that are listed in a csv list. But at most it will copy the last file in the list but not the rest.
I keep hitting this wall with every example I have seen listed what am I doing wrong?
My CVS list will have a list like:
12355,12355.jpg
Here's my code
import os
import shutil
import csv
keys={}
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter = ',')
for rowDict in reader:
keys[rowDict[0]] = rowDict[1]
print (rowDict)
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
for file in os.listdir(dir_src):
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
if file in rowDict[1]:
shutil.move(src_file, dst_file)

I think doing something like this will work (untested):
import os
import shutil
import csv
keys={}
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter=',')
for rowDict in reader:
keys[rowDict[0]] = rowDict[1]
print(rowDict) # if desired
valid_files = set(keys.values()) # file names found in csv
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
for file in os.listdir(dir_src):
if file in valid_files:
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
shutil.move(src_file, dst_file)
As an optimization, unless you need the keys dictionary for other processing, you could change the first part so it just creates the valid_files set variable used in the second for loop:
valid_files = set() # empty set
with open('shuttle_image.csv', 'r') as f:
for rowDict in csv.reader(f, delimiter=','):
valid_files |= {rowDict[1]} # add file name to set
print(rowDict) # if desired

The reason why it's only the last file that could be copied (if it was) is because in this line:
if file in rowDict[1]:
you are referencing rowDict outside of the first for-loop. So at that execution moment, it contains the last value of this loop.
If I understand correctly what you are trying to do you could try something like this (untested code):
import os
import shutil
import csv
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter = ',')
for rowDict in reader:
id, filename = rowDict
src_file = os.path.join(dir_src, filename)
if os.path.exists(src_file):
shutil.move(src_file, dir_dst)
So instead of:
Constructing a dictionary with all the values in your CSV file
Somehow check for every file in your source directory that it is included in your dictionary (which is what I interpreted you were trying to do)
And move it if it does.
You could:
For every file extracted from your CSV, check that it exists in your source directory.
If it does, you move it to the destination directory.
Is that what you were trying to do ?
[And if the filename stays the same, you only need to specify the destination directory for the second argument of shutil.move()]

Related

How to use elements in list by order

My goal is to change multiple csv files in a folder into JSON.
First, I needed to list my csv files
for file in os.listdir("C:/Users/folder_to_csv"):
filename = os.fsdecode(file)
if filename.endswith(".csv"):
#check if csv files are listed correctly
print(os.path.join("C:/Users/folder_to_csv", filename))
With this, I was able to call csv files in that folder.
Result:
C:/Users/folder_to_csv\file_1.csv C:/Users/folder_to_csv\file_2.csv C:/Users/folder_to_csv\file_3.csv
Then, I wanted to use all of the csv files in 'csvlist' to jsonObj, however for some reason, my codes are only using the first file (C:/Users/folder_to_csv\file_1.csv)
This is what I have tried so far:
import json
import csv
import requests
import threading
import os
for file in os.listdir("C:/Users/folder_to_csv"):
filename = os.fsdecode(file)
if filename.endswith(".csv"):
csvlist = os.path.join("C:/Users/folder_to_csv", filename)
data = {}
def main():
#loop csv list so my codes can read all csv files
length = len(csvlist)
for i in range(length):
i += 1
path = csvlist
#switch csv to json
with open(path, mode='r') as f:
reader = csv.DictReader(f)
processdata = [row for row in reader]
dlist = processdata
jsonObj = json.dumps(dlist)
})
print(jsonObj)
main()
In the initial loop, you keep redefining the csvlist variable. I suppose you want it to be a list? Then just create an initial empty list and append to it instead of redefining
csvlist = []
...
csvlist.append(os.path.join("C:/Users/folder_to_csv", filename))

python csv add column name

Can you help me please. how to add column name to csv file with python.
dirname = os.path.dirname(os.path.abspath(__file__))
csvfilename = os.path.join(dirname, 'csvfile.csv')
file_exists = os.path.isfile(csvfilename)
f = open(csvfilename,'a')
f.write(list[0] + ';' + '\r\n')
f.close()
may be you can add a header like this?
with open(csvfilename, 'wt', newline ='') as file:
write_header = csv.writer(file, delimiter=',')
write_header.writerow(i for i in list[0])
Since you just want to modify a single line of the file there isn't a need to run this all through a CSV processor. Its generally best not to read and write to the same file. You can create a temporary file and make the changes to the top of the file before copying the bulk.
import shutil
dirname = os.path.dirname(os.path.abspath(__file__))
csvfilename = os.path.join(dirname, 'csvfile.csv')
tmpfilename = os.path.join(dirname, 'csvfile.csv.tmp')
file_exists = os.path.isfile(csvfilename)
with open(csvfilename, 'rb') as f_in, open(tmpfilename, 'wb') as f_out:
# discard current header
next(f_in)
# write new header
f_out.write("colname\r\n".encode())
# copy the rest of the file
shutil.copyfileobj(f_in, f_out)
# replace original csv with fixed
shutil.move(tmpfilename, csvfilename)

How to add for loop in python?

I'm creating new files from originally existing ones in the mdp folder by changing a couple of lines in those files using python. I need to do this for 1000 files. Can anyone suggest a for loop which reads all files and changes them and creates new in one go?
This way I have to change the the number followed by 'md_' in the path and it's tedious because there are a 1000 files here.
I tried using str() but there was a 'could not read file error'
fin = open("/home/abc/xyz/mdp/md_1.mdp", "rt")
fout = open("/home/abc/xyz/middle/md_1.mdp", "wt")
for line in fin:
fout.write(line.replace('integrator = md', 'integrator
= md-vv'))
fin = open("/home/abc/xyz/middle/md_1.mdp", "rt")
fout = open("/home/abc/xyz/mdb/md_1.mdp", "wt")
for line in fin:
fout.write(line.replace('dt = 0.001', 'dt
= -0.001'))
fin.close()
fout.close()
os.listdir(path) is your friend:
import os
sourcedir = "/home/abc/xyz/mdp"
destdir = "/home/abc/xyz/middle"
for filename in os.listdir(sourcedir):
if not filename.endswith(".mdp"):
continue
source = os.path.join(sourcedir, filename)
dest = os.path.join(destdir, filename)
# with open(xxx) as varname makes sure the file(s)
# will be closed whatever happens in the 'with' block
# NB text mode is the default, and so is read mode
with open(source) as fin, open(dest, "w") as fout:
# python files are iterable... avoids reading
# the whole file in memory at once
for line in fin:
# will only work for those exact strings,
# you may want to use regexps if number of
# whitespaces vary etc
line = line.replace("dt = 0.001", "dt = -0.001")
line = line.replace(
'integrator = md',
'integrator = md-vv'
)
fout.write(line)
Assuming you want to edit all files that are located in the mdp folder you could do something like this.
import os
dir = "/home/abc/xyz/mdp/"
for filename in os.listdir(dir):
with open(dir + filename, "r+") as file:
text = file.read()
text = text.replace("dt = 0.001", "dt = -0.001")
file.seek(0)
file.write(text)
file.truncate()
This will go through every file and change it using str.replace().
If there are other files in the mdp folder that you do not want to edit, you could use and if-statement to check for the correct file name. Add something like this to encase the with open statement.
if filename.startswith("md_")

Select an item in csv based on value of second column

I'm trying to copy the files to another folder based on my csv. My csv contains the list of files in my folder. But how can I filter it based on the second column? For example copy only this file if the second column contains "undetected".
Here is my code but I dont know how to filter the files. This copies all my files to another folder.
import os
import shutil
import csv
valid_files = set() # empty set
with open('sha1_vsdt.csv', 'r') as f:
for rowDict in csv.reader(f, delimiter=','):
valid_files |= {rowDict[0] and "Undetected" in rowDict [2] } # add file name to set
print(rowDict) # if desired
dir_src = 'C:\Users\Administrator\Desktop\OJT\scanner\\samples_extracted'
dir_dst = 'C:\Users\Administrator\Desktop\OJT\scanner\\transfer'
for file in os.listdir(dir_src):
if file in valid_files:
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
shutil.copy(src_file, dst_file)
How do I correct this line?
valid_files |= {rowDict[0] and "Undetected" in rowDict [2] } # add file name to set
Example entries of my csv
0191a23ee122bdb0c69008971e365ec530bf03f5,aaa,MIME 6010-0
02b809d4edee752d9286677ea30e8a76114aa324,bbb,Microsoft RTF 6008-0
0349e0101d8458b6d05860fbee2b4a6d7fa2038d,ccc,Adobe Portable Document Format(PDF) 6015-0
035a7afca8b72cf1c05f6062814836ee31091559,ddd,Adobe Portable Document Format(PDF) 6015-0
042065bec5a655f3daec1442addf5acb8f1aa824,eee,Undetected
04939e040d9e85f84d2e2eb28343d94a50ed46ac,fff,Undetected
An if should be all that is required:
for rowDict in csv.reader(f, delimiter=','):
if "Undetected" in rowDict[2]:
valid_files.add(rowDict[0])

grab headers from multiple tsv/csv files

I have a list of tsv files where I am looking to grab column headers for all the files.
with open(os.path.abspath('reference/file.tsv'), 'rU') as file:
reader = csv.reader(file)
row1 = next(reader)
Currently, this snippet only reads 1 file where I have a list of files that needs to be parsed.
dir_path = os.path.abspath('reference/')
files = os.listdir(dir_path)
The name of the files are listed in files. How do I loop through the list of files and grab only the column headers for each file?
I try this and it works.
import os
import csv
dir_path = os.path.abspath('reference/')
files = os.listdir(dir_path)
for f in files:
with open(dir_path +'/'+f, 'rU') as file:
reader = csv.reader(file)
row1 = next(reader)
print row1
The files variable in your code is the content of the reference folder, meaning all files and subfolders of the folder. They are returned in a list of strings, containing only the file or subfolder name. This means that you'll have to prefix the path yourself.
Example:
dir_path = os.path.abspath('reference/')
files = os.listdir(dir_path)
for file in files:
# Skip non-files
if not os.path.isfile(file):
continue
with open(os.path.join(dir_path, file), 'rU') as f:
reader = csv.reader(f)
row1 = next(reader)
An alternative using the pathlib module:
for file in Path('reference/').glob('*'):
if not file.is_file():
continue
with open(str(file.resolve()), 'rU') as f:
reader = csv.reader(f)
row1 = next(reader)
Wouldn't you be better off in reading the first line of each of those files, appending them to a list and then passing them to csvreader?
Example:
lines = []
with open(str(file.resolve()), 'rU') as f:
lines.append(f.readline())
reader = csv.reader(lines)
for row in reader:
# whatever you want to do with the parsed lines

Categories