Use Every CSV file in a Folder - python

I'm trying to have this program use all of the CSV files inside of a folder. The folder is on my desktop, and it's called "Web_Scraper" (I'm on a mac btw).
This is how the code looks; what do I replace whatever is inside of the "enumerate" with (the 1.csv, 2.csv, 3.csv, etc)?
from openpyxl import Workbook
import csv
import os
wb = Workbook()
ws = wb.worksheets[0]
header_keys = []
for n, fName in
enumerate(['3.csv','4.csv','5.csv','6.csv','7.csv','8.csv','9.csv','10.csv','11.csv','12.csv',]):
with open(fName) as fh:
csv_reader = csv.DictReader(fh, fieldnames=['header', 'data'], delimiter=',')
if n == 0:
for values in csv_reader:
I think I'm supposed to use something like os.listdir, but I'm not exactly sure what the syntax should be.

You can take advantage of the glob module
import glob

# glob.glob() returns a list of the file names that match the pattern,
# relative to the current working directory (an empty list if none match).
scraper_files = glob.glob('*.csv')

you can use something like below,
import csv
import os
from pathlib import Path

rootdir = 'xx/xx/x'  # placeholder: replace with the folder to scan

# os.walk() visits rootdir and every sub-directory below it.
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        # Compare the suffix of this particular file name; `suffix` is an
        # attribute of a Path instance, not of the Path class itself.
        if Path(file).suffix == '.csv':
            # `file` is a bare name, so join it with the directory it was
            # found in, and hand DictReader an open file rather than a string.
            with open(os.path.join(subdir, file), newline='') as fh:
                csv_reader = csv.DictReader(fh, fieldnames=['header', 'data'], delimiter=',')
                # Iterate over csv_reader here, while the file is still open.

You can use os module
import os

# os.listdir() does not expand "~"; turn it into the real home directory first.
folder = os.path.expanduser('~/Desktop/Web_Scraper')
os.listdir(folder)  # Returns list of files or directory names inside that dir
And you can apply filter on file name extension to filter out only csv files

You can use python's inbuilt os module.
import os
os.listdir("this-directory") # returns list of files in this-directory
E.g.
import csv
import os

folder = "."  # reading from current directory, replace it with your directory.

# os.listdir() returns names in arbitrary order; sorting makes "the first
# file" (n == 0) the same one on every run.
csv_names = sorted(name for name in os.listdir(folder) if name.endswith(".csv"))

for n, name in enumerate(csv_names):
    # listdir() yields bare names, so join them with the folder before opening.
    with open(os.path.join(folder, name), newline="") as fh:
        csv_reader = csv.DictReader(fh, fieldnames=['header', 'data'], delimiter=',')
        if n == 0:
            for values in csv_reader:
                pass  # the per-row handling from the question's loop goes here
Hope it helps!

Related

Is there a way to use a retrieved file name as a variable?

I'm looking to retrieve a list of CSV files, and use these names as variables to open and retrieve their content. Something like this:
import csv
import os
files = os.listdir('C:/csvs')
with open(files[0], 'r') as csv_file:
csv_reader = csv.reader(csv_file)
for line in csv_reader:
if line[1]=="**STAFF**":
pass
else:
print(line)
If I print files[0], I do get the correct content, but when I try the above code it does not work.
os.listdir(directory_path) gives filenames which are inside the folder. To actually use the file you need the full path (absolute or relative). This can be easily done by appending each file's name to the directory_path like this:
import os
# directory_path is a placeholder for the folder being listed.
# os.listdir() returns the bare entry names inside it, not full paths.
files = os.listdir(directory_path)
# Prefix the first entry with its directory so the result can be passed to open().
full_file_path = os.path.join(directory_path, files[0])
You can also use glob to save the trouble of joining the paths.

Creating folders from a single column in a csv and using other column to rename files into folders

I have a csv file that looks like this:
dc_identifier,aubrey_identifier
AR0776-280206-LT513-01,metadc1084267
AR0776-280206-LT513-02,metadc1083385
AR0776-280206-LT513-03,metadc1084185
AR0776-280206-LT513-04,metadc1083449
AR0776-280206-LT513-05,metadc1084294
AR0776-280206-LT513-06,metadc1083393
AR0776-280206-LT513-07,metadc1083604
AR0776-280206-LT513-08,metadc1083956
AR0776-280206-LT513-09,metadc1083223
AR0776-280206-LT513-10,metadc1084224
I need to create folders with the "metadc#######" names within the directory that the script will live in.
Here's what I have so far:
import os
import fileinput
path = 'C:\Users\gpp0020\Desktop\TestDir'
textFile = 'C:\Users\gpp0020\Desktop\TestDir\kxas_ids.csv'
myList = open(textFile, 'rb+')
for line in myList:
for item in line.strip().split(','):
os.makedirs(os.path.join(path, item))
print 'created', item
However! I also need the program to grab files that are named with the identifiers (AR0776-280206-LT513-01, etc) and put them in the corresponding metadc number, according to the csv. Each file is doubled (one .mkv file, and one .mkv.md5 checksum file) and both need to go into the folder.
What's the best way to go about this?
Use the csv library to help with reading the file in:
import csv
import os
import shutil

path = r'C:\Users\gpp0020\Desktop\TestDir'

# Each data row pairs a source identifier with the folder it belongs in.
with open('kxas_ids.csv', 'r', newline='') as f_input:
    rows = csv.reader(f_input)
    header = next(rows)  # consume the column-name row before the data rows
    for dv, aubrey in rows:
        target_dir = os.path.join(path, aubrey)
        os.makedirs(target_dir, exist_ok=True)
        # Copy the video first, then its checksum companion.
        for template in ('{}.mkv', '{}.mkv.md5'):
            file_name = template.format(dv)
            shutil.copy2(os.path.join(path, file_name), os.path.join(target_dir, file_name))
This would for example:
Create a folder called C:\Users\gpp0020\Desktop\TestDir\metadc1084267
Copy a file called AR0776-280206-LT513-01.mkv into it.
Copy a file called AR0776-280206-LT513-01.mkv.md5 into it.
It assumes that all files are found in path

How to sequentially read all the files in a directory and export the contents in Python?

I have a directory /directory/some_directory/ and in that directory I have a set of files. Those files are named in the following format: <letter>-<number>_<date>-<time>_<dataidentifier>.log, for example:
ABC1-123_20162005-171738_somestring.log
DE-456_20162005-171738_somestring.log
ABC1-123_20162005-153416_somestring.log
FG-1098_20162005-171738_somestring.log
ABC1-123_20162005-031738_somestring.log
DE-456_20162005-171738_somestring.log
I would like to read a subset of those files (for example, only the files named ABC1-123*.log) and export all their contents to a single CSV file (for example, output.csv), that is, a CSV file that will have all the data from the individual files collectively.
The code that I have written so far:
#!/usr/bin/env python
import os
file_directory=os.getcwd()
m_class="ABC1"
m_id="123"
device=m_class+"-"+m_id
for data_file in sorted(os.listdir(file_dir)):
if str(device)+"*" in os.listdir(file_dir):
print data_file
I don't know how to read only a subset of filtered files, or how to export them to a common CSV file.
How can I achieve this?
Just use the re library to match the file name pattern, and the csv library to export.
Only a few adjustments, You were close
import os

# File-name prefix to select; this is the value the question builds from
# m_class + "-" + m_id.
device = "ABC1-123"

filesFromDir = os.listdir(os.getcwd())
# Sorted so the logs are appended in a predictable order.
fileList = sorted(name for name in filesFromDir if name.startswith(device))

# Input and output are both opened in binary mode, so the separator has to be
# bytes as well: writing the str "\n" raises TypeError on Python 3.
# The with-block closes the output even if reading one of the logs fails.
with open("LogOutput.csv", "ab") as f:
    for file in fileList:
        with open(file, "rb") as log_file:
            f.write(log_file.read())
        f.write(b"\n")
Your question could be better stated. Based on your current code snippet, I'll assume that you want to:
1. Filter files in a directory based on a glob pattern.
2. Concatenate their contents to a file named output.csv.
In python you can achieve (1.) by using glob to list filenames.
import glob

# Print every name in the current directory that matches the pattern.
# print() is called as a function so the snippet also runs on Python 3.
for filename in glob.glob('foo*bar'):
    print(filename)
That would print all files starting with foo and ending with bar in
the current directory.
For (2.) you just read the file and write its content to your desired
output, using python's open() builtin function:
open('filename', 'r')
(Using 'r' as the mode you are asking python to open the file for
"reading", using 'w' you are asking python to open the file for
"writing".)
The final code would look like the following:
import glob
import sys

device = 'ABC1-123'

# Concatenate every file whose name starts with `device` into output.csv.
with open('output.csv', 'w') as output:
    # glob.glob() returns matches in arbitrary order; sort them so the files
    # are written sequentially by name.
    for filename in sorted(glob.glob(device + '*')):
        # Named log_file rather than `input` to avoid shadowing the built-in.
        with open(filename, 'r') as log_file:
            output.write(log_file.read())
You can use the os module to list the files.
import os

files = os.listdir(os.getcwd())
m_class = "ABC1"
m_id = "123"
device = m_class + "-" + m_id
file_extension = ".log"

# filter the files by their extension and the starting name
files = [x for x in files if x.startswith(device) and x.endswith(file_extension)]

# Append each selected log to output.csv, followed by a ",\n" separator.
# The with-block closes the output file even if one of the reads fails.
with open("output.csv", "a") as f:
    for file in files:
        with open(file, "r") as data_file:
            f.write(data_file.read())
        f.write(",\n")

Writing filenames from a folder into a csv

I'm trying to parse all files in a folder and write the filenames in a CSV using Python. The code I used is
import os, csv
f=open("C:/Users/Amber/weights.csv",'r+')
w=csv.writer(f)
for path, dirs, files in os.walk("C:/Users/Amber/Creator"):
for filename in files:
w.writerow(filename)
The result I'm getting in the CSV has individual alphabets in one column rather than the entire row name. How to fix that?
import os, csv

# 'w' creates the file if needed and truncates old content; 'r+' requires an
# existing file and leaves stale bytes behind when the new data is shorter.
# newline='' stops the csv module's own line endings from being translated.
with open("C:/Users/Amber/weights.csv", 'w', newline='') as f:
    w = csv.writer(f)
    for path, dirs, files in os.walk("C:/Users/Amber/Creator"):
        for filename in files:
            # writerow() takes a sequence of fields; a bare string would be
            # split into one column per character.
            w.writerow([filename])
writerow() expects a sequence argument:
import os, csv

# newline='' lets the csv module control line endings itself; without it an
# extra blank line appears after every row on Windows.
with open("C:/Users/Amber/weights.csv", 'w', newline='') as f:
    writer = csv.writer(f)
    for path, dirs, files in os.walk("C:/Users/Amber/Creator"):
        for filename in files:
            # One single-field row per file name.
            writer.writerow([filename])
import csv
import glob

# newline='' lets the csv module control line endings itself.
with open('csvinput.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    a = glob.glob('filepath/*.png')
    # zip(a) wraps each path in a 1-tuple, so every path becomes its own
    # single-field row, i.e. the results end up in one column.
    writer.writerows(zip(a))
import os

if __name__ == "__main__":
    folderpath = 'C:\\Users\\kppra\\Desktop\\Data'
    # Write one file name per line; the with-block closes output.csv even if
    # the directory walk raises.
    with open('output.csv', 'w') as datapath:
        for (root, dirs, files) in os.walk(folderpath, topdown=True):
            for f in files:
                datapath.write(f)
                datapath.write('\n')

Python Script to find file name and size of all files in a directory and save them to a text file for excel import

I am pretty new to python and I am trying to:
Within a user defined directory (1000 images)
Find the file name (Finished)
Find the file size (Finished)
Store to an array
Store array to text file
from Tkinter import Tk
from tkFileDialog import askdirectory
from array import *
import os
Tk().withdraw()
fileDir = askdirectory()
print(fileDir)
dirList=os.listdir(fileDir)
for fname in dirList:
print fname
print os.path.getsize(fileDir+"/"+fname)
For storing the file names and sizes in tuples and writing them to a text file you can do something like this:
# Python 2 module names (Tkinter / tkFileDialog): let the user pick a folder,
# then write one "name,size" line per directory entry to myoutputfile.csv.
from Tkinter import Tk
from tkFileDialog import askdirectory
from array import *
import os
# withdraw() hides the Tk root window before the folder dialog is opened.
Tk().withdraw()
fileDir = askdirectory()
print(fileDir)
dirList = os.listdir(fileDir)
# Generator of (name, size-as-string) pairs; a generator can be consumed only once.
data = ((fname, str(os.path.getsize(fileDir + "/" + fname))) for fname in dirList)
outputFile = open('myoutputfile.csv', 'w')
# Fields are joined by hand, so names containing commas or newlines are not quoted.
for entry in data:
outputFile.write(','.join(entry) + '\n')
outputFile.close()
Running this on my python install directory gives a file that looks like this:
DLLs,4096
Doc,0
include,32768
Lib,65536
libs,4096
LICENSE.txt,40080
NEWS.txt,285115
python.exe,27136
pythonw.exe,27648
README.txt,54967
Scripts,0
tcl,4096
Tools,4096
Which you should be able to open as a .csv file in Excel no problems. Note you may have to quote the file names if any of them have commas, new lines, etc. in them.
For more heavy duty stuff there is the csv module but I think it is overkill for such a simple file. In case you are interested using it would look something like this:
import csv
# NOTE(review): 'wb' is the mode Python 2's csv module expects; on Python 3
# csv.writer needs a text-mode file opened with newline='' - confirm target version.
with open('myoutputfile.csv', 'wb') as outputFile:
csvwriter = csv.writer(outputFile, dialect='excel')
# `data` is not defined in this snippet; presumably the (name, size) pairs
# built in the preceding example - if that generator was already consumed,
# this loop writes nothing.
for entry in data:
csvwriter.writerow(entry)
You probably want to do something like this:
#!/usr/bin/env python
import csv, os
OUT = 'files_and_sizes.csv'
DIR = '<some directory>'
def main():
    """Write one ``name,size`` row to OUT for every regular file in DIR.

    Entries of DIR that are not regular files (for example sub-directories)
    are skipped. OUT is created or truncated.
    """
    # The with-block closes and flushes the output file; newline='' lets the
    # csv module control line endings itself.
    with open(OUT, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        for f in os.listdir(DIR):
            # os.path.join builds the path with the platform's separator.
            full_path = os.path.join(DIR, f)
            if os.path.isfile(full_path):
                writer.writerow([f, os.path.getsize(full_path)])
if __name__ == '__main__':
main()
CSV and Excel play nice with each other, so you shouldn't have any trouble importing it.
some notes about the below implementation:
generally always use the csv library because it properly handles special csv characters like commas and quotes. It can be really messy if you will do this escaping by hand.
it's better not to hard code path delimiter like /, the above two answers won't work on windows machines. By using os.path.join your code will run correctly on all platforms.
code:
# Python 2 module name (tkFileDialog): let the user pick a folder and write a
# (name, size) row for every entry in it to listing.csv.
from tkFileDialog import askdirectory
from glob import glob
from os import listdir
from os.path import join, getsize
import csv
CSV_FILE = 'listing.csv'
path = askdirectory()
files = listdir(path)
# join() builds each entry's path; getsize() returns its size in bytes.
rows = [(i, getsize(join(path, i))) for i in files]
# NOTE(review): 'wb' is the mode Python 2's csv module expects; on Python 3
# this needs text mode with newline='' - confirm target version.
with open(CSV_FILE, 'wb') as f:
writer = csv.writer(f)
writer.writerows(rows)
#!/usr/bin/env python
# Python 2 module names (Tkinter / tkFileDialog): let the user pick a folder
# and write a (name, size) row for every entry in it to entries.csv.
import csv
import os
from Tkinter import Tk
from os.path import join, getsize
from tkFileDialog import askdirectory
# get directory path
root = Tk()
# Hide the root window while the dialog is shown, then dispose of it.
root.withdraw()
dirpath = askdirectory()
root.destroy()
# save filename, file size entries for the directory in csv format
# (generator: the pairs are produced lazily as writerows() consumes them)
entries = ((fn, getsize(join(dirpath, fn))) for fn in os.listdir(dirpath))
# NOTE(review): 'wb' is the mode Python 2's csv module expects; on Python 3
# this needs text mode with newline='' - confirm target version.
with open('entries.csv', 'wb') as file:
csv.writer(file).writerows(entries)

Categories