Writing filenames from a folder into a csv - python

I'm trying to parse all files in a folder and write the filenames in a CSV using Python. The code I used is
import os, csv
# 'r+' opens an existing file for reading and writing; the handle is never
# closed in this snippet.
f=open("C:/Users/Amber/weights.csv",'r+')
w=csv.writer(f)
# Visit the Creator folder and every folder beneath it.
for path, dirs, files in os.walk("C:/Users/Amber/Creator"):
    for filename in files:
        # writerow() iterates over its argument, so a bare string is written
        # one character per cell -- the behaviour this question asks about.
        w.writerow(filename)
The result I'm getting in the CSV has the individual letters of each filename split across separate cells rather than the whole filename in one cell. How do I fix that?

import csv
import os

# 'w' creates the file if it is missing and truncates stale content ('r+'
# does neither); newline='' stops the csv module's own line endings from
# being translated a second time, which shows up as blank rows on Windows.
with open("C:/Users/Amber/weights.csv", 'w', newline='') as f:
    w = csv.writer(f)
    for path, dirs, files in os.walk("C:/Users/Amber/Creator"):
        for filename in files:
            # Wrap the name in a list so it is written as ONE field instead
            # of being iterated character by character.
            w.writerow([filename])

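writerow() expects a sequence of fields. A bare string is itself a sequence of characters, so each character becomes its own field; wrap the filename in a list instead: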
import csv
import os

# newline='' is what the csv module expects for files it writes; without it
# every row is followed by an empty row on Windows.
with open("C:/Users/Amber/weights.csv", 'w', newline='') as f:
    writer = csv.writer(f)
    # os.walk yields (directory, sub-directories, file names) for the whole tree.
    for path, dirs, files in os.walk("C:/Users/Amber/Creator"):
        for filename in files:
            # One-element list -> one cell per row holding the full name.
            writer.writerow([filename])

import csv
import glob

# newline='' lets the csv writer control line endings itself (avoids the
# blank row after every record on Windows).
with open('csvinput.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    # Paths of every .png directly inside 'filepath' (no recursion).
    a = glob.glob('filepath/*.png')
    # zip(a) turns each path into a 1-tuple, i.e. one single-cell row per
    # file, so the results end up in a column.
    writer.writerows(zip(a))

import os

if __name__ == "__main__":
    folderpath = 'C:\\Users\\kppra\\Desktop\\Data'
    # The original opened the file with mismatched quotes ('output.csv"),
    # which is a syntax error. The with-block also guarantees the file is
    # flushed and closed even if the walk fails part-way.
    with open('output.csv', 'w') as datapath:
        for (root, dirs, files) in os.walk(folderpath, topdown=True):
            for f in files:
                # One bare file name per line; nothing is quoted, so a name
                # containing a comma will span two columns when read as CSV.
                datapath.write(f)
                datapath.write('\n')

Related

How to open multiple text files from an array?

I want to open and read several text files. The plan is to find a string in the text files and print the whole line containing that string. The problem is that I can't open the paths from the array.
I hope it is understandable what I am trying to do.
import os
from os import listdir
from os.path import join
from config import cred
path = (r"E:\Utorrent\Leaked_txt")
for filename in os.listdir(path):
    # `list` shadows the builtin and is rebound on every iteration to a
    # one-element list, so only the last path survives the loop.
    list = [os.path.join(path, filename)]
    print(list)
# NOTE(review): the original indentation was lost; this loop is assumed to
# sit after the loop above rather than inside it -- confirm.
# With a one-element list, range(len(list)-1) is range(0), so the body below
# never executes.
for i in range(len(list)-1):
    # The "r" is passed to str(), not to open() -- the closing parenthesis
    # is misplaced.
    with open(str(list[i], "r")) as f:
        for line in f:
            if cred in line:
                print(line)
Thanks :D
I prefer to use glob when reading several files in a directory
import glob
from config import cred  # the search string, imported as in the question

# Every .txt file directly inside the folder.
files = glob.glob(r"E:\Utorrent\Leaked_txt\*.txt")
for file in files:
    with open(file, 'r') as f:
        # Iterate the file object itself: that yields whole lines.
        # Iterating f.read() would walk the text one CHARACTER at a time,
        # so a multi-character search string could never match.
        for line in f:
            if cred in line:
                print(line)
With os, you can do something like this:
import os
from config import cred

path = "E:/Utorrent/Leaked_txt"

# Full paths of the .txt entries found directly in `path`.
files = []
for entry in os.listdir(path):
    if entry.endswith(".txt"):
        files.append(os.path.join(path, entry))

# Print every line that contains the search string.
for txt_path in files:
    with open(txt_path, "r") as handle:
        for line in handle:
            if cred in line:
                print(line)
Edit
os.listdir only includes files from the parent directory (specified by path). To get the .txt files from all sub-directories, use the following:
# Collect the full path of every .txt file under `path`, sub-directories included.
files = []
for root, _, names in os.walk(path):
    for name in names:
        if name.endswith(".txt"):
            files.append(os.path.join(root, name))

read mutiple json files from multiple directories

I have multiple directories which all contain JSON files.
I know how I could read all the files in ONE directory, but not how to read them in all directories without specifying the directory names.
I played around and came up with something like this:
import json
import os
path_to_json = 'path/to/dir/with/dirs'
# os.listdir returns only the entry names directly inside path_to_json
# (no directory prefix, no recursion).
json_files = [pos_json for pos_json in os.listdir(path_to_json)]
for json_file in json_files:
    # Builds "<entry>/" and then tries to open that string as a file; the
    # asker is looking for a way to match "*.json" inside each entry.
    filename = str(json_file + "/") # here something like "*.json"
    with open(filename, 'r') as myfile:
        # Raw text only -- the imported json module is not used in this snippet.
        data=myfile.read()
any help is greatly appreciated
Use os.walk with str.endswith
Ex:
path_to_json = 'path/to/dir/with/dirs'

# Full paths of every file under path_to_json whose name ends in ".json".
json_files = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(path_to_json)
    for name in names
    if name.endswith('.json')
]

# Read each file's text; `data` holds the contents of the file read last.
for json_path in json_files:
    with open(json_path, 'r') as handle:
        data = handle.read()
You can use the os.walk and give the top level directory as the directory_name.
import os

root = "<path-to-dir>"
for path, subdirs, files in os.walk(root):
    for filename in files:
        if filename.endswith('.json'):
            # os.walk yields bare file names; they must be joined with the
            # directory being visited, otherwise open() looks in the current
            # working directory and fails for files in sub-directories.
            with open(os.path.join(path, filename), 'r') as myfile:
                data = myfile.read()

I have different zip files that contain one csv file each. how do I unzip each folder, and save all the csv files in one folder

The code I have written does not work for some reason.
import pandas as pd
import glob
import zipfile
path = r"C:/Users/nano/Documents/Project" # use your path
# Matches the *.gz files directly inside `path`.
all_files = glob.glob(path + "/*.gz")
for folder in all_files:
    # The files matched above are .gz, but they are opened with
    # zipfile.ZipFile -- the mismatch the answers address.
    with zipfile.ZipFile(folder,"r") as zip_ref:
        zip_ref.extractall(path)
First, you are using the zip library on gzip files, so you need to switch to the right library. Below is a working example of the code.
import glob
import gzip
import os
import shutil

path = r"C:/Temp/Unzip" # use your path
all_files = glob.glob(path + "/*.gz")
print(all_files)
for file in all_files:
    # Separate names are used so the module-level `path` is not overwritten
    # inside the loop.
    folder, filename = os.path.split(file)
    # Drop the trailing ".gz".
    filename = os.path.splitext(filename)[0]
    # "data.csv.gz" already leaves "data.csv"; only append the extension when
    # it is missing so the result is never "data.csv.csv".
    if not filename.lower().endswith('.csv'):
        filename += '.csv'
    with gzip.open(file, "rb") as gz:
        with open(os.path.join(folder, filename), 'wb') as cv:
            # Stream the decompressed bytes across. writelines(gz.read())
            # iterates a bytes object, which yields ints on Python 3 and
            # raises TypeError.
            shutil.copyfileobj(gz, cv)
gzip (.gz) and zip (.zip) are two different things. For gzip, you can use gzip:
import glob
import gzip
import shutil

path = r"C:/Users/shedez/Documents/Project" # use your path
all_files = glob.glob(path + "/*.gz")

# Decompress each archive next to itself, under the same name minus ".gz".
for archive in all_files:
    target = archive[:-len(".gz")]
    with gzip.open(archive, 'rb') as compressed:
        with open(target, 'wb') as plain:
            shutil.copyfileobj(compressed, plain)
If you use the gz (gzip) format, you might want to look at the gzip package. I'm not aware of an extract method, but you can do something like the following using pandas alone, which I find more convenient:
for folder in all_files:
    # pandas decompresses the gzip stream itself while parsing the CSV.
    c = pd.read_csv(folder, compression='gzip')
    # `folder` is already the full path returned by glob, so it must not be
    # prefixed with `path` again; [:-2] drops the trailing "gz" and keeps
    # the dot, giving "<name>.csv".
    c.to_csv(folder[:-2] + "csv")
The [:-2] cuts the trailing "gz", and you might want to change either the parameters of read_csv (adding a header row, or whatever) or the flags of to_csv (setting the arguments header=False, index_label=False) to prevent pandas from adding undesired content.
alternatively, you could open it with gzip
import gzip
import shutil

# To DEcompress, the .gz file is the one read through gzip.open and the
# destination is a plain binary file. With the two swapped, the archive
# would be compressed a second time instead.
with gzip.open(folder, 'rb') as f_in, open(folder[:-2]+"csv", 'wb') as f_out:
    shutil.copyfileobj(f_in, f_out)
Try out this code:
import os
import zipfile

dir_name = 'C:\\Users\\shedez\\Documents\\Project' # ZIP location
extract_dir_name = 'C:\\Users\\shedez\\Documents\\Project\\Unziped' # CSV location after unzip
extension = ".zip" # you might have to change this

for item in os.listdir(dir_name): # loop through items in dir
    if item.endswith(extension): # check for ".zip" extension
        # Build the full path explicitly instead of changing the process's
        # working directory and relying on abspath().
        file_name = os.path.join(dir_name, item)
        # The with-block closes the archive even if extraction raises.
        with zipfile.ZipFile(file_name) as zip_ref:
            zip_ref.extractall(extract_dir_name) # extract file to dir
If you want to learn more about zipFile, click here.

Use Every CSV file in a Folder

I'm trying to have this program use all of the CSV files inside of a folder. The folder is on my desktop, and it's called "Web_Scraper" (I'm on a mac btw).
This is how the code looks; what do I replace whatever is inside of the "enumerate" with (the 1.csv, 2.csv, 3.csv, etc)?
from openpyxl import Workbook
import csv
import os
wb = Workbook()
ws = wb.worksheets[0]
header_keys = []
for n, fName in
enumerate(['3.csv','4.csv','5.csv','6.csv','7.csv','8.csv','9.csv','10.csv','11.csv','12.csv',]):
with open(fName) as fh:
csv_reader = csv.DictReader(fh, fieldnames=['header', 'data'], delimiter=',')
if n == 0:
for values in csv_reader:
I think I'm supposed to use something like os.listdir, but I'm not exactly sure what the syntax should be.
You can take advantage of the glob module
import glob

# Python comments start with '#'; '//' is the floor-division operator and
# made the original line a syntax error.
scraper_files = glob.glob('*.csv')  # returns a list of matching filenames
you can use something like below,
import csv
import os
from pathlib import Path

rootdir = 'xx/xx/x'
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        # `suffix` must be read from a Path built from the file name;
        # Path.suffix on the class itself is just a property object.
        if Path(file).suffix == '.csv':
            # DictReader needs an open file (an iterable of lines), not the
            # file name, and the name has to be joined with its directory.
            with open(os.path.join(subdir, file), newline='') as fh:
                csv_reader = csv.DictReader(fh, fieldnames=['header', 'data'], delimiter=',')
                # Consume csv_reader here, while fh is still open.
You can use os module
import os

# '~' is shell syntax; os.listdir would look for a directory literally named
# "~", so expand it to the user's home directory first.
os.listdir(os.path.expanduser('~/Desktop/Web_Scraper'))  # Returns list of file or directory names inside that dir
And you can apply filter on file name extension to filter out only csv files
You can use python's inbuilt os module.
import os

# Names of the entries (files and sub-directories) found in this-directory.
target_dir = "this-directory"
os.listdir(target_dir)
E.g.
import csv
import os

# reading from current directory, replace "." with your directory.
csv_names = [name for name in os.listdir(".") if name.endswith(".csv")]

# enumerate() supplies the counter `n` that the question's loop relies on;
# the original snippet used `n` without ever defining it.
for n, file in enumerate(csv_names):
    with open(file, newline='') as fh:
        csv_reader = csv.DictReader(fh, fieldnames=['header', 'data'], delimiter=',')
        if n == 0:
            for values in csv_reader:
                ...  # row handling continues as in the question's code
Hope it helps!

How to Save file names and their directories path in a text file using Python

I am trying to find a string that is contained in files under a directory, and then store the matching file names and their directories in a new text file or something similar.
I got as far as walking through a directory, finding the string, and printing a result, but I am not sure of the next step.
Please help, I'm completely new to coding and Python.
import glob, os
# Read the whole of target.txt; its text is the string searched for below.
# (glob is imported but not used in this snippet.)
source = open('target.txt').read()
# Created but never appended to in this snippet.
filedirectories = []
# Switch to the desktop folder and walk everything beneath it, printing each
# file's path.
os.chdir("/Users/a1003584/desktop")
for root, dirs, files in os.walk(".", topdown=True):
    for name in files:
        print(os.path.join(root, name))
        # Reads each file in full and checks whether it contains `source`.
        if source in open(os.path.join(root, name)).read():
            # Python 2 print statement.
            print 'treasure found.'
Don't do a string comparison if you're looking for a dictionary. Instead, use the json module, like this:
import json
import os

# Paths of every matching file; searchDir() appends to this list.
filesFound = []


def searchDir(dirName):
    """Recursively record JSON files under *dirName* that have the wanted key.

    Every regular file below *dirName* is parsed as JSON. When the parsed
    document is a dictionary containing the key "KeySearchedFor", the file's
    path is appended to the module-level ``filesFound`` list.

    Args:
        dirName: Directory to search, with or without a trailing separator.
    """
    for name in os.listdir(dirName):
        # os.path.join copes with a missing trailing separator, which plain
        # string concatenation does not.
        fullPath = os.path.join(dirName, name)
        # If it is a directory.
        if os.path.isdir(fullPath):
            searchDir(fullPath)
            continue
        # Skip anything that is neither a directory nor a regular file.
        if not os.path.isfile(fullPath):
            continue
        try:
            # json.load needs an open file object, not a path.
            with open(fullPath) as handle:
                fileCon = json.load(handle)
        except (OSError, ValueError):
            # Unreadable, wrongly encoded, or simply not JSON: move on rather
            # than falling through to an unbound `fileCon`.
            print("None json file.")
            continue
        # Valid JSON can also be a list, string or number; only a dictionary
        # can carry the key.
        if isinstance(fileCon, dict) and "KeySearchedFor" in fileCon:
            filesFound.append(fullPath)
# Change this to the directory you're looking in.
# '~' is not expanded by os.listdir/open, so resolve it explicitly; joining
# with "" leaves a trailing separator on the directory path.
searchDir(os.path.join(os.path.expanduser("~/Desktop"), ""))
# write() needs a string, not a list: emit one found path per line, and let
# the with-block close the file.
with open(os.path.expanduser("~/Desktop/OutFile.txt"), 'w') as outFile:
    outFile.write("\n".join(filesFound))
This should write the output to a csv file
import csv
import os

# The text to look for is the full content of target.txt.
with open('target.txt') as infile:
    source = infile.read()

# newline='' lets the csv writer manage line endings (no blank rows on Windows).
with open("output.csv", 'w', newline='') as fout:
    outfile = csv.writer(fout)
    outfile.writerow("Directory FileName FilePath".split())
    for root, dirnames, fnames in os.walk("/Users/a1003584/desktop", topdown=True):
        for fname in fnames:
            fpath = os.path.join(root, fname)
            try:
                with open(fpath) as infile:
                    found = source in infile.read()
            except (OSError, UnicodeDecodeError):
                # Unreadable or non-text file: skip it instead of aborting
                # the whole walk.
                continue
            if not found:
                continue
            # writerow() takes ONE sequence of fields, not separate arguments.
            outfile.writerow([root, fname, fpath])

Categories