How to show filenames in csv format - python

I am fetching file names from my local file directory and there are 411 items there but after printing it shows me file names and I am not able to count them in python and second problem is by saving in CSV format, it doesn't show me all the file names? Where is my mistake?
import os
FOLDER_PATH = '/home/bilal/Documents/Books/English/'
def listDir(dir):
fileNames = os.listdir(dir)
for fileName in fileNames:
my_file = open('my_file.csv', 'w')
my_file.write(fileName)
# print('File Name: ' + fileName)
if __name__ == '__main__':
listDir(FOLDER_PATH)

import glob
path = r"/home/bilal/Documents/Books/English/*.csv"
my_file = open('my_file.csv', 'w')
fileList = list(map(os.path.basename, glob.glob(path)))
for filename in fileList:
print(filename)
my_file.write(filename)
my_file.close()
or
import glob
path = r"/home/bilal/Documents/Books/English/*.csv"
with open('my_file.csv', 'w') as my_file:
fileList = list(map(os.path.basename, glob.glob(path)))
for filename in fileList:
print(filename)
my_file.write(filename)

Related

How to create new csv files from a directories with python?

I have some csv files that i have filtered with this code and it works:
with open('path' , 'r')as f:
for lines in f:
if '2020-12-31' in lines:
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)
Firstly i would like do this but for ALL csv file in my folder.
Secondly i would like to do this in prompt command line if possible( with sys?).
i tried:
import os
from os import walk
from pathlib import Path
dir = r'myPathFolder1'
target = r'myPathFolder2'
filtered_list=[]
for filenames in os.listdir(dir):
for f in filenames:
if f.endswith(".csv"):
newfile = open(dir + f, 'w')
with open(f , 'r') as t:
for lines in t:
if '2020-12-31' in lines:
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)
But it doesnt work.
The full code would be, I tried my code, it will copy to another folder.
import os,fnmatch
dir = "C:\\Users\\Frederic\\Desktop\\"
def find(pattern, path):
result = []
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
result.append(os.path.join(root, name))
return result
filtered_list = find('*.csv', dir)
print(filtered_list)
for filenames in filtered_list:
print(filenames)
for f in filtered_list:
if f.endswith(".csv"):
print(f.endswith(".csv"))
base_dir_pair = os.path.split(f)
address = "C:\\Users\\Frederic\\Desktop\\aa\\"
address = address + base_dir_pair[1]
print(address)
newfile = open(address, 'w')
with open(f, 'r') as t:
print("in1")
for lines in t:
print("in2")
if '2020-12-31' in lines:
print("in3")
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)

How do I iterate over files in a directory including sub directories, using Python?

I try writing a script that counts the lines of code in a project.
The problem I have is it doesn't find all files.
The script looks like this:
import os
root = r"C:\Users\username\data\projects\test"
allLines = []
for path, subdirs, files in os.walk(root):
for name in files:
filepath = os.path.join(path, name)
if not filepath.endswith( ('.cs','.vb') ):
break
with open(filepath) as f:
lines = f.read().splitlines()
for line in lines:
allLines.append(line)
print(len(allLines))
What's wrong with the code?
In your case the issue is the break, if the file doesn't end with .cs or .vb you just skip the directory, you need to change it for continue as follows:
import os
root = r"C:\Users\frank\Daten\Projekte\CS\mpv.net"
allLines = []
for path, subdirs, files in os.walk(root):
for name in files:
filepath = os.path.join(path, name)
if not filepath.endswith( ('.cs','.vb') ):
continue
with open(filepath) as f:
lines = f.read().splitlines()
for line in lines:
allLines.append(line)
print(len(allLines))
This code can also receive improvements:
import os
root = r"C:\Users\frank\Daten\Projekte\CS\mpv.net"
allLines = 0
for path, subdirs, files in os.walk(root):
for name in files:
if not filepath.endswith( ('.cs','.vb') ):
continue
filepath = os.path.join(path, name)
with open(filepath) as f:
lines += len(f.read().splitlines())
print(allLines)

Python: work with recursive folders to read and write

I have this code:
# cwd = "C:\Users\johnr\Desktop\myFolder" - current working directory
for filename in os.listdir(os.path.join(cwd, "content")):
header_file = open(header_file_dir, "r")
footer_file = open(footer_file_dir, "r")
if ".md" in filename:
newFilename = filename.replace(".md", ".html")
if ".tile" in filename:
newFilename = filename.replace(".tile", ".html")
elif ".html" in filename:
newFilename = filename
elif ".txt" in filename:
newFilename = filename.replace(".txt", ".html")
else:
print(filename+" is not a valid file type!")
currents_working_file = open(os.path.join(cwd, "build", newFilename), "w")
# Write the header
currents_working_file.write(header_file.read())
# Get the actual stuff we want to put on the page
text_content = open(os.path.join(cwd, "content", filename), "r")
if ".md" in filename:
text_cont1 = "\n"+markdown.markdown(text_content.read())+"\n"
elif ".tile" in filename:
text_cont1 = "\n"+textile.textile(text_content.read())+"\n"
elif ".html" in filename:
text_cont1 = text_content.read()
elif ".txt" in filename:
text_cont1 = text_content.read()
else:
print(filename+" is not a valid file type!")
# Write the text content into the content template and onto the build file
content_templ_dir = os.path.join(cwd, "templates", "content_page.html")
if os.path.exists(content_templ_dir):
content_templ_file = open(content_templ_dir, "r")
content_templ_file1 = content_templ_file.read()
content_templ_file2 = content_templ_file1.replace("{page_content}", text_cont1)
currents_working_file.write(content_templ_file2)
else:
currents_working_file.write(text_cont1)
# Write the footer to the build file
currents_working_file.write("\n"+footer_file.read())
# Close the build file
currents_working_file.close()
which searches for a file in the 'content' directory and then creates a file of the same name in the'build' directory. How can I make this work when there are files in folders in the 'content' directory?
In order to recursively traverse directories, Python provides os.walk:
for root, dirs, files in os.walk(os.path.join(cwd, "content")):
relative_path = os.path.relpath(root, os.path.join(cwd, "content"))
for filename in files:
currents_working_file = open(os.path.join(cwd, "build", relative_path, filename), "w")
Assuming that cwd just holds the path to the current working dir:
from pathlib import Path
from itertools import chain
source_extensions = {'md', 'html', 'txt'}
source_root_dir_path = Path("content")
source_file_paths = chain.from_iterable(
source_root_dir_path.glob("**/*.{}".format(ext)) for ext in source_extensions
)
for p in source_file_paths:
destination_file_path = Path("build", *p.with_suffix(".html").parts[1:])
destination_file_path.parent.mkdir(parents=True, exist_ok=True)
with destination_file_path.open('w') as f:
f.write(header_file.read())
f.write("\n")
f.write(footer_file.read())

How to output in different directory?

I have this:
from os import path
base_path = "C:\\texts\\*.txt"
for file in files:
with open (file) as in_file, open(path.join(base_path,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")
It produced tokenized files in the same directory it reads from. How can I output those out_files in different directory such as "C:\\texts\\Tokenized" ?
I know there are some ways to move those new files to other directory after producing them, but what I wanna know is that if there is anyway to output new files to other directory at the same time they are produced in above code?
Is this what you're looking for:
import os
import glob
source_pattern = 'c:/texts/*.txt'
output_directory = 'c:/texts/tokenized'
# Iterate over files matching source_pattern
for input_file in glob.glob(source_pattern):
# build the output filename
base,ext = os.path.splitext(os.path.basename(input_file))
output_file = os.path.join(output_directory,base + '_tokenized' + ext)
with open(input_file) as in_file, open(output_file,'w') as out_file:
for line in in_file:
out_file.write(','.join(line.split()) + '\n')
This is how I output to files in arbitrary directories :
dir_name = "../some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + '/out.txt'
out_file = open( out_file_name, 'w')
EDIT :
file_name = "{0}_tokenized.txt".format(something_from_tokenizing)
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + file_name
EDIT :
I just tried it, worked for me. You simply need two paths, one for the source directory and one for the destination. Hope this helps.
import os
from os import path
f1 = open("in.txt")
f2 = open("out.txt")
files = ["in.txt", "out.txt"]
base_path = "."
dir_name = "./some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
for file in files:
with open (file) as in_file, open(path.join(dir_name,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")

batch process text to csv using python

I need some help with converting a number of text files to csv files. All my text files are in one folder and I want to convert them to csv files into another folder. The names of individual files should remain the same. Below is the script I got so far...converting an individual file works fine but to work on all the files within a folder is where I am stuck. Any help will be appreciated.
import csv
import os
directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder")
txt_files = directory
csv_files = output
try:
for txt_file in txt_files:
in_txt = csv.reader(open(txt_file, "rb"), delimiter = '=')
for csv_file in csv_files:
out_csv = csv.writer(open(csv_file, 'wb'))
out_csv.writerows(in_txt)
except:
print ()
glob.glob() is perfectly suited for the task. Also, use with context manager when working with files:
import csv
import glob
import os
directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder:")
txt_files = os.path.join(directory, '*.txt')
for txt_file in glob.glob(txt_files):
with open(txt_file, "rb") as input_file:
in_txt = csv.reader(input_file, delimiter='=')
filename = os.path.splitext(os.path.basename(txt_file))[0] + '.csv'
with open(os.path.join(output, filename), 'wb') as output_file:
out_csv = csv.writer(output_file)
out_csv.writerows(in_txt)

Categories