How to create new csv files from a directories with python? - python

I have some csv files that i have filtered with this code and it works:
with open('path' , 'r')as f:
for lines in f:
if '2020-12-31' in lines:
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)
Firstly i would like do this but for ALL csv file in my folder.
Secondly i would like to do this in prompt command line if possible( with sys?).
i tried:
import os
from os import walk
from pathlib import Path
dir = r'myPathFolder1'
target = r'myPathFolder2'
filtered_list=[]
for filenames in os.listdir(dir):
for f in filenames:
if f.endswith(".csv"):
newfile = open(dir + f, 'w')
with open(f , 'r') as t:
for lines in t:
if '2020-12-31' in lines:
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)
But it doesnt work.

The full code would be, I tried my code, it will copy to another folder.
import os,fnmatch
dir = "C:\\Users\\Frederic\\Desktop\\"
def find(pattern, path):
result = []
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
result.append(os.path.join(root, name))
return result
filtered_list = find('*.csv', dir)
print(filtered_list)
for filenames in filtered_list:
print(filenames)
for f in filtered_list:
if f.endswith(".csv"):
print(f.endswith(".csv"))
base_dir_pair = os.path.split(f)
address = "C:\\Users\\Frederic\\Desktop\\aa\\"
address = address + base_dir_pair[1]
print(address)
newfile = open(address, 'w')
with open(f, 'r') as t:
print("in1")
for lines in t:
print("in2")
if '2020-12-31' in lines:
print("in3")
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)

Related

How to show filenames in csv format

I am fetching file names from my local file directory and there are 411 items there but after printing it shows me file names and I am not able to count them in python and second problem is by saving in CSV format, it doesn't show me all the file names? Where is my mistake?
import os
FOLDER_PATH = '/home/bilal/Documents/Books/English/'
def listDir(dir):
fileNames = os.listdir(dir)
for fileName in fileNames:
my_file = open('my_file.csv', 'w')
my_file.write(fileName)
# print('File Name: ' + fileName)
if __name__ == '__main__':
listDir(FOLDER_PATH)
import glob
path = r"/home/bilal/Documents/Books/English/*.csv"
my_file = open('my_file.csv', 'w')
fileList = list(map(os.path.basename, glob.glob(path)))
for filename in fileList:
print(filename)
my_file.write(filename)
my_file.close()
or
import glob
path = r"/home/bilal/Documents/Books/English/*.csv"
with open('my_file.csv', 'w') as my_file:
fileList = list(map(os.path.basename, glob.glob(path)))
for filename in fileList:
print(filename)
my_file.write(filename)

How do I iterate over files in a directory including sub directories, using Python?

I try writing a script that counts the lines of code in a project.
The problem I have is it doesn't find all files.
The script looks like this:
import os
root = r"C:\Users\username\data\projects\test"
allLines = []
for path, subdirs, files in os.walk(root):
for name in files:
filepath = os.path.join(path, name)
if not filepath.endswith( ('.cs','.vb') ):
break
with open(filepath) as f:
lines = f.read().splitlines()
for line in lines:
allLines.append(line)
print(len(allLines))
What's wrong with the code?
In your case the issue is the break, if the file doesn't end with .cs or .vb you just skip the directory, you need to change it for continue as follows:
import os
root = r"C:\Users\frank\Daten\Projekte\CS\mpv.net"
allLines = []
for path, subdirs, files in os.walk(root):
for name in files:
filepath = os.path.join(path, name)
if not filepath.endswith( ('.cs','.vb') ):
continue
with open(filepath) as f:
lines = f.read().splitlines()
for line in lines:
allLines.append(line)
print(len(allLines))
This code can also receive improvements:
import os
root = r"C:\Users\frank\Daten\Projekte\CS\mpv.net"
allLines = 0
for path, subdirs, files in os.walk(root):
for name in files:
if not filepath.endswith( ('.cs','.vb') ):
continue
filepath = os.path.join(path, name)
with open(filepath) as f:
lines += len(f.read().splitlines())
print(allLines)

Copy files from text file in directory

I am attempting to move pdf files from sub-directories in a folder. This code works and moves all pdfs found. I would like to only move pdf files that match number from a text file using this code:
with open('LIST.txt', 'r') as f:
myNames = [line.strip() for line in f]
print myNames
Full code:
import os
import shutil
with open('LIST.txt', 'r') as f:
myNames = [line.strip() for line in f]
print myNames
dir_src = r"C:\Users\user\Desktop\oldfolder"
dir_dst = r"C:\Users\user\Desktop\newfolder"
for dirpath, dirs, files in os.walk(dir_src):
for file in files:
if file.endswith(".pdf"):
shutil.copy( os.path.join(dirpath, file), dir_dst )
example of text file content:
111111
111112
111113
111114
First, create a set instead of a list here so lookup will be faster:
myNames = {line.strip() for line in f}
Then for the filter, I assume that myNames must match the basename (minus extension) of your file(s). So instead of:
if file.endswith(".pdf"):
shutil.copy( os.path.join(dirpath, file), dir_dst )
check the extension and if the basename minus extension belongs to your previously created set:
bn,ext = os.path.splitext(file)
if ext == ".pdf" and bn in myNames:
shutil.copy( os.path.join(dirpath, file), dir_dst )
To match the filename with a substring within myNames, you cannot rely on the in method. You can do:
if ext == ".pdf" and any(s in file for s in myNames):

Python script to loop through all files text in directory... delete any if that's file has lines start by 1

I'm trying to write a Python script that loops through all text files in a directory and deletes any file that has lines that start with 1.
I tried to do it with python, but I don't know how.
My code:
import os
import re
import string
filedir = "C:\Users\Mr The Vinh\Downloads\ver 3.0.2.4\ver 3.0.2.4\baiviet_da_thay_link\test"
t = 0
for root, dirs, filenames in os.walk(filedir):
for f in filenames:
path = os.path.join(root, f)
text = open(path, 'r')
delkey = re.search(r'^1',text.read())
if delkey:
t = t + 1
os.remove(path)
print 'We deleted file:', t
when i run i saw
Process finished with exit code 0 with no erro code
I believe you should iterate over lines with for line in text.readlines(): to check starting of line.
import os
import re
import string
filedir = "C:\Users\Mr The Vinh\Downloads\ver 3.0.2.4\ver 3.0.2.4\baiviet_da_thay_link\test"
t = 0
for root, dirs, filenames in os.walk(filedir):
for f in filenames:
path = os.path.join(root, f)
text = open(path, 'r')
for line in text.readlines():
delkey = re.search(r'^1',line)
if delkey:
t = t + 1
os.remove(path)
break
print 'We deleted file:', t

How to output in different directory?

I have this:
from os import path
base_path = "C:\\texts\\*.txt"
for file in files:
with open (file) as in_file, open(path.join(base_path,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")
It produced tokenized files in the same directory it reads from. How can I output those out_files in different directory such as "C:\\texts\\Tokenized" ?
I know there are some ways to move those new files to other directory after producing them, but what I wanna know is that if there is anyway to output new files to other directory at the same time they are produced in above code?
Is this what you're looking for:
import os
import glob
source_pattern = 'c:/texts/*.txt'
output_directory = 'c:/texts/tokenized'
# Iterate over files matching source_pattern
for input_file in glob.glob(source_pattern):
# build the output filename
base,ext = os.path.splitext(os.path.basename(input_file))
output_file = os.path.join(output_directory,base + '_tokenized' + ext)
with open(input_file) as in_file, open(output_file,'w') as out_file:
for line in in_file:
out_file.write(','.join(line.split()) + '\n')
This is how I output to files in arbitrary directories :
dir_name = "../some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + '/out.txt'
out_file = open( out_file_name, 'w')
EDIT :
file_name = "{0}_tokenized.txt".format(something_from_tokenizing)
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + file_name
EDIT :
I just tried it, worked for me. You simply need two paths, one for the source directory and one for the destination. Hope this helps.
import os
from os import path
f1 = open("in.txt")
f2 = open("out.txt")
files = ["in.txt", "out.txt"]
base_path = "."
dir_name = "./some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
for file in files:
with open (file) as in_file, open(path.join(dir_name,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")

Categories