Copy files from text file in directory

Copy files from text file in directory - python

I am attempting to move pdf files from sub-directories in a folder. This code works and moves all pdfs found. I would like to only move pdf files that match number from a text file using this code:
with open('LIST.txt', 'r') as f:
myNames = [line.strip() for line in f]
print myNames
Full code:
import os
import shutil
with open('LIST.txt', 'r') as f:
myNames = [line.strip() for line in f]
print myNames
dir_src = r"C:\Users\user\Desktop\oldfolder"
dir_dst = r"C:\Users\user\Desktop\newfolder"
for dirpath, dirs, files in os.walk(dir_src):
for file in files:
if file.endswith(".pdf"):
shutil.copy( os.path.join(dirpath, file), dir_dst )
example of text file content:
111111
111112
111113
111114

First, create a set instead of a list here so lookup will be faster:
myNames = {line.strip() for line in f}
Then for the filter, I assume that myNames must match the basename (minus extension) of your file(s). So instead of:
if file.endswith(".pdf"):
shutil.copy( os.path.join(dirpath, file), dir_dst )
check the extension and if the basename minus extension belongs to your previously created set:
bn,ext = os.path.splitext(file)
if ext == ".pdf" and bn in myNames:
shutil.copy( os.path.join(dirpath, file), dir_dst )
To match the filename with a substring within myNames, you cannot rely on the in method. You can do:
if ext == ".pdf" and any(s in file for s in myNames):

Related

How to create new csv files from a directories with python?

I have some csv files that i have filtered with this code and it works:
with open('path' , 'r')as f:
for lines in f:
if '2020-12-31' in lines:
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)
Firstly i would like do this but for ALL csv file in my folder.
Secondly i would like to do this in prompt command line if possible( with sys?).
i tried:
import os
from os import walk
from pathlib import Path
dir = r'myPathFolder1'
target = r'myPathFolder2'
filtered_list=[]
for filenames in os.listdir(dir):
for f in filenames:
if f.endswith(".csv"):
newfile = open(dir + f, 'w')
with open(f , 'r') as t:
for lines in t:
if '2020-12-31' in lines:
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)
But it doesnt work.

The full code would be, I tried my code, it will copy to another folder.
import os,fnmatch
dir = "C:\\Users\\Frederic\\Desktop\\"
def find(pattern, path):
result = []
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
result.append(os.path.join(root, name))
return result
filtered_list = find('*.csv', dir)
print(filtered_list)
for filenames in filtered_list:
print(filenames)
for f in filtered_list:
if f.endswith(".csv"):
print(f.endswith(".csv"))
base_dir_pair = os.path.split(f)
address = "C:\\Users\\Frederic\\Desktop\\aa\\"
address = address + base_dir_pair[1]
print(address)
newfile = open(address, 'w')
with open(f, 'r') as t:
print("in1")
for lines in t:
print("in2")
if '2020-12-31' in lines:
print("in3")
line_data = lines.split(';')
filtered_list.append(line_data)
newfile.write(lines)

How do I iterate over files in a directory including sub directories, using Python?

I try writing a script that counts the lines of code in a project.
The problem I have is it doesn't find all files.
The script looks like this:
import os
root = r"C:\Users\username\data\projects\test"
allLines = []
for path, subdirs, files in os.walk(root):
for name in files:
filepath = os.path.join(path, name)
if not filepath.endswith( ('.cs','.vb') ):
break
with open(filepath) as f:
lines = f.read().splitlines()
for line in lines:
allLines.append(line)
print(len(allLines))
What's wrong with the code?

In your case the issue is the break, if the file doesn't end with .cs or .vb you just skip the directory, you need to change it for continue as follows:
import os
root = r"C:\Users\frank\Daten\Projekte\CS\mpv.net"
allLines = []
for path, subdirs, files in os.walk(root):
for name in files:
filepath = os.path.join(path, name)
if not filepath.endswith( ('.cs','.vb') ):
continue
with open(filepath) as f:
lines = f.read().splitlines()
for line in lines:
allLines.append(line)
print(len(allLines))
This code can also receive improvements:
import os
root = r"C:\Users\frank\Daten\Projekte\CS\mpv.net"
allLines = 0
for path, subdirs, files in os.walk(root):
for name in files:
if not filepath.endswith( ('.cs','.vb') ):
continue
filepath = os.path.join(path, name)
with open(filepath) as f:
lines += len(f.read().splitlines())
print(allLines)

insert a word in lines and save the files using Python

I Have files in a directory that have many lines like this
cd /oasis/projects/nsf/ets100/
oconv /oasis/projects/nsf/ets100/rla
I would like to insert the word "sky/" so the second line reads like this
oconv /oasis/projects/nsf/sky/ets100/rla
What is the best way to do this?
I know that the beginning of the code will be something like this:
path = r"c:\test"
for root, subFolders, files in os.walk(path):
for file in files:
with open(os.path.join(root, file), 'r') as fRead:
if line.startswith("oconv"):

You can do something like this
def replace_line(file):
with open(os.path.abspath(file), 'r') as file_obj:
data = file_obj.readlines()
for index, lines in enumerate(data):
if lines.startswith('oconv'):
data[index] = lines.replace('/nsf/', '/nsf/sky/')
with open(os.path.abspath(file), 'w') as file_obj:
file_obj.writelines(data)
path = r"c:\test"
for root, subFolders, files in os.walk(path):
for file in files:
replace_line(file)

You can use regex to update the string:
import os
import re
path = r"c:\test"
for root, subFolders, files in os.walk('test'):
for file in files:
with open(os.path.join(root, file), 'r+') as fh: #open the file with 'r+' to read and write
content = fh.read()
new_content = re.sub(r'oconv \/oasis\/projects\/nsf\/(.*)', r'oconv /oasis/projects/nsf/sky/\1', content) #replace strings in file
fh.seek(0)
fh.truncate()
fh.write(new_content)

double backslash (\\) in file name while opening

path = r'C:\Myfolder\data\today'
for root, directories, filenames in os.walk(path):
for filename in filenames:
fname = os.path.join(root,filename)
if os.path.isfile(fname) and fname[-4:] == '.log':
if fname not in rows1:
print fname
fname=fname.replace(path,"")
with open(fname, 'r') as myfile:
My file name looks like C:\Myfolder\data\today\00.log and I just need "today\00.log"
The error is IOError: [Errno 2] No such file or directory: '\today\00.log'
How to remove \ from the file name?

Besides, there isn't really an error, you should not use replace to strip off a known prefix, use string-slicing instead and use the full filename to open the file:
path = r'C:\Myfolder\data\today'
for root, directories, filenames in os.walk(path):
for filename in filenames:
fullname = os.path.join(root,filename)
if os.path.isfile(fullname) and fullname[-4:] == '.log':
if fullname not in rows1:
print fullname
fname = fullname[len(path)+1:]
with open(fullname, 'r') as myfile:
do_something

How to output in different directory?

I have this:
from os import path
base_path = "C:\\texts\\*.txt"
for file in files:
with open (file) as in_file, open(path.join(base_path,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")
It produced tokenized files in the same directory it reads from. How can I output those out_files in different directory such as "C:\\texts\\Tokenized" ?
I know there are some ways to move those new files to other directory after producing them, but what I wanna know is that if there is anyway to output new files to other directory at the same time they are produced in above code?

Is this what you're looking for:
import os
import glob
source_pattern = 'c:/texts/*.txt'
output_directory = 'c:/texts/tokenized'
# Iterate over files matching source_pattern
for input_file in glob.glob(source_pattern):
# build the output filename
base,ext = os.path.splitext(os.path.basename(input_file))
output_file = os.path.join(output_directory,base + '_tokenized' + ext)
with open(input_file) as in_file, open(output_file,'w') as out_file:
for line in in_file:
out_file.write(','.join(line.split()) + '\n')

This is how I output to files in arbitrary directories :
dir_name = "../some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + '/out.txt'
out_file = open( out_file_name, 'w')
EDIT :
file_name = "{0}_tokenized.txt".format(something_from_tokenizing)
if not os.path.exists(dir_name) : os.makedirs(dir_name)
out_file_name = dir_name + file_name
EDIT :
I just tried it, worked for me. You simply need two paths, one for the source directory and one for the destination. Hope this helps.
import os
from os import path
f1 = open("in.txt")
f2 = open("out.txt")
files = ["in.txt", "out.txt"]
base_path = "."
dir_name = "./some_dir"
if not os.path.exists(dir_name) : os.makedirs(dir_name)
for file in files:
with open (file) as in_file, open(path.join(dir_name,"%s_tokenized.txt" % file), "w") as out_file:
data = in_file.readlines()
for line in data:
words = line.split()
str1 = ','.join(words)
out_file.write(str1)
out_file.write("\n")

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Copy files from text file in directory - python

Related

How to create new csv files from a directories with python?

How do I iterate over files in a directory including sub directories, using Python?

insert a word in lines and save the files using Python

double backslash (\\) in file name while opening

How to output in different directory?

Categories

Resources