How to replace the txt file in a directory - python

There is a directory A, which contains several subdirectories of txt files. There is another directory B, which contains txt files. Several txt files in A have the same name as a file in B but different content. Now I want to move the txt files in B to A and overwrite the files with the same name. My code is below:
import shutil
import os
# Source (B) and destination (A) trees, exactly as posted in the question.
src = '/PATH/TO/B'
dst = '/PATH/TO/A'
# os.walk() yields (dirpath, dirnames, filenames) tuples, so this list holds
# those tuples rather than bare file names.
file_list = list(os.walk(dst))
for root, dirs, files in os.walk(src):
    for file in files:
        # A str file name is compared against the tuples collected above.
        if file not in file_list:
            continue
        ##os.remove(dst/file[:-4] + '.txt')
        shutil.move(os.path.join(src, file), os.path.join(dst, file))
But when I run this, it does nothing. Can anyone help me with this?

The following will do what you want. You need to be careful to preserve the subdirectory structure so as to avoid FileNotFoundError exceptions. Try it out on a test directory before clobbering the actual directories you want modified, so that you know it does what you want.
import shutil
import os
src = 'B'
dst = 'A'
file_list = []
dst_paths = {}
# Index every file under A by its bare name, so that a same-named file found in
# B can be moved onto the right path inside A's subdirectory structure.
for root, dirs, files in os.walk(dst):
    for file in files:
        file_list.append(file)
        # If one name occurs in several subdirectories of A, the last one
        # walked wins and only that copy is replaced.
        dst_paths[file] = os.path.join(root, file)
print(file_list)
print(dst_paths)
for root, dirs, files in os.walk(src):
    for file in files:
        # Look the name up in the dict (constant time) instead of scanning
        # file_list; both hold exactly the same names.
        if file in dst_paths:
            shutil.move(os.path.join(root, file), dst_paths[file])

Related

How to copy specific files from the sub-folders to a new folder in python?

I have a folder with several sub-folders, each containing the same number of files (here it is 7). The code that I use at the present copies all the files from the different sub-folders within a main folder, to another new folder.
import os
import shutil
src = r'C:\Users\datasets\test\0'
dest = r'C:\Users\datasets\data_new\test\0'
# Flatten the tree: every file found anywhere under src is copied straight
# into dest.
for folder, _subfolders, names in os.walk(src):
    for source_file in (os.path.join(folder, n) for n in names):
        shutil.copy2(source_file, dest)
I need to modify the code in a way to copy only the last image (i.e. the 7th image in this case) from each sub-folder (windows file arrangement) to a new folder.
This should do it for you.
import os
import shutil
from glob import glob
src = r'C:\temp\datasets\test\0'
dest = r'C:\temp\datasets\data_new\test\0'
# Copy only the last file of every sub-folder under src into dest.
# "Last" means last in lexicographic name order, so "10.png" sorts before
# "7.png" unless the names are zero-padded.
for base, dirs, _ in os.walk(src):
    for path in dirs:
        # glob already returns paths that include base and the sub-folder, so
        # they must not be joined with the folder name again. Keep regular
        # files only, so a nested directory is never picked as the last file.
        files = sorted(
            f for f in glob(os.path.join(base, path, '*')) if os.path.isfile(f)
        )
        if not files:
            continue
        # shutil.copyfile() requires a complete target file name;
        # shutil.copy2() accepts a directory as destination.
        shutil.copy2(files[-1], dest)

Combine csv files with same name from different subfolders in to one csv

I have three CSV files for each of several file names, one in each of three folders. Let's say there are 20 file names in total, so 20 × 3 CSV files spread over three different folders.
Folder A- 1001.CSV,1002.CSV,1003.CSV...
Folder B-1001.CSV,1002.CSV,1003.CSV
Folder C-1001.csv,1002.csv,1003.csv......
I want to get a single CSV file for each 1001,1002,1003,1004.....
So total 20csv files
How can I do this? Since the files are in different folders, glob is not working (or I don't know how to use it).
I made the following assumptions:
all the subfolders will be rooted at some known directory "parentdir"
each subfolder contains only relevant csv files
the csv files do not contain any header/footer lines
each record in the csv files is separated by a newline
all of the records in each file are relevant
This should produce a "concat.csv" file in each subfolder with the contents of all the other files in that same folder. I used a snippet of code from this other answer on stackoverflow for actually concatenating the files.
import os
import fileinput
rootdir = 'C:\\Users\\myname\\Desktop\\parentdir'
# Build one "concat.csv" inside every sub-folder of rootdir, containing the
# lines of all the other files in that sub-folder. Full paths are used
# throughout, so the process working directory is never changed.
for i in os.listdir(rootdir):
    path = os.path.join(rootdir, i)
    if not os.path.isdir(path):
        # a stray file next to the sub-folders is not a folder to process
        continue
    # Inputs are the regular files of the folder, minus the output of a
    # previous run, in a stable order.
    filenames = sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if name != 'concat.csv' and os.path.isfile(os.path.join(path, name))
    )
    with open(os.path.join(path, 'concat.csv'), 'w') as fout:
        # fileinput falls back to reading sys.stdin when given an empty list,
        # so an empty folder just gets an empty concat.csv.
        if filenames:
            with fileinput.input(filenames) as fin:
                for line in fin:
                    # Lines already carry their newline; add one only when a
                    # file's final line lacks it, so records never run together.
                    fout.write(line if line.endswith('\n') else line + '\n')
import os
import shutil
import glob
import pandas as pd
path = '/mypath/'

# rename files: give every 'whatever.csv' under path a unique, numbered name so
# the copies do not overwrite each other once gathered in one directory
count = 1
for root, dirs, files in os.walk(path):
    for i in files:
        if i == 'whatever.csv':
            os.rename(os.path.join(root, i), os.path.join(root, "whatever" + str(count) + ".csv"))
            count += 1

# delete unwanted files
main_dir = path
for (dirname, dirs, files) in os.walk(main_dir):
    for file in files:
        if file.startswith('dontwant'):
            os.remove(os.path.join(dirname, file))

# copy files to dir
for root, dirs, files in os.walk(path):
    # Files that already sit directly in `path` need no copy, and copying a
    # file onto itself raises shutil.SameFileError.
    if os.path.samefile(root, path):
        continue
    for file in files:
        if file.endswith('.csv'):
            shutil.copy2(os.path.join(root, file), path)

# combine files
os.chdir(path)
extension = 'csv'
# Leave out the output of a previous run so it is not merged into itself.
all_filenames = sorted(
    i for i in glob.glob('*.{}'.format(extension)) if i != "combined_csv.csv"
)
# pd.concat raises ValueError on an empty list, so only combine when there is
# at least one input file.
if all_filenames:
    combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])
    combined_csv.to_csv("combined_csv.csv", index=False, encoding='utf-8-sig')

Copying files in python using shutil

I have the following directory structure:
-mailDir
-folderA
-sub1
-sub2
-inbox
-1.txt
-2.txt
-89.txt
-subInbox
-subInbox2
-folderB
-sub1
-sub2
-inbox
-1.txt
-2.txt
-200.txt
-577.txt
The aim is to copy all the txt files under inbox folder into another folder.
For this I tried the below code
import os
from os import path
import shutil
rootDir = "mailDir"
destDir = "destFolder"
# Collect the path of every directory called "inbox" anywhere under rootDir.
eachInboxFolderPath = []
for root, dirs, files in os.walk(rootDir):
    eachInboxFolderPath.extend(
        root+"\\"+dirName for dirName in dirs if dirName=="inbox"
    )
# Copy every entry of each inbox, whatever its type, into destDir.
for inboxPath in eachInboxFolderPath:
    for entry in os.listdir(inboxPath):
        shutil.copy(path.join(inboxPath, entry), destDir)
If the inbox directory only has .txt files, then the above code works fine. Since the inbox folder under the folderA directory has other subdirectories along with .txt files, the code returns a permission denied error. As far as I understand, shutil.copy cannot copy folders.
The aim is to copy only the txt files in every inbox folder to some other location. If the same file name occurs in different inbox folders, I have to keep both files. How can we improve the code in this case? Please note that everything other than the .txt files is a folder.
One simple solution is to filter for any i that does not have the .txt extension by using the string endswith() method.
import os
from os import path
import shutil
rootDir = "mailDir"
destDir = "destFolder"
eachInboxFolderPath = []
# Collect the path of every directory called "inbox" anywhere under rootDir.
for root, dirs, files in os.walk(rootDir):
    for dirName in dirs:
        if dirName == "inbox":
            # path.join picks the platform's separator; a hard-coded "\\"
            # only produces a valid path on Windows.
            eachInboxFolderPath.append(path.join(root, dirName))
for ii in eachInboxFolderPath:
    for i in os.listdir(ii):
        source = path.join(ii, i)
        # Require a regular file as well as the extension, so a directory
        # whose name happens to end in ".txt" is skipped too.
        # Same-named files from different inboxes still overwrite each other
        # in destDir.
        if i.endswith('.txt') and path.isfile(source):
            shutil.copy(source, destDir)
This should ignore any folders and non-txt files that are found with os.listdir(ii). I believe that is what you are looking for.
Just remembered that I once wrote several files to solve this exact problem before. You can find the source code here on my Github.
In short, there are two functions of interest here:
list_files(loc, return_dirs=False, return_files=True, recursive=False, valid_exts=None)
copy_files(loc, dest, rename=False)
For your case, you could copy and paste these functions into your project and modify copy_files like this:
def copy_files(loc, dest, rename=False):
    """Copy every .txt file that list_files() reports under loc into dest.

    NOTE(review): list_files, sub, join and copy are not defined in this
    snippet; presumably they are the author's list_files helper, re.sub,
    os.path.join and shutil.copy -- confirm against the linked source.

    Args:
        loc: location handed to list_files() as the place to search.
        dest: destination handed to copy(); when rename is true it is joined
            with the flattened file name first.
        rename: when true, each copy is named after the source path with the
            path separators replaced by hyphens, so files with the same base
            name from different folders do not overwrite each other.

    Returns:
        The list returned by list_files(); when rename is true each entry has
        been replaced by the path the file was copied to.
    """
    # get files with full path
    files = list_files(loc, return_dirs=False, return_files=True, recursive=True, valid_exts=('.txt',))
    # copy files in list to dest
    for i, this_file in enumerate(files):
        # change name if renaming
        if rename:
            # Drop a literal leading "./" (or ".\"). The dot is escaped: an
            # unescaped "." would also strip any one-character directory name.
            out_file = sub(r'^\.[\\/]', '', this_file)
            # replace slashes with hyphens to preserve unique name
            out_file = sub(r'[\\/]', '-', out_file)
            out_file = join(dest, out_file)
            copy(this_file, out_file)
            files[i] = out_file
        else:
            copy(this_file, dest)
    return files
Then just call it like so:
copy_files('mailDir', 'destFolder', rename=True)
The renaming scheme might not be exactly what you want, but it will at least not override your files. I believe this should solve all your problems.
Here you go:
import os
from os import path
import shutil
destDir = '<absolute-path>'
for root, dirs, files in os.walk(os.getcwd()):
    # Only files sitting directly in a directory named 'inbox' are wanted.
    # The walk itself is not pruned: restricting `dirs` to 'inbox' at every
    # level would stop os.walk from descending into the parent folders that
    # contain the inboxes.
    if path.basename(root) != 'inbox':
        continue
    for f in files:
        # Keep only '.txt' files.
        if f.endswith('.txt'):
            # Same-named files from different inboxes overwrite each other.
            shutil.copy(path.join(root, f), destDir)
Quick and simple.
Sorry, I forgot the part where you also need unique file names. The above solution only works when file names are distinct across the inbox folders.
For copying files from multiple inboxes and having a unique name in the destination folder, you can try this:
import os
from os import path
import shutil
sourceDir = os.getcwd()
destDir = '<absolute-path>'
filteredFiles = []
for root, dirs, files in os.walk(sourceDir):
    # Only directories named exactly 'inbox'; a suffix test would also match
    # names such as 'subinbox'.
    if path.basename(root) == 'inbox':
        # join the file name to the full path while filtering txt files
        filteredFiles.extend(path.join(root, f) for f in files if f.endswith('.txt'))
# Pair each source path with a unique flat name: its path relative to
# sourceDir with the platform's separator replaced by '-'.
filteredFiles = [
    (f, path.relpath(f, sourceDir).replace(os.sep, '-')) for f in filteredFiles
]
for (f, n) in filteredFiles:
    # Single-argument print() gives the same output on Python 2 and Python 3.
    print('copying file... %s' % f)
    # copying from the path to the dest directory with specific name
    shutil.copy(f, path.join(destDir, n))
print('copied %d files to %s' % (len(filteredFiles), destDir))
If you need to copy all files instead of just txt files, then just change the condition f.endswith('.txt') to os.path.isfile(f) while filtering out the files.

Python Move Files Based On Name

To give credit, the code I am currently working with is from this response by cji, here.
I am trying to recursively pull all files from the source folder and move them into folders named after the first five characters of each file name (filename[0:5]).
My Code Below:
import os
import shutil
srcpath = "SOURCE"
srcfiles = os.listdir(srcpath)
destpath = "DESTINATION"
# Unique five-character prefixes of the source file names; each one names a
# destination directory.
destdirs = list(set(name[0:5] for name in srcfiles))


def create(dirname, destpath):
    """Make directory `dirname` under `destpath` and return its full path."""
    full_path = os.path.join(destpath, dirname)
    os.mkdir(full_path)
    return full_path


def move(filename, dirpath):
    """Move `filename` out of `srcpath` into the directory `dirpath`."""
    source = os.path.join(srcpath, filename)
    shutil.move(source, dirpath)


# Create each destination directory and keep it alongside its prefix.
targets = [(folder, create(folder, destpath)) for folder in destdirs]
for dirname, full_path in targets:
    # Move every source file whose prefix matches this directory.
    for filename in (f for f in srcfiles if f[0:5] == dirname):
        move(filename, full_path)
Now, changing srcfiles = os.listdir(srcpath) and destdirs = list(set([filename[0:5] for filename in srcfiles])) with the code below gets me the paths in one variable and the first five characters of the file names in another.
# Asker's attempt at collecting source paths and name prefixes recursively.
# NOTE(review): indentation was lost in this listing; presumably both inner
# loops belong inside the os.walk loop -- confirm against the original post.
srcfiles = []
destdirs = []
for root, subFolders, files in os.walk(srcpath):
for file in files:
# full path of each file found under srcpath
srcfiles.append(os.path.join(root,file))
for name in files:
# Appends a list (not a string) per file name, so destdirs becomes a list of
# lists; the comprehension repeats name[0:5] once per entry of srcfiles before
# set() collapses the duplicates.
destdirs.append(list(set([name[0:5] for file in srcfiles])))
How would I go about modifying the original code to use this? Or, if someone has a better idea of how to do this, I would appreciate it. Thanks.
I can't really test it very easily, but I think this code should work:
import os
import shutil
srcpath = "SOURCE"
destpath = "DESTINATION"
# Move every file found anywhere under srcpath into a folder of destpath named
# after the first five characters of the file name.
for root, subFolders, files in os.walk(srcpath):
    for file in files:
        subFolder = os.path.join(destpath, file[:5])
        # exist_ok replaces the separate isdir() check, so there is no window
        # between checking for the folder and creating it.
        os.makedirs(subFolder, exist_ok=True)
        shutil.move(os.path.join(root, file), subFolder)

Traversing File Directory

this is the first question I am posting on stackoverflow so excuse me if I did something out of the norm.
I am trying to create a python program which traverses a user selected directory to display all file contents of the folders selected. For example: Documents folders has several folders with files inside of them, I am trying to save all files in the Documents folder to an array.
The method below is what I am using to traverse a directory (hoping it is a simple problem)
def saveFilesToArray(dir):
    """Collect the names of the files under dir, one nested list per sub-directory.

    Entries are visited in sorted name order. Names starting with a dot are
    skipped, because the "*" glob pattern does not match them.

    Args:
        dir: path of the directory to traverse (absolute or relative).

    Returns:
        A list holding the bare name of each regular file directly in dir and,
        for each sub-directory, a nested list built the same way.
    """
    allFiles = []
    # Glob with the directory in the pattern instead of calling os.chdir():
    # changing the working directory during recursion made the remaining
    # entries of the parent get tested from inside the child directory, and it
    # left the caller's working directory changed.
    for entry in sorted(glob.glob(os.path.join(glob.escape(dir), "*"))):
        file = os.path.basename(entry)
        print(file)
        if os.path.isfile(entry):
            allFiles.append(file)
        elif os.path.isdir(entry):
            print(dir + "/" + file + " is a directory")
            allFiles.append(saveFilesToArray(dir + "/" + file))
    return allFiles
This will give you just the files:
import os
def list_files(root):
    """Return the full path of every file found beneath root.

    Symbolic links to directories are followed (followlinks=True), so a link
    that points back to one of its own parent directories makes the walk
    recurse without end.

    Args:
        root: directory at which the walk starts.

    Returns:
        A list of paths, each the containing directory joined with the file
        name. Empty when root does not exist or holds no files.
    """
    all_files = []
    # The loop uses its own name for the current directory so that the `root`
    # argument is not rebound while walking.
    for current_dir, _dirs, files in os.walk(root, followlinks=True):
        all_files.extend(os.path.join(current_dir, name) for name in files)
    return all_files
I hope this is helpful:
import os
def saveFilesToList(theDir):
    """Return the path of every regular file found beneath theDir.

    Args:
        theDir: directory at which the walk starts.

    Returns:
        A list of paths, each the containing directory joined with the file
        name. Entries for which os.path.isfile() is false (for example a
        dangling symbolic link) are left out.
    """
    allFiles = []
    for root, dirs, files in os.walk(theDir):
        for name in files:
            npath = os.path.join(root, name)
            # os.walk lists every non-directory entry under `files`; keep only
            # those that resolve to a regular file.
            if os.path.isfile(npath):
                allFiles.append(npath)
    return allFiles
Traverses all directories and stores the path to files (that are not directories) in the list. It seems much easier to use this than glob.

Categories