Search a folder and sub folders for files starting with criteria - python

I have a folder "c:\test" which contains many sub-folders and files (.xml, .wav). I need to search the test folder and all of its sub-folders for files whose names start with the number 4 and are 7 characters long, and copy those files to another folder called 'c:\test.copy' using Python. Any other files need to be ignored.
So far i can copy the files starting with a 4 but not structure to the new folder using the following,
from glob import glob
import os, shutil
root_src_dir = r'C:/test' #Path of the source directory
root_dst_dir = 'c:/test.copy' #Path to the destination directory
for file in glob('c:/test/**/4*.*'):
shutil.copy(file, root_dst_dir)
any help would be most welcome

You can use os.walk:
import os
import shutil
root_src_dir = r'C:/test' #Path of the source directory
root_dst_dir = 'c:/test.copy' #Path to the destination directory
# Create the destination folder first: if it does not exist, shutil.copy
# treats the path as a file name and every match overwrites that one file.
os.makedirs(root_dst_dir, exist_ok=True)
# os.walk visits root_src_dir and every sub-folder below it.
for root, _, files in os.walk(root_src_dir):
    for file in files:
        # The length test counts the whole name, extension included.
        if file.startswith("4") and len(file) == 7:
            # All matches land directly in root_dst_dir (no sub-folders), so
            # two matches with the same name overwrite each other.
            shutil.copy(os.path.join(root, file), root_dst_dir)
If, by 7 characters, you mean 7 characters without the file extension, then replace len(file) == 7 with len(os.path.splitext(file)[0]) == 7.

This can be done using the os and shutil modules:
import os
import shutil
Firstly, we need to establish the source and destination paths. source should be the directory you are copying and destination should be the directory you want to copy into.
source = r"/root/path/to/source"
destination = r"/root/path/to/destination"
Next, we have to check if the destination path exists because shutil.copytree() will raise a FileExistsError if the destination path already exists. If it does already exist, we can remove the tree and duplicate it again. You can think of this block of code as simply refreshing the duplicate directory:
if os.path.exists(destination):
shutil.rmtree(destination)
shutil.copytree(source, destination)
Then, we can use os.walk to recursively navigate the entire directory, including subdirectories:
# topdown=False visits the deepest folders first, so a parent that only
# contained sub-folders which have just been removed is seen as empty too.
for path, _, files in os.walk(destination, topdown=False):
    for file in files:
        # Keep a file only when BOTH criteria hold; failing either one
        # (wrong first character OR wrong stem length) means it is removed.
        if not (file.startswith("4") and len(os.path.splitext(file)[0]) == 7):
            os.remove(os.path.join(path, file))
    # Drop the folder if nothing is left in it after the clean-up.
    if not os.listdir(path):
        os.rmdir(path)
We then can loop through the files in each directory and check if the file does not meet your condition (starts with "4" and has a length of 7). If it does not meet the condition, we simply remove it from the directory using os.remove.
The final if-statement checks if the directory is now empty. If the directory is empty after removing the files, we simply delete that directory using os.rmdir.

Related

Python script to move specific filetypes from the all directories to one folder

I'm trying to write a python script to move all music files from my whole pc to one specific folder.
They are scattered everywhere and I want to get them all in one place, so I don't want to copy but completely move them.
I was already able to make a list of all the files with this script:
import os
targetfiles = []
extensions = (".mp3", ".wav", ".flac")
for root, dirs, files in os.walk('/'):
for file in files:
if file.endswith(extensions):
targetfiles.append(os.path.join(root, file))
print(targetfiles)
This prints out a nice list of all the files but I'm stuck to now move them.
I made many different attempts with different code, and this was one of them:
import os
import shutil
targetfiles = []
extensions = (".mp3", ".wav", ".flac")
for root, dirs, files in os.walk('/'):
for file in files:
if file.endswith(extensions):
targetfiles.append(os.path.join(root, file))
new_path = 'C:/Users/Nicolaas/Music/All' + file
shutil.move(targetfiles, new_path)
But everything I try gives me an error:
TypeError: rename: src should be string, bytes or os.PathLike, not list
I think I've met my limit gathering this all as I'm only starting at Python but I would be very grateful if anyone could point me in the right direction!
You are trying to move a list of files to a new location, but the shutil.move function expects a single file as the first argument. To move all the files in the targetfiles list to the new location, you have to use a loop to move each file individually.
# The destination has to be the folder itself. A path built as
# folder + file name would make every file be moved onto that single path.
new_path = 'C:/Users/Nicolaas/Music/All/'
os.makedirs(new_path, exist_ok=True)
for file in targetfiles:
    # shutil.move takes one source path per call and, given an existing
    # folder as destination, places the file inside it under its own name.
    # It raises shutil.Error if a file with that name is already there.
    shutil.move(file, new_path)
Also if needed add a trailing slash to the new path 'C:/Users/Nicolaas/Music/All/'
On a side note, are you sure that moving all files with those extensions is a good idea? I would suggest copying them or having a backup.
Edit:
You can use an if statement to exclude certain folders from being searched.
for root, dirs, files in os.walk('/'):
if any(folder in root for folder in excluded_folders):
continue
for file in files:
if file.endswith(extensions):
targetfiles.append(os.path.join(root, file))
Where excluded_folders is a list of the unwanted folders like: excluded_folders = ['Program Files', 'Windows']
I would suggest using glob for matching:
import glob
def match(extension, root_dir):
return glob.glob(f'**\\*.{extension}', root_dir=root_dir, recursive=True)
root_dirs = ['C:\\Path\\to\\Albums', 'C:\\Path\\to\\dir\\with\\music\\files']
excluded_folders = ['Bieber', 'Eminem']
extensions = ("mp3", "wav", "flac")
targetfiles = [f'{root_dir}\\{file_name}' for root_dir in root_dirs for extension in extensions for file_name in match(extension, root_dir) if not any(excluded_folder in file_name for excluded_folder in excluded_folders)]
Then you can move these files to new_path

Directory walk and remove files/directories

I copied a (presumably large) number of files on to an existing directory, and I need to reverse the action. The targeted directory contains a number of other files, that I need to keep there, which makes it impossible to simply remove all files from the directory. I was able to do it with Python. Here's the script:
import os, sys, shutil
source = "/tmp/test/source"
target = "/tmp/test/target"
for root, dirs, files in os.walk(source): # for files and directories in source
for dir in dirs:
if dir.startswith("."):
print(f"Removing Hidden Directory: {dir}")
else:
print(f"Removing Directory: {dir}")
try:
shutil.rmtree(f"{target}/{dir}") # remove directories and sub-directories
except FileNotFoundError:
pass
for file in files:
if file.startswith("."): # if filename starts with a dot, it's a hidden file
print(f"Removing Hidden File: {file}")
else:
print(f"Removing File: {file}")
try:
os.remove(f"{target}/{file}") # remove files
except FileNotFoundError:
pass
print("Done")
The script above looks in the original (source) directory and lists those files. Then it looks into the directory you copied the files to(target), and removes only the listed files, as they exist in the source directory.
How can I do the same thing in Go? I tried filepath.WalkDir(), but as stated in the docs:
WalkDir walks the file tree rooted at root, calling fn for each file
or directory in the tree, including root.
If WalkDir() includes the root, then os.Remove() or os.RemoveAll() will delete the whole thing.
Answered by Cerise Limon. Use os.ReadDir to read the source directory entries. For each entry, call os.RemoveAll on the corresponding target file.

Python: Unzip selected files in directory tree

I have the following directory, in the parent dir there are several folders lets say ABCD and within each folder many zips with names as displayed and the letter of the parent folder included in the name along with other info:
-parent--A-xxxAxxxx_timestamp.zip
-xxxAxxxx_timestamp.zip
-xxxAxxxx_timestamp.zip
--B-xxxBxxxx_timestamp.zip
-xxxBxxxx_timestamp.zip
-xxxBxxxx_timestamp.zip
--C-xxxCxxxx_timestamp.zip
-xxxCxxxx_timestamp.zip
-xxxCxxxx_timestamp.zip
--D-xxxDxxxx_timestamp.zip
-xxxDxxxx_timestamp.zip
-xxxDxxxx_timestamp.zip
I need to unzip only selected zips in this tree and place them in the same directory with the same name without the .zip extension.
Output:
-parent--A-xxxAxxxx_timestamp
-xxxAxxxx_timestamp
-xxxAxxxx_timestamp
--B-xxxBxxxx_timestamp
-xxxBxxxx_timestamp
-xxxBxxxx_timestamp
--C-xxxCxxxx_timestamp
-xxxCxxxx_timestamp
-xxxCxxxx_timestamp
--D-xxxDxxxx_timestamp
-xxxDxxxx_timestamp
-xxxDxxxx_timestamp
My effort:
for path in glob.glob('./*/xxx*xxxx*'): ##walk the dir tree and find the files of interest
zipfile=os.path.basename(path) #save the zipfile path
zip_ref=zipfile.ZipFile(path, 'r')
zip_ref=extractall(zipfile.replace(r'.zip', '')) #unzip to a folder without the .zip extension
The problem is that i dont know how to save the A,B,C,D etc to include them in the path where the files will be unzipped. Thus, the unzipped folders are created in the parent directory. Any ideas?
The code that you have seems to be working fine; you just need to make sure that you are not overriding variable names and that you are using the correct ones. The following code works perfectly for me:
import os
import zipfile
import glob
for path in glob.glob('./*/xxx*xxxx*'): ##walk the dir tree and find the files of interest
    # Split off the extension instead of str.replace, which would also
    # rewrite a ".zip" occurring in the middle of the path.
    target, ext = os.path.splitext(path)
    if ext.lower() != '.zip':
        # The pattern also matches the already-extracted folders on a re-run.
        continue
    # `path` still contains the parent folder (./A/...), so the archive is
    # unpacked next to itself; the with-block closes it afterwards.
    with zipfile.ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall(target) #unzip to a folder without the .zip extension
Instead of trying to do it in a single statement , it would be much easier and more readable to do it by first getting list of all folders and then get list of files inside each folder. Example -
import glob
import os.path
for folder in glob.glob("./*"):
    # glob returns "./A"; only the bare folder name "A" can occur inside a
    # file name, so compare against that.
    folder_name = os.path.basename(folder)
    #Using *.zip to only get zip files
    for path in glob.glob(os.path.join(folder, "*.zip")):
        filename = os.path.split(path)[1]
        if folder_name in filename:
            #Do your logic
            pass  # placeholder so the snippet is runnable; replace with the logic

How to copy all files from a folder (including sub-folder) while not copying the folder structure in python

Can someone help me about how to copy all files from a folder to another destination folder in python. The catch is I do not want to copy the sub-directory structure. But I want the files within them.
For example, lets say in the root folder, there are 3 folders, each containing 10 files. Also in each of them there are 2 folders each containing 5 files. (so each first level folder has in total 20 files and 2 sub directories under it). Bringing the total to 60 files.
I wish to copy all of those 60 files to a single destination directory, discarding the subfolder structure.
This is the code I've tried:
# path : source folder path
# compiled_path: destination folder path
w = os.walk(path)
for root, dirs, files in w:
for dir_name in dirs:
file_list_curent_dir = os.walk(path+"\\"+dir_name).next()[2]
for item in file_list_curent_dir:
shutil.copy(path+"\\"+dir_name+"\\"+item, compiled_path+"\\"+item )
It copies the files at the uppermost level, but not the files in folders within sub-directories.
Thank you very much for your time.
import os
import shutil
source_directory = '.' # replace the . with your starting directory
destination_directory = 'destination_directory' # change you destination dir
# Without an existing folder, copy2 would treat the destination as a file
# name and each copy would overwrite the previous one.
os.makedirs(destination_directory, exist_ok=True)
destination_abs = os.path.abspath(destination_directory)
for root, dirs, files in os.walk(source_directory):
    # Pruning dirs in place stops os.walk from descending into the
    # destination when it lies under the start directory; copying a file
    # onto itself would raise shutil.SameFileError.
    dirs[:] = [d for d in dirs if os.path.abspath(os.path.join(root, d)) != destination_abs]
    for file in files:
        path_file = os.path.join(root,file)
        # Files with the same name in different folders overwrite each other.
        shutil.copy2(path_file, destination_directory)
you can use this raw function (but the best way to go when you want to go recursively over directories is os.walk)
import os
import shutil
from shutil import copyfile


def your_function(dir):
    """Flatten every first-level folder found in *dir*, in place.

    For each folder directly inside *dir*, all files from its nested
    sub-folders are copied up into that folder and the sub-folders are
    then removed. Entries of *dir* that are not folders are left untouched.
    """
    for folder in os.listdir(dir):
        folder_full_path = os.path.join(dir, folder)
        if not os.path.isdir(folder_full_path):
            # A loose file has nothing to flatten, and using it as a copy
            # target directory would fail.
            continue
        move_down_and_delete(folder_full_path, folder_full_path)


def move_down_and_delete(input, copy_to_dir):
    """Copy every file at or below *input* into *copy_to_dir*.

    If *input* is a file it is copied to *copy_to_dir* under its own base
    name. Otherwise each child is processed recursively, and every child
    that is not a file is removed with ``shutil.rmtree`` once its contents
    have been copied. Files sharing a base name overwrite each other.
    """
    if os.path.isfile(input):
        dest = os.path.join(copy_to_dir, os.path.basename(input))
        # A file already sitting in copy_to_dir maps onto itself; copyfile
        # would raise SameFileError, so it is simply left where it is.
        if os.path.abspath(dest) != os.path.abspath(input):
            print(dest, input)
            copyfile(input, dest)
        return
    for child in os.listdir(input):
        current_obj_path = os.path.join(input, child)
        move_down_and_delete(current_obj_path, copy_to_dir)
        if not os.path.isfile(current_obj_path):
            shutil.rmtree(current_obj_path)

copy and rename files in a directory in a specific pattern

I would like to copy a file in a directory as many times as there are other files in that directory, and then rename all the new files.
For example, there are 3 files in a directory: filename1.xls, filename2.xls and filename3.xls. I would like to copy filename1.xls 2 times (as there are 2 files in the directory excluding filename1.xls) and then rename the copies to filename2.xls and filename3.xls. Hope my question is clear. Thanks, AD
hm... just get the amount of files in directory, copy your file N times and save them as
for number in range(amount):
"feliname%r.xls" % number
if I understand what you mean
To replace content of all files that have names that start with "F" and that are adjacent to a file given at the command-line with its copy:
#!/usr/bin/env python
import os
import shutil
import sys
filename = sys.argv[1] # provide file you want to multiply
# Resolve to an absolute path first: for a bare file name os.path.split()
# returns an empty dirname, and os.listdir('') raises FileNotFoundError.
dirname, basename = os.path.split(os.path.abspath(filename))
for name in os.listdir(dirname):
    path = os.path.join(dirname, name)
    #note: os.path.normcase() might be required to compare names
    # Overwrite every other regular file whose name starts with "F".
    if name.startswith("F") and name != basename and os.path.isfile(path):
        shutil.copy2(filename, path) #note: some metadata is not copied
Note: if the copy fails, the destination file might be destroyed. To guard against this, you can copy to a temporary file first and then replace the destination with it.

Categories