Loop through folder python - python

Basically I have 4 subfolders in my directory, and these are present in an array with the following structure:
path_list = [path1, path2, path3, path4]  # one entry per subfolder
When I run this code, I can scan all files in one of the folders.
# Process every file of ONE folder; the index 2 is hard-coded for both the
# input folder (path_list) and the output subfolder (subdir_list).
for file_name in os.listdir(path_list[2]):
    full_path = os.path.join(path_list[2], file_name)
    ...  # processing elided in the post; presumably produces img_norm
    new_sub = os.path.join(new_path, subdir_list[2])
    final_path = os.path.join(new_sub, file_name)
    imsave(final_path, img_norm)
I would like the loop to scan one folder and, once it has finished, increment i so that path_list[i] and subdir_list[i] point to the next folder. The loop should stop once it has scanned and modified all the files in the 4 folders.

You can wrap the entire thing in a for loop.
# One entry per folder. The two lists are walked in lockstep, so entry k of
# subdir_list is the output subfolder for entry k of path_list.
path_list = [path1, path2, path3, path4]
subdir_list = [subdir1, subdir2, subdir3, subdir4]
for src_dir, subdir in zip(path_list, subdir_list):
    # The output subfolder is the same for every file of this source folder.
    new_sub = os.path.join(new_path, subdir)
    for file_name in os.listdir(src_dir):
        full_path = os.path.join(src_dir, file_name)
        ...  # processing elided in the post; presumably produces img_norm
        final_path = os.path.join(new_sub, file_name)
        imsave(final_path, img_norm)

Related

how to use element tree in this code to find a specific element

I have this code that moves all the jpegs, txts and xmls from one directory to another. I would like to create a loop that looks for a specific element inside each xml file: if the element exists, the file should be moved, and if not, the file should stay in the directory. Maybe someone can give me some advice?
this is my code so far:
import os
import glob
import shutil
def remove_ext(list_of_pathnames):
    """
    Return the given pathnames with the extension removed from each one.

    Only the last suffix is stripped (``os.path.splitext`` semantics), so
    ``"c.tar.gz"`` becomes ``"c.tar"``. Any directory part is kept, and
    names without an extension are returned unchanged.
    """
    return [os.path.splitext(pathname)[0] for pathname in list_of_pathnames]
path = os.getcwd()  # source directory: captured BEFORE the chdir below
os.chdir("D:\\TomProject\\")
newpath = os.path.join("D:\\TomProject\\", "Done")
# exist_ok: a second run must not crash because "Done" already exists.
os.makedirs(newpath, exist_ok=True)

list_of_jpegs = glob.glob(os.path.join(path, '*.jpeg'))
list_of_xmls = glob.glob(os.path.join(path, '*.xml'))
list_of_txts = glob.glob(os.path.join(path, '*.txt'))
print(path)
print(list_of_jpegs, "\n\n", list_of_xmls, "\n\n", list_of_txts,
      "\n\n")  # remove

jpegs_without_extension = remove_ext(list_of_jpegs)
# Sets give O(1) membership tests in the loop below.
xmls_without_extension = set(remove_ext(list_of_xmls))
txts_without_extension = set(remove_ext(list_of_txts))

# Move a jpeg/xml/txt triple only when all three files share the same stem.
for filename in jpegs_without_extension:
    if filename in xmls_without_extension and filename in txts_without_extension:
        print("moving", filename)  # remove
        shutil.move(filename + '.jpeg', newpath)  # move image to new path.
        shutil.move(filename + '.xml', newpath)
        shutil.move(filename + '.txt', newpath)

Get files not in hidden folders

The test is failing because it's getting the files from hidden folders too. How can I modify the code so that it skips the hidden folders?
def get_files_not_in_hidden_folder(parent_folder: str, extension: str) -> List[str]:
    """
    Get all files recursively from parent folder,
    except for the ones that are in hidden folders
    """
    files = []
    for root, _, filenames in os.walk(parent_folder):
        for filename in filenames:
            # NOTE: root is the path of the directory being visited and begins
            # with parent_folder, so startswith('.') only inspects the first
            # character of that whole path, not each folder name along it.
            # This is the behaviour the question asks about.
            if filename.endswith(extension) and not root.startswith('.'):
                files.append(os.path.join(root, filename))
    logger.debug(f"get_files_not_in_hidden_folder: {parent_folder}, {extension} -> {files}")
    return files
def test_get_files_not_in_hidden_folder():
    """Only the .json file outside every dot-folder must be returned."""
    Path('tmp').mkdir(parents=True, exist_ok=True)
    try:
        Path('tmp/test.json').touch()
        # Hidden folder nested below a visible one.
        Path('tmp/tmp/.tmp').mkdir(parents=True, exist_ok=True)
        Path('tmp/tmp/.tmp/test.json').touch()
        # Visible folder nested below a hidden one.
        Path('tmp/.tmp/tmp').mkdir(parents=True, exist_ok=True)
        Path('tmp/.tmp/tmp/test.json').touch()
        # str(Path(...)) yields the OS-native separator, matching os.path.join.
        expected = [str(Path('tmp/test.json'))]
        assert get_files_not_in_hidden_folder('tmp', '.json') == expected
    finally:
        # Clean up even when the assertion fails, so reruns start fresh.
        shutil.rmtree(Path('tmp'))
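What you call root is the full path of the directory being visited, including its parent names, so it only starts with '.' when parent_folder itself does.
If you want just the last directory name you can use os.path.basename(root), but a hidden folder can be any component of the path, so every directory name along the way has to be checked, like: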
for root, dirs, filenames in os.walk(parent_folder):
    # Prune hidden directories in place: os.walk (top-down) then never
    # descends into them. This works with any path separator and does not
    # depend on how parent_folder itself is spelled.
    dirs[:] = [d for d in dirs if not d.startswith('.')]
    for filename in filenames:
        if filename.endswith(extension):
            files.append(os.path.join(root, filename))
I would implement this something like as follows ...
def my_walk(root_dir):
    """
    Walk root_dir recursively, skipping every entry whose name starts with ".".

    Yields ``(root_dir, dirs, files)`` tuples, parent before children.
    Unlike ``os.walk``, ``dirs`` and ``files`` hold full paths (already
    joined with ``root_dir``), not bare names. A directory that cannot be
    listed is reported with a "SKIP:" line and yielded with the entries
    collected so far.
    """
    files, dirs = [], []
    try:
        for fname in os.listdir(root_dir):
            if fname.startswith("."):
                continue  # hidden file or folder
            fpath = os.path.join(root_dir, fname)
            if os.path.isdir(fpath):
                dirs.append(fpath)
            else:
                files.append(fpath)
    except OSError:
        # Missing or unreadable directory. Catching OSError instead of a
        # bare except lets KeyboardInterrupt/SystemExit propagate.
        print("SKIP:", root_dir)
    yield root_dir, dirs, files
    for d in dirs:
        yield from my_walk(d)
I think should work ...
# Example: print what each visited directory contains.
for root, _, filenames in my_walk(parent_folder):
    print(f"{root} contains {filenames}")

Is there an efficient way to recurse in a directory?

I want to perform:
iterate over the content of the folder
if content is file, append to list
if content is folder, goto 1
if folder name is "depth" or "ir", ignore
I am using python. Can you help?
ended up doing something like:
_files = []
base_dir = "path/to/folder"  # not named `dir`: that would shadow the builtin
for root, dirs, files in os.walk(base_dir, topdown=False):
    # Skip files that sit directly inside a folder named "depth" or "ir".
    # The check depends only on root, so it is done once per directory.
    if os.path.basename(root) in ("depth", "ir"):
        continue
    _files.extend(os.path.join(root, name) for name in files)
print(_files)
The os.walk() will recurse for you.
import os
res = []
for root, dirs, files in os.walk('/path/to/dir'):
    # Remove "depth" and "ir" from dirs in place so os.walk never descends
    # into them; this ignores their whole subtree, not just their own files.
    dirs[:] = [d for d in dirs if d not in ("depth", "ir")]
    # files is the list of bare file names found directly in root
    res.extend(files)
print(res)
Try this
import os
basepath = "<path>"
files = []


def loopover(path):
    """
    Recursively collect file names below path into the module-level files list.

    Only the bare name of each file is stored, not its full path.
    Directories named "depth" or "ir" are not entered.
    """
    for entry in os.listdir(path):
        full = os.path.join(path, entry)
        if os.path.isfile(full):
            files.append(entry)
        elif os.path.isdir(full) and entry not in ("depth", "ir"):
            loopover(full)
loopover(basepath)  # start at basepath; found names accumulate in files

Get absolute path of files in sub-directory

I have a directory that consists of other directories. Each of those sub-directories have files that I need the absolute path for. For example, let's say the parent directory is /home/Documents and each of the sub-directories is 1, 2,..., 10. I have tried something like files = [os.path.abspath(f) for d in os.listdir('/home/Documents') for f in os.listdir(d)], but that gets me something like (for a file) /home/Documents/file1, when it should be /home/Documents/1/file1. Is there a way to do this with the sub-directory in there?
Yes. You can try os.walk.
Consider the following path which has 3 sub directories: '1', '2', '3'.
- '1' has a file ("123.txt")
- '2' is empty
- '3' has 2 files ("123.txt", "1234.txt")
import os

path = r"C:\Users\hvasala\Documents\Udemy Course\project\del"
# os.walk builds each dirname from the starting path, so an absolute
# starting path yields absolute results.
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        print(os.path.join(dirname, filename))
Output:
C:\Users\hvasala\Documents\Udemy Course\project\del\1\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\1234.txt
Use os.path.join:
root = '/tmp/project'
# One level deep: every entry f of every sub-directory d of root.
# The isdir filter keeps a plain file sitting in root from crashing listdir.
files = [
    os.path.join(root, d, f)
    for d in os.listdir(root)
    if os.path.isdir(os.path.join(root, d))
    for f in os.listdir(os.path.join(root, d))
]
print(files)
Output:
['/tmp/project/auth/__init__.py', '/tmp/project/controllers/__init__.py']
Try this code below:
import os
def find_file_name(path=None):
    """
    Return the full path of every file found recursively below path.

    path defaults to the current working directory when it is None or empty.
    Directories themselves are not listed; an empty directory gives [].
    """
    if not path:
        path = os.getcwd()
    paths = []
    for element in os.listdir(path):
        full_path = os.path.join(path, element)
        if os.path.isdir(full_path):
            paths += find_file_name(path=full_path)
        else:
            paths.append(full_path)
    return paths
def find_file_name(path=None, extention=".pdf"):
    """
    Return the bare names of all files below path that end with extention.

    path defaults to the current working directory when it is None or empty.
    The search is recursive, and the same extention is applied at every
    depth. Names are matched by suffix, so "report.pdf.bak" does not count
    as a ".pdf" file.
    """
    if not path:
        path = os.getcwd()
    pdf_files = []
    for element in os.listdir(path):
        full_path = os.path.join(path, element)
        if os.path.isdir(full_path):
            # Pass extention down; otherwise sub-folders fall back to ".pdf".
            pdf_files += find_file_name(path=full_path, extention=extention)
        elif element.endswith(extention):
            pdf_files.append(element)
    return pdf_files

copy (duplicate) files in python

I want to copy all files from 2 directories into another one, but when files in both directories have the same name, only one copy (from one directory, not both) ends up in the destination directory.
Here is the code:
def cp_files_tmp(src, dest):  # copy dir's files into a dir
    """
    Copy every regular file directly inside src into dest.

    Sub-directories of src are skipped. A file already in dest under the
    same name is overwritten, because shutil.copy replaces existing files.
    """
    for file_name in os.listdir(src):
        full_file_name = os.path.join(src, file_name)
        if os.path.isfile(full_file_name):
            shutil.copy(full_file_name, dest)
# Destination folder, followed by the two source folders copied into it.
dest = 'C:/temp/'
src_A = 'C:/sil/in/'
src_B = 'C:/olw/in/'
for source_dir in (src_A, src_B):
    cp_files_tmp(source_dir, dest)  # cp files
But what is the expected behaviour? If two files have the same name, then copying them simply overwrites the first one with the second, and there is no way it ends up differently. You have to come up with some kind of naming scheme for the duplicates, for example: if a file X already exists, then copy the second X as X.tmp or something like this.
def cp_files_tmp(src, dest):
    """
    Copy every regular file directly inside src into dest without overwriting.

    When a file of the same name already exists in dest, ".duplicate" is
    appended to the new copy's name, repeatedly, until the name is free.
    Sub-directories of src are skipped. dest is created if it is missing.
    """
    os.makedirs(dest, exist_ok=True)
    for file_name in os.listdir(src):
        full_file_name = os.path.join(src, file_name)
        if not os.path.isfile(full_file_name):
            continue
        full_destination = os.path.join(dest, file_name)
        # Find the first free name: X, X.duplicate, X.duplicate.duplicate, ...
        while os.path.exists(full_destination):
            full_destination += ".duplicate"
        shutil.copy(full_file_name, full_destination)
# Destination folder, followed by the two source folders copied into it.
dest = 'C:/temp/'
src_A = 'C:/sil/in/'
src_B = 'C:/olw/in/'
for source_dir in (src_A, src_B):
    cp_files_tmp(source_dir, dest)  # cp files
This should result in files being copied from C:/sil/in and C:/olw/in to C:/temp and in case of duplicates it simply changes the resulting name to FILE_NAME.duplicate

Categories