Is there an efficient way to recurse in a directory? - python

I want to perform:
iterate over the content of the folder
if content is file, append to list
if content is folder, goto 1
if folder name is "depth" or "ir", ignore
I am using python. Can you help?

ended up doing something like:
_files = []
dir = "path/to/folder"
for root, dirs, files in os.walk(dir, topdown=False):
for name in files:
files = os.path.join(root, name)
if root.split("/")[-1] in ["depth", "ir"]:
continue
_files.append(files)
print(_files)

The os.walk() will recurse for you.
import os
res = []
# os.walk is top-down by default, so removing names from `dirs` in place
# prevents it from ever descending into a "depth" or "ir" folder.
for root, dirs, files in os.walk('/path/to/dir'):
    dirs[:] = [d for d in dirs if d not in ("depth", "ir")]
    # `root` is the path of the current folder (built from the top path that
    # was passed in); `files` holds bare names, so join them to keep entries
    # from different folders distinguishable.
    res.extend(os.path.join(root, name) for name in files)
print(res)

Try this
import os
basepath ="<path>"
files = []


def loopover(path):
    """Recursively collect file paths below *path* into the module-level ``files``.

    Sub-folders named "depth" or "ir" are not entered. Each file is stored
    as its joined path (not just its name) so that files with the same name
    in different folders remain distinguishable.
    """
    for c in os.listdir(path):
        d = os.path.join(path, c)
        if os.path.isdir(d):
            if c not in ("depth", "ir"):
                loopover(d)
        elif os.path.isfile(d):
            files.append(d)
loopover(basepath)

Related

How to recursively move all files inside subdirectories to main folder? [python]

I have a folder directories look somewhat like this:
C:/Documents/A350/a/1.png
/2.png
b/1.png
/B777/a/1.png
/B747/a/1.png
/2.png
b/1.png
c/1.png
d/1.png
/2.png
I want to move all png to the main folder i.e. Documents.
def recur(input_path):
    """Follow the first sub-directory found at each level and return the deepest path.

    Entries are visited in the order ``os.listdir`` reports them; as soon as
    one of them is a directory the search continues inside it. A folder
    without sub-directories is returned as-is.
    """
    for entry in os.listdir(input_path):
        candidate = os.path.join(input_path, entry)
        if pathlib.Path(candidate).is_dir():
            return recur(candidate)
    return input_path
I have some code to get the deepest path inside a folder, but i am not so sure how to use the recursive function to achieve what i wanted.
Any help would be really appreciated, thanks!!
The program below collects all files recursively from the parent directory and copies them into the parent directory.
import os
import glob
import shutil
# Accumulates the path of every file found by get_all_files().
files_abs_paths = []


def get_all_files(parent_dir):
    """Append every file below *parent_dir* to ``files_abs_paths``.

    Each level is listed with glob and sub-folders are handled by calling
    this function again on them.
    """
    for entry in glob.glob(f'{parent_dir}/**'):
        if not os.path.isdir(entry):
            files_abs_paths.append(entry)
            continue
        get_all_files(entry)
# Folder that is searched and that also receives the copies.
parent_dir = r"C:/Documents"
# get all files recursively in parent dir
get_all_files(parent_dir)
# copy each collected file into parent_dir
for fl in files_abs_paths:
    # bare file name, without its original folder
    file_name = os.path.basename(fl)
    # location of the copy directly inside parent_dir
    new_file_loc = f'{parent_dir}/{file_name}'
    # Never overwrite: a name that already exists in parent_dir is skipped,
    # which also covers files that already live there.
    if not os.path.exists(new_file_loc):
        shutil.copyfile(fl, new_file_loc)
You can also get all the files from a folder tree using os.walk:
If you don't mind overwriting files with duplicate names:
from os import walk, rename
from os.path import join
def collect_files(root):
    """Move every file found below *root* directly into *root*.

    A file that already exists in *root* under the same name is overwritten.
    ``os.replace`` is used instead of ``os.rename`` because ``rename`` raises
    on Windows when the destination exists, whereas ``replace`` overwrites on
    every platform.
    """
    from os import replace  # local import: keeps the snippet self-contained

    for src_path, _, files in walk(root):
        # Files already sitting in root stay where they are.
        if src_path == root:
            continue
        for name in files:
            replace(join(src_path, name), join(root, name))
If you want to add a number to the end of files with duplicate names:
from os import walk, rename
from os.path import join, splitext, exists
def collect_files(root):
    """Move every file found below *root* directly into *root*, never overwriting.

    When the name is already taken in *root*, a zero-padded counter
    (``_001``, ``_002``, ...) is inserted before the extension until a free
    name is found.
    """
    for src_path, _, files in walk(root):
        if src_path == root:
            continue
        for name in files:
            stem, ext = splitext(name)
            target = name
            counter = 1
            while exists(join(root, target)):
                target = f'{stem}_{counter:0>3}{ext}'
                counter += 1
            rename(join(src_path, name), join(root, target))

How to loop over folders and skip one

I have this code below, I want to skip "NetIQ" folder in my iteration, but I think I'm missing something there because I still get files from that folder when I run my code, please help.
# NOTE(review): in Python 3 a non-raw string containing "\U" is an invalid
# unicode escape; presumably a raw string (r"...") was intended - confirm.
path = "C:\User\Work\Identity\TestFolders"
def list_files(dir):
    """Return the joined root/name path of the files that os.walk(dir) reports.

    The intent is to leave out the "NetIQ" folder, but the check below does
    not achieve that (see the comment on it).
    """
    r = []
    skip = ["NetIQ"]
    for root, dirs, files in os.walk(dir):
        # `dirs` is the list of sub-directory names inside `root`; testing the
        # whole list for membership in `skip` (a list of strings) is never
        # true, so no folder is actually skipped.
        if dirs in skip:
            continue
        else:
            for name in files:
                r.append(os.path.join(root, name))
    return r
print(list_files(path))
Try out a list comprehension:
def list_files(dir):
    """Return the paths of all files below *dir*, ignoring "NetIQ" folders.

    The sub-directory list handed out by os.walk is filtered in place, which
    keeps the walk from entering any folder named in ``skip``.
    """
    skip = ["NetIQ"]
    r = []
    for root, dirs, files in os.walk(dir):
        dirs[:] = [d for d in dirs if d not in skip]
        r.extend(os.path.join(root, name) for name in files)
    return r
print(list_files(path))
I think your expectation of the output of this for loop is wrong. os.walk does a depth first search through your directories and will yield a new output (root, dirs and files) for every subdirectory. So if you say
if dirs in skip:
continue
This will only apply for the directory where NetIQ is actually located - check, the files in this folder should be missing.
For a solution, see the other answers...
That's because dirs is not a string but a list of the sub-directory names found in root, so `dirs in skip` is never true. You can take a look at the os.walk documentation for details.
You can either check if the first item in that list is in skip:
def list_files(dir):
    """Return the paths of all files below *dir*, excluding skipped folders.

    Folders named in ``skip`` are removed from the list of sub-directories
    that os.walk is about to visit, so neither they nor anything nested
    below them is entered. This also works for folders without any
    sub-directories, where indexing ``dirs[0]`` would raise IndexError.
    """
    skip = {"omegleClient"}
    r = []
    for root, dirs, files in os.walk(dir):
        # In-place edit: os.walk (top-down) only descends into what remains.
        dirs[:] = [d for d in dirs if d not in skip]
        r.extend(os.path.join(root, name) for name in files)
    return r
or check if any item in the list is in skip like so:
def list_files(dir):
    """Return the paths of all files below *dir*, excluding skipped folders.

    Any sub-directory whose name appears in ``skip`` is pruned from the walk,
    so its files are left out while the files of its parent folder are still
    reported.
    """
    r = []
    skip = ["omegleClient"]
    for root, dirs, files in os.walk(dir):
        # Drop skipped names in place so os.walk never descends into them;
        # merely testing `dirs` would hide the parent's files instead.
        dirs[:] = [d for d in dirs if d not in skip]
        for name in files:
            r.append(os.path.join(root, name))
    return r

How to create a python list with the number of file in each sub directory of a directory

I have a main directory (root) which contains 6 subdirectories.
I would like to count the number of files present in each subdirectory and add all the counts to a simple Python list.
For this result : mylist = [497643, 5976, 3698, 12, 456, 745]
I'm blocked on that code:
import os, sys
# Intended to hold one element count per sub-directory.
# Note that this name shadows the built-in `list`.
list = []
# Directory whose entries are inspected
path = "c://root"
dirs = os.listdir( path )
# This would print all the files and directories
for file in dirs:
    print (file)
#fill a list with each sub directory number of elements
for sub_dir in dirs:
    # NOTE: len(sub_dir) is the length of the entry's name, not the number of
    # items inside it; dirs.append(...) returns None (so `list` becomes None)
    # and it grows `dirs` while the loop is iterating over it.
    list = dirs.append(len(sub_dir))
My attempt at filling the list doesn't work, and I've run out of ideas...
Finding a way to iterate sub-directory of a main directory and fill a list with a function applied on each sub directory would sky rocket the speed of my actual data science project!
Thanks for your help
Abel
You can use os.path.isfile and os.path.isdir
# One entry per immediate sub-directory of `path`: the number of regular
# files it contains. Each entry is joined with its folder before the isfile
# test (a bare name would be resolved against the current working
# directory), and only the True results are summed.
res = [
    sum(
        os.path.isfile(os.path.join(path, name, entry))
        for entry in os.listdir(os.path.join(path, name))
    )
    for name in os.listdir(path)
    if os.path.isdir(os.path.join(path, name))
]
print(res)
Using the for loop
# One entry per immediate sub-directory of `path`: its number of regular files.
res = []
for name in os.listdir(path):
    dir_path = os.path.join(path, name)
    if os.path.isdir(dir_path):
        # Join every entry with its folder before testing it, and count only
        # the entries that really are files (sub-folders are not counted).
        res.append(
            sum(
                os.path.isfile(os.path.join(dir_path, entry))
                for entry in os.listdir(dir_path)
            )
        )
You need to use os.listdir on each subdirectory. The current code simply takes the length of a filepath.
import os, sys
# Number of entries found in each sub-directory (named `counts` so that the
# built-in `list` is not shadowed).
counts = []
# Main directory whose sub-directories are counted
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
    print(file)
# fill the list with each sub directory's number of elements
for sub_dir in dirs:
    # os.listdir() returns bare names, so join with `path` before using them.
    sub_path = os.path.join(path, sub_dir)
    # Plain files in `path` cannot be listed; skip them.
    if os.path.isdir(sub_path):
        temp = os.listdir(sub_path)
        # Append to the result list, never to `dirs`, which is being iterated.
        counts.append(len(temp))
Adding this line to the code will list out the subdirectory
You were almost there:
import os, sys
# Number of entries found in each sub-directory (named `counts` so that the
# built-in `list` is not shadowed).
counts = []
# Main directory whose sub-directories are counted
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
    print(file)
for sub_dir in dirs:
    # os.listdir() returns bare names; test and list the joined path, since a
    # bare name would be resolved against the current working directory.
    sub_path = os.path.join(path, sub_dir)
    if os.path.isdir(sub_path):
        counts.append(len(os.listdir(sub_path)))
print(counts)
As an alternative, you can also utilize glob module for this and other related tasks.
I have created a test directory containing 3 subdirectories l,m and k containing 3 test files each.
import os, glob
# Number of entries matched in each immediate sub-directory (named `counts`
# so that the built-in `list` is not shadowed).
counts = []
path = "test" # you can leave this "." if you want files in the current directory
# Only the first result of os.walk() is needed: it lists the immediate
# sub-directories of `path`. Walking further would also count the contents of
# nested folders. The default covers a `path` that does not exist.
_, subdirs, _ = next(os.walk(path, topdown=True), (path, [], []))
for name in subdirs:
    counts.append(len(glob.glob(os.path.join(path, name, '*'))))
print(counts)
Output :
[3, 3, 3]

Get absolute path of files in sub-directory

I have a directory that consists of other directories. Each of those sub-directories have files that I need the absolute path for. For example, let's say the parent directory is /home/Documents and each of the sub-directories is 1, 2,..., 10. I have tried something like files = [os.path.abspath(f) for d in os.listdir('/home/Documents') for f in os.listdir(d)], but that gets me something like (for a file) /home/Documents/file1, when it should be /home/Documents/1/file1. Is there a way to do this with the sub-directory in there?
Yes. You can try os.walk.
Consider the following path which has 3 sub directories: '1', '2', '3'.
- '1' has a file ("123.txt")
- '2' is empty
- '3' has 2 files ("123.txt", "1234.txt")
import os

# Top folder whose files are listed.
path = r"C:\Users\hvasala\Documents\Udemy Course\project\del"
# Flatten the walk into one stream of joined folder/file paths and print each.
for full_path in (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk(path)
    for filename in filenames
):
    print(full_path)
Output:
C:\Users\hvasala\Documents\Udemy Course\project\del\1\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\1234.txt
Use os.path.join:
root = '/tmp/project'
# Paths of the files one level below `root`. Entries of `root` that are not
# directories are skipped, because os.listdir() cannot list a plain file.
files = [
    os.path.join(root, d, f)
    for d in os.listdir(root)
    if os.path.isdir(os.path.join(root, d))
    for f in os.listdir(os.path.join(root, d))
]
# print() call form: the Python 2 print statement is a syntax error in Python 3.
print(files)
Output:
['/tmp/project/auth/__init__.py', '/tmp/project/controllers/__init__.py']
Try this code below:
import os
def find_file_name(path=None):
    """Return the joined paths of all files below *path*, recursively.

    When *path* is empty or None the current working directory is searched.
    """
    base = path if path else os.getcwd()
    found = []
    for entry in os.listdir(base):
        candidate = os.path.join(base, entry)
        if os.path.isdir(candidate):
            found.extend(find_file_name(path=candidate))
        else:
            found.append(candidate)
    return found
def find_file_name(path=None, extention=".pdf"):
    """Return the names of all files below *path* that end with *extention*.

    Args:
        path: Folder to search recursively; the current working directory is
            used when it is empty or None.
        extention: File-name suffix to match (default ".pdf").

    Returns:
        A list of bare file names (without their folder).
    """
    if not path:
        path = os.getcwd()
    pdf_files = []
    for element in os.listdir(path):
        full_path = os.path.join(path, element)
        if os.path.isdir(full_path):
            # Forward the extension; otherwise nested folders would always be
            # searched with the default one.
            pdf_files += find_file_name(path=full_path, extention=extention)
        elif element.endswith(extention):
            # Suffix match: a substring test would also accept names such as
            # "report.pdf.bak".
            pdf_files.append(element)
    return pdf_files

Python, copy only directories

I have a program that has a list of some files. I have to copy only the directories and the subdirectories from the list to a specified directories and don't need to copy the files. I tried this, but it doesn't work.
def copiarDirs():
    """Copy the selected entries into a directory chosen through a dialog.

    NOTE(review): as quoted, this function references `root` and `i`, neither
    of which is defined inside it.
    """
    # presumably `list` is a Tk Listbox defined elsewhere (it shadows the
    # built-in `list`) - confirm against the rest of the program
    items = list.curselection()
    desti = tkFileDialog.askdirectory()
    # os.walk() yields (dirpath, dirnames, filenames) tuples, so `dirs` is
    # bound to the whole tuple here rather than to a list of folder names.
    for dirs in os.walk(items, topdown=False):
        for name in dirs:
            #for i in items :
            aux=root+"/"+list.get(i)
            # The shell command is built by plain string concatenation, so
            # paths containing spaces are split by the shell.
            tryhard=("cp "+str(aux)+" "+str(desti))
            os.system(tryhard)
Try this:
import os
def copyDirs(source, destination):
    """Recreate the folder tree of *source* below *destination*; no files are copied.

    Every folder reached by walking *source* is created under *destination*
    using its path without the drive prefix, so ``D:\\Work\\a`` becomes
    ``<destination>\\Work\\a``. Folders that contain no files are created as
    well, and paths without a drive letter are accepted.

    Args:
        source: Top folder whose directory structure is copied.
        destination: Folder below which the structure is recreated.
    """
    for subdir, _, _ in os.walk(source):
        # splitdrive() removes "D:" where present and leaves other paths
        # untouched; leading separators are stripped so that join() treats
        # the remainder as relative to `destination`.
        tail = os.path.splitdrive(subdir)[1].lstrip("\\/")
        os.makedirs(os.path.join(destination, tail), exist_ok=True)


sourceDir = 'D:\\Work\\'
destDir = 'D:\\Dest\\'
copyDirs(sourceDir, destDir) #calling function

Categories