How to loop over folders and skip one - python

I have the code below and I want to skip the "NetIQ" folder in my iteration, but I think I'm missing something, because I still get files from that folder when I run my code. Please help.
# Root folder to scan. Raw string: otherwise "\U" starts a unicode escape and
# the literal is a SyntaxError on Python 3.
path = r"C:\User\Work\Identity\TestFolders"
# Collect the joined path of every file that os.walk() reports under `dir`.
def list_files(dir):
r = []
skip = ["NetIQ"]
for root, dirs, files in os.walk(dir):
# NOTE(review): `dirs` is the list of subdirectory names of `root`, while
# `skip` holds strings, so this test compares a list against strings and
# never matches -- no directory is skipped.
if dirs in skip:
continue
else:
for name in files:
r.append(os.path.join(root, name))
return r
print(list_files(path))

Try out a list comprehension:
def list_files(dir, skip=("NetIQ",)):
    """Return the paths of all files under *dir*, ignoring skipped folders.

    Args:
        dir: Root directory to walk.
        skip: Folder names to leave out, wherever they occur in the tree.

    Returns:
        A list of file paths, each joined onto the directory that
        os.walk() reported for it.
    """
    r = []
    for root, dirs, files in os.walk(dir):
        # Slice assignment mutates the list os.walk() holds, so the walk
        # never descends into a skipped folder.
        dirs[:] = [d for d in dirs if d not in skip]
        r.extend(os.path.join(root, name) for name in files)
    return r
print(list_files(path))

I think your expectation of the output of this for loop is wrong. os.walk does a depth first search through your directories and will yield a new output (root, dirs and files) for every subdirectory. So if you say
if dirs in skip:
continue
This will only apply for the directory where NetIQ is actually located - check, the files in this folder should be missing.
For a solution, see the other answers...

That's because dirs is not a string but a list of subdirectory names. You can take a look at the os.walk documentation.
You can either check if the first item in the tuple is in skip:
def list_files(dir, skip=("omegleClient",)):
    """Return the paths of all files under *dir*, ignoring skipped folders.

    Args:
        dir: Root directory to walk.
        skip: Folder names to leave out, wherever they occur in the tree.

    Returns:
        A list of file paths joined onto their containing directory.
    """
    r = []
    for root, dirs, files in os.walk(dir):
        # `dirs` is a (possibly empty) list, so indexing dirs[0] would raise
        # IndexError in every leaf directory. Check each skipped name and
        # remove it in place, which also stops os.walk() from descending.
        for unwanted in skip:
            if unwanted in dirs:
                dirs.remove(unwanted)
        for name in files:
            r.append(os.path.join(root, name))
    return r
or check if any item in the tuple is in skip like so:
def list_files(dir, skip=("omegleClient",)):
    """Return the paths of all files under *dir* that are not inside a skipped folder.

    Args:
        dir: Root directory to walk.
        skip: Folder names whose contents (at any depth) are left out.

    Returns:
        A list of file paths joined onto their containing directory.
    """
    r = []
    for root, dirs, files in os.walk(dir):
        # Test the folders on the way from `dir` down to `root`. Testing
        # `dirs` instead would drop the parent's files and keep the skipped
        # folder's own files.
        parts = os.path.relpath(root, dir).split(os.sep)
        if any(part in skip for part in parts):
            continue
        for name in files:
            r.append(os.path.join(root, name))
    return r

Related

How to create a python list with the number of file in each sub directory of a directory

I have a main directory (root) which contains 6 subdirectories.
I would like to count the number of files present in each subdirectory and add them all to a simple Python list.
For this result : mylist = [497643, 5976, 3698, 12, 456, 745]
I'm blocked on that code:
import os, sys
# NOTE(review): this name shadows the built-in `list` type.
list = []
# Directory whose entries are listed below
path = "c://root"
dirs = os.listdir( path )
# This would print all the files and directories
for file in dirs:
print (file)
#fill a list with each sub directory number of elements
for sub_dir in dirs:
# NOTE(review): len(sub_dir) is the length of the entry's name string, not a
# file count; it is appended to `dirs` (the list being iterated), and
# append() returns None, which is then bound to `list`.
list = dirs.append(len(sub_dir))
My attempt at filling the list doesn't work, and I've dramatically reached the limit of what I can do...
Finding a way to iterate sub-directory of a main directory and fill a list with a function applied on each sub directory would sky rocket the speed of my actual data science project!
Thanks for your help
Abel
You can use os.path.isfile and os.path.isdir
# Number of regular files directly inside each immediate subdirectory of
# `path`. Summing the isfile() results counts only files; taking len() of the
# mapped booleans would count every entry. Each entry is joined onto its
# folder so the test does not depend on the current working directory.
res = [
    sum(
        os.path.isfile(os.path.join(path, name, entry))
        for entry in os.listdir(os.path.join(path, name))
    )
    for name in os.listdir(path)
    if os.path.isdir(os.path.join(path, name))
]
print(res)
Using the for loop
# Number of regular files directly inside each immediate subdirectory of `path`.
res = []
for name in os.listdir(path):
    dir_path = os.path.join(path, name)
    if not os.path.isdir(dir_path):
        continue
    # Join each entry onto dir_path before testing it: a bare name would be
    # resolved against the current working directory. sum() counts only the
    # True results, i.e. the regular files.
    res.append(
        sum(
            os.path.isfile(os.path.join(dir_path, entry))
            for entry in os.listdir(dir_path)
        )
    )
You need to use os.listdir on each subdirectory. The current code simply takes the length of a filepath.
import os
import sys

# Entry count of each subdirectory of `path`, in os.listdir() order.
mylist = []
path = "c://root"
dirs = os.listdir(path)
# Print every entry (files and directories) found directly under `path`.
for file in dirs:
    print(file)
# Fill the list with the number of entries of each subdirectory.
for sub_dir in dirs:
    # os.listdir() returns bare names, so join them onto `path`.
    sub_path = os.path.join(path, sub_dir)
    # Plain files cannot be listed; only directories contribute a count.
    if os.path.isdir(sub_path):
        mylist.append(len(os.listdir(sub_path)))
Adding this line to the code will list out the subdirectory
You were almost there:
import os
import sys

# Entry count of each subdirectory of `path`, in os.listdir() order.
mylist = []
path = "c://root"
dirs = os.listdir(path)
# Print every entry (files and directories) found directly under `path`.
for file in dirs:
    print(file)
for sub_dir in dirs:
    # Test the joined path: a bare name would be checked against the current
    # working directory rather than `path`.
    sub_path = os.path.join(path, sub_dir)
    if os.path.isdir(sub_path):
        mylist.append(len(os.listdir(sub_path)))
print(mylist)
As an alternative, you can also utilize glob module for this and other related tasks.
I have created a test directory containing 3 subdirectories l,m and k containing 3 test files each.
import glob
import os

# Number of glob matches ("*", so dot-files are not counted) inside every
# directory found below `path`, at any depth.
mylist = []
path = "test"  # you can leave this "." if you want files in the current directory
for root, dirs, files in os.walk(path, topdown=True):
    for name in dirs:
        # Escape the directory part so characters such as "[" in a folder
        # name are matched literally instead of as glob syntax.
        folder = glob.escape(os.path.join(root, name))
        mylist.append(len(glob.glob(os.path.join(folder, "*"))))
print(mylist)
Output :
[3, 3, 3]

Is there an efficient way to recurse in a directory?

I want to perform:
iterate over the content of the folder
if content is file, append to list
if content is folder, goto 1
if folder name is "depth" or "ir", ignore
I am using python. Can you help?
ended up doing something like:
_files = []
dir = "path/to/folder"
for root, dirs, files in os.walk(dir, topdown=False):
for name in files:
files = os.path.join(root, name)
if root.split("/")[-1] in ["depth", "ir"]:
continue
_files.append(files)
print(_files)
The os.walk() will recurse for you.
import os

# Names of all files below the start directory, ignoring any folder named
# "depth" or "ir" together with everything underneath it.
res = []
for (root, dirs, files) in os.walk('/path/to/dir'):
    # Prune in place so os.walk() never enters an ignored folder; comparing
    # the bare names in `dirs` avoids splitting `root` on a hard-coded "/".
    dirs[:] = [d for d in dirs if d not in ("depth", "ir")]
    # files is a list of file names (without their directory)
    res.extend(files)
print(res)
Try this
import os
basepath ="<path>"
# Accumulates the names of the files found by loopover().
files = []


def loopover(path):
    """Recursively append the names of files under *path* to ``files``.

    Folders named "depth" or "ir" are not entered. Only the bare file name
    is recorded, not its directory.

    Args:
        path: Directory to scan.
    """
    for c in os.listdir(path):
        d = os.path.join(path, c)
        if os.path.isfile(d):
            files.append(c)
        elif os.path.isdir(d) and c not in ("depth", "ir"):
            loopover(d)
loopover(basepath)

Get absolute path of files in sub-directory

I have a directory that consists of other directories. Each of those sub-directories have files that I need the absolute path for. For example, let's say the parent directory is /home/Documents and each of the sub-directories is 1, 2,..., 10. I have tried something like files = [os.path.abspath(f) for d in os.listdir('/home/Documents') for f in os.listdir(d)], but that gets me something like (for a file) /home/Documents/file1, when it should be /home/Documents/1/file1. Is there a way to do this with the sub-directory in there?
Yes. You can try os.walk.
Consider the following path which has 3 sub directories: '1', '2', '3'.
- '1' has a file ("123.txt")
- '2' is empty
- '3' has 2 files ("123.txt", "1234.txt")
import os

path = r"C:\Users\hvasala\Documents\Udemy Course\project\del"
# Print the full path of every file found at any depth below `path`.
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        print(os.path.join(dirname, filename))
Output:
C:\Users\hvasala\Documents\Udemy Course\project\del\1\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\1234.txt
Use os.path.join:
root = '/tmp/project'
# Paths of the files one level below `root`. Entries of `root` that are not
# directories are filtered out, because os.listdir() would raise on them.
files = [
    os.path.join(root, d, f)
    for d in os.listdir(root)
    if os.path.isdir(os.path.join(root, d))
    for f in os.listdir(os.path.join(root, d))
]
print(files)
Output:
['/tmp/project/auth/__init__.py', '/tmp/project/controllers/__init__.py']
Try this code below:
import os
def find_file_name(path=None):
    """Return the full paths of all non-directory entries below *path*.

    Args:
        path: Directory to scan recursively; the current working directory
            is used when this is None or empty.

    Returns:
        A list of paths, each joined onto the directory it was found in.
    """
    paths = []
    if not path:
        path = os.getcwd()
    for element in os.listdir(path):
        full_path = os.path.join(path, element)
        if os.path.isdir(full_path):
            paths += find_file_name(path=full_path)
        else:
            paths.append(full_path)
    # Plain return: the loop has no `break`, so a `for ... else` clause here
    # would only obscure that this always runs.
    return paths
def find_file_name(path=None, extention=".pdf"):
    """Return the names of all files below *path* that end with *extention*.

    Args:
        path: Directory to scan recursively; the current working directory
            is used when this is None or empty.
        extention: File-name suffix to match, including the dot.

    Returns:
        A list of bare file names (without their directory).
    """
    pdf_files = []
    if not path:
        path = os.getcwd()
    for element in os.listdir(path):
        full_path = os.path.join(path, element)
        if os.path.isdir(full_path):
            # Pass the suffix down; otherwise nested folders silently fall
            # back to the ".pdf" default.
            pdf_files += find_file_name(path=full_path, extention=extention)
        elif element.endswith(extention):
            # Suffix match, so "report.pdf.bak" is not taken for a PDF.
            pdf_files.append(element)
    return pdf_files

Python, copy only directories

I have a program that has a list of some files. I have to copy only the directories and the subdirectories from the list to a specified directories and don't need to copy the files. I tried this, but it doesn't work.
# Builds a "cp <source> <destination>" shell command for list entries and runs
# it; the destination comes from a directory-chooser dialog.
# NOTE(review): `list` here appears to be a Tk listbox widget defined
# elsewhere (it shadows the built-in) -- confirm against the rest of the program.
def copiarDirs():
items = list.curselection()
desti = tkFileDialog.askdirectory()
# NOTE(review): os.walk() yields (dirpath, dirnames, filenames) tuples, so
# `dirs` is that whole 3-tuple and `name` iterates over its three members.
for dirs in os.walk(items, topdown=False):
for name in dirs:
#for i in items :
# NOTE(review): `root` and `i` are not assigned anywhere in this function;
# the loop that would bind `i` is commented out above.
aux=root+"/"+list.get(i)
# The command is assembled by string concatenation and passed to the shell.
tryhard=("cp "+str(aux)+" "+str(desti))
os.system(tryhard)
Try this:
import os
def copyDirs(source, destination):
    """Recreate the directory tree of *source* under *destination*, without files.

    The full path of each source directory, minus its drive, is mirrored
    below *destination* (e.g. ``D:\\Work\\a`` becomes
    ``<destination>\\Work\\a``).

    Args:
        source: Directory whose tree is replicated.
        destination: Directory under which the tree is created.
    """
    separators = os.sep + (os.altsep or "")
    for subdir, dirs, files in os.walk(source):
        # splitdrive() also copes with paths that carry no "X:" prefix, where
        # splitting on ":" would raise IndexError.
        tail = os.path.splitdrive(subdir)[1].lstrip(separators)
        target = os.path.join(destination, tail)
        # Create every directory, including ones that contain no files.
        if not os.path.isdir(target):
            os.makedirs(target)
sourceDir = 'D:\\Work\\'
destDir = 'D:\\Dest\\'
copyDirs(sourceDir, destDir) #calling function

A Python walker that can ignore directories

I need a file system walker that I could instruct to ignore traversing
directories that I want to leave untouched, including all subdirectories
below that branch.
The os.walk and os.path.walk just don't do it.
Actually, os.walk may do exactly what you want. Say I have a list (perhaps a set) of directories to ignore in ignore. Then this should work:
def my_walk(top_dir, ignore):
    """Walk *top_dir* like os.walk(), without entering ignored directories.

    Args:
        top_dir: Directory at which the walk starts.
        ignore: Container of directory paths to leave out. Each is compared
            against ``os.path.join(dirpath, name)``, so it must be spelled
            the same way as paths built from *top_dir*.

    Yields:
        ``(dirpath, dirnames, filenames)`` tuples, with ignored entries
        already removed from ``dirnames``.
    """
    for dirpath, dirnames, filenames in os.walk(top_dir):
        # Slice assignment edits the list os.walk() holds, which prunes the
        # whole branch below each ignored directory.
        dirnames[:] = [
            dn for dn in dirnames
            if os.path.join(dirpath, dn) not in ignore
        ]
        yield dirpath, dirnames, filenames
It is possible to modify the second element of os.walk's return values in-place:
[...] the caller can modify the dirnames list in-place (perhaps using del or slice assignment), and walk() will only recurse into the subdirectories whose names remain in dirnames; this can be used to prune the search [...]
def fwalk(root, predicate):
    """Walk *root* like os.walk(), descending only where *predicate* allows.

    Args:
        root: Directory at which the walk starts.
        predicate: Callable ``predicate(dirpath, dirname)``; a subdirectory
            is entered only when it returns a true value.

    Yields:
        ``(dirpath, dirnames, filenames)`` tuples, with rejected entries
        already removed from ``dirnames``.
    """
    for dirpath, dirnames, filenames in os.walk(root):
        # Pass the current directory as the first argument, and edit the
        # list in place so os.walk() skips the rejected branches.
        dirnames[:] = [d for d in dirnames if predicate(dirpath, d)]
        yield dirpath, dirnames, filenames
Now, you can just hand in a predicate for subdirectories:
>>> ignore_list = [...]
>>> list(fwalk("some/root", lambda r, d: d not in ignore_list))
Here's the best and simple solution.
def walk(ignores):
    """Print every directory below the current working directory, skipping some.

    For each visited directory, this prints its path, its remaining
    subdirectory names, and its file names, each on a separate line.

    Args:
        ignores: Directory names that must not be entered.
    """
    path = os.getcwd()
    for root, dirs, files in os.walk(path):
        for ignored in ignores:
            # Removing a name from `dirs` in place stops os.walk() from
            # descending into that folder.
            if ignored in dirs:
                dirs.remove(ignored)
        print(root)
        print(dirs)
        print(files)
walk(['.git', '.svn'])
Remember, if you remove a folder name from dirs, it won't be explored by os.walk.
hope it helps
So I made this home-rolled walker function:
import os
from os.path import join, isdir, islink, isfile
def mywalk(top, topdown=True, onerror=None, ignore_list=('.ignore',)):
    """Walk a directory tree like os.walk(), skipping marked directories.

    A directory is skipped, together with everything below it, when it
    directly contains an entry whose name is in *ignore_list*.

    Args:
        top: Directory at which the walk starts.
        topdown: If true, yield a directory before its subdirectories;
            otherwise after them.
        onerror: Optional callable invoked with the OSError raised when a
            directory cannot be listed. That directory is then skipped.
        ignore_list: Entry names that mark a directory as ignored.

    Yields:
        ``(top, dirs, nondirs)`` tuples: the directory path, the names of
        its subdirectories, and the names of its other entries.
    """
    try:
        names = os.listdir(top)
    except OSError as err:
        # Listing failures are reported rather than raised, mirroring os.walk().
        if onerror is not None:
            onerror(err)
        return
    # A marker entry disables this directory and its whole subtree.
    if any(name in ignore_list for name in names):
        return
    dirs, nondirs = [], []
    for name in names:
        if isdir(join(top, name)):
            dirs.append(name)
        else:
            nondirs.append(name)
    if topdown:
        yield top, dirs, nondirs
    for name in dirs:
        path = join(top, name)
        # Symbolic links to directories are listed but not followed.
        if not islink(path):
            for x in mywalk(path, topdown, onerror, ignore_list):
                yield x
    if not topdown:
        yield top, dirs, nondirs

Categories