reading content of a directory and its subdirectories with python - python

I have a directory path/to/dir that contains 10 subdirectories. There are files in all of the subdirectories. I want to collect the files from every subdirectory into a single list.
How can I do this?

As I understand it, you want to list all the filenames in a particular directory and its sub-directories.
import os

# Collect the full path of every file found below the starting directory.
list1 = []
# In a raw string a single backslash is already literal, so the separators
# must not be doubled (and no f-string prefix is needed without placeholders).
for root, dirs, files in os.walk(r"path\to\directory", topdown=False):
    for name in files:
        a = os.path.join(root, name)
        print(a)
        list1.append(a)

import os

path = "C://Users//<user_name>//Desktop"
# Scan the directory named by `path`; calling os.scandir() with no argument
# would list the current working directory instead of the one announced below.
with os.scandir(path) as obj:
    print("Files and Directories in '% s':" % path)
    for entry in obj:
        if entry.is_dir() or entry.is_file():
            print(entry.name)
For listing whole directory:
import os
def list_dirs(path):
    """Recursively print every directory and file found below ``path``.

    A directory is printed as ``1 <full path>`` and then descended into;
    a regular file is printed as ``2 <name>``. Entries that are neither
    (for example dangling symbolic links) are skipped.
    """
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_dir():
                print(1, entry.path)
                list_dirs(entry.path)
            elif entry.is_file():
                print(2, entry.name)
            # Any other kind of entry is ignored. Stopping the loop here
            # would silently drop the remaining entries of this directory.
list_dirs(path)
It uses the os.scandir method, which is fast and also exposes extra information such as file metadata, together with recursion to list the whole directory tree.
Here is also a list-comprehension version of @FaraazKurawle's answer:
def list_dir(path):
    """Return the paths of all files below ``path``.

    The tree is walked bottom-up (``topdown=False``), so files in
    sub-directories are listed before files in their parent directory.
    A path that cannot be walked yields an empty list.
    """
    # os.walk() accepts the path as given; re-wrapping it in an f-string
    # added nothing for str input and corrupted bytes input.
    return [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(path, topdown=False)
        for name in files
    ]
some helpful links:
List all directory
Os.scandir
os.scandir vs os.listdir vs os.walk

import glob

path = '\\path\\to\\dir\\'
# With recursive=True the "**" component matches entries at every depth.
pattern = path + "**/*"
files = list(glob.glob(pattern, recursive=True))

Related

Recursive file walk only going one depth down

I am trying to create code for an assignment that can walk down a directory and return all files
I am having trouble with multilevel folders, such as
folder1
---> folder2
------->foo.txt
I have the following code
# Question code: recursive listing that does not descend past the first level.
def find_larger(path, max_n_results=10):
files = []
print(path)
path_files = os.listdir(path)
for file in path_files:
# NOTE(review): `file` is a bare entry name, so isdir() resolves it against the
# current working directory rather than against `path`.
if os.path.isdir(file):
files += find_larger(os.path.join(path, file))
files.append(file)
return files
print(find_larger('.'))
However, If I were to run that code I would get the following result
[folder1, folder2]
I have run through this in a debugger and the program is not detecting the second directory to actually be a directory.
How can I get the program to walk all the way through the directory.
Note, I am not allowed to use os.walk
os.path.isdir() takes a full path, you are only giving it the relative name. Create the path first, then test that:
def find_larger(path, max_n_results=10):
    """Return every entry found below ``path``, descending into sub-directories.

    Each entry is reported as ``path`` joined with its name. The contents of
    a directory are listed before the directory itself. ``max_n_results`` is
    accepted but not used.
    """
    print(path)
    found = []
    for entry in os.listdir(path):
        subpath = os.path.join(path, entry)
        if os.path.isdir(subpath):
            found.extend(find_larger(subpath))
        found.append(subpath)
    return found
However, you are re-inventing a wheel here, just use the os.walk() function to list directory contents :
def find_larger(path, max_n_results=10):
    """Return the paths of all files below ``path`` using os.walk().

    Only files are returned (directories are not). ``max_n_results`` is
    accepted for interface compatibility but not used.
    """
    files = []
    print(path)
    for dirpath, dirnames, filenames in os.walk(path):
        # The join helper lives in os.path; the os module has no join().
        files.extend(os.path.join(dirpath, filename) for filename in filenames)
    return files

Python folder names in the directory

how can i get the folder names existing in a directory using Python ?
I want to save all the subfolders into a list to work with the names after that but i dont know how to read the subfolder names ?
Thanks for you help
You can use os.walk()
#!/usr/bin/python
import os

# Full path of every sub-directory below the starting directory (bottom-up).
directory_list = list()
for root, dirs, files in os.walk("/path/to/your/dir", topdown=False):
    for name in dirs:
        directory_list.append(os.path.join(root, name))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(directory_list)
EDIT
If you only want the first level and not actually "walk" through the subdirectories, it is even less code:
import os

# Use the next() builtin: generators have no .next() method on Python 3.
# The default tuple avoids StopIteration when os.walk() yields nothing,
# e.g. because the directory does not exist.
root, dirs, files = next(os.walk("/path/to/your/dir"), (None, [], []))
print(dirs)
This is not really what os.walk is made for. If you really only want one level of subdirectories, you can also use os.listdir() like Yannik Ammann suggested:
root = '/path/to/my/dir'
# Keep only the entries of `root` that are directories (one level, no recursion).
dirlist = [item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item))]
# print() with a single argument behaves the same on Python 2 and Python 3.
print(dirlist)
Starting with Python 3.4, you can also use the new pathlib module:
from pathlib import Path

p = Path('some/folder')
# Keep only the immediate children of `p` that are directories.
subdirectories = list(filter(Path.is_dir, p.iterdir()))
print(subdirectories)
You can use os.listdir() here a link to the docs
Warning returns files and directories
example:
import os
# Placeholder for the directory to list.
path = 'pyth/to/dir/'
# os.listdir() returns the names of all entries: files as well as directories.
dir_list = os.listdir(path)
update: you need to check if the returned names are directories or files
import os

path = 'pyth/to/dir/'
# list of all content in a directory, filtered so only directories are returned
# os.path.join() supplies the separator when needed, so the check also works
# when `path` is given without a trailing slash.
dir_list = [directory for directory in os.listdir(path) if os.path.isdir(os.path.join(path, directory))]
You should import os first.
import os

# sorted() already returns a new list, so neither a pre-initialised empty
# list nor a wrapping comprehension is needed.
files = sorted(os.listdir(FileDirectoryPath))
This would give you list with all files in the FileDirectoryPath sorted.
I use os.listdir
Get all folder names of a directory
# Names of the entries directly inside MYDIR that are directories.
folder_names = [
    entry_name
    for entry_name in os.listdir(MYDIR)
    if os.path.isdir(os.path.join(MYDIR, entry_name))
]
Get all folder paths of a directory
# MYDIR-joined paths of the entries directly inside MYDIR that are directories.
candidate_paths = (os.path.join(MYDIR, entry_name) for entry_name in os.listdir(MYDIR))
folder_paths = [entry_path for entry_path in candidate_paths if os.path.isdir(entry_path)]
Get all file names of a directory
# Names of the entries directly inside MYDIR that are regular files.
file_names = [
    file_name
    for file_name in os.listdir(MYDIR)
    if os.path.isfile(os.path.join(MYDIR, file_name))
]
Get all file paths of a directory
# MYDIR-joined paths of the entries directly inside MYDIR that are regular files.
candidate_paths = (os.path.join(MYDIR, file_name) for file_name in os.listdir(MYDIR))
file_paths = [file_path for file_path in candidate_paths if os.path.isfile(file_path)]
For python 3 I'm using this script
import os

root = './'
# Immediate sub-directories of `root`.
dirlist = [item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item))]
# `dir` is a builtin function; a distinct loop name avoids shadowing it.
for dirname in dirlist:
    print(dirname)
Use os.walk(path)
import os

path = 'C:\\'
# Print the full path of every directory below `path`.
for root, directories, files in os.walk(path):
    for directory in directories:
        # print() with a single argument behaves the same on Python 2 and 3.
        print(os.path.join(root, directory))
Python 3.x: If you want only the directories in a given directory, try:
import os

search_path = '.'  # set your path here.
# Take only the first (top-level) result of the walk; fall back to three
# empty lists if the walk yields nothing.
first_level = next(os.walk(search_path), ([], [], []))
root, dirs, files = first_level
print(dirs)
The above example will print out a list of the directories in the current directory like this:
['dir1', 'dir2', 'dir3']
The output contains only the sub-directory names.
If the directory does not have sub-directories, it will print:
[]
os.walk() is a generator, so use next() to take only its first result. The 3-tuple of empty lists is the default for the case where os.walk() yields nothing at all, for example when the path does not exist or cannot be read. For an existing directory, even an empty one, os.walk() yields a 3-tuple for each level of the directory tree, so the default is not used. Without the default, next() would raise a StopIteration exception when nothing is yielded.
For a more compact version:
# Index 1 of the (root, dirs, files) tuple is the list of sub-directory names.
dirs = next(os.walk(search_path), ([],[],[]))[1]

Get absolute paths of all files in a directory

How do I get the absolute paths of all the files in a directory that could have many sub-folders in Python?
I know os.walk() recursively gives me a list of directories and files, but that doesn't seem to get me what I want.
os.path.abspath makes sure a path is absolute. Use the following helper function:
import os
def absoluteFilePaths(directory):
    """Yield the absolute path of every file found below ``directory``."""
    for folder, _subdirs, names in os.walk(directory):
        yield from (os.path.abspath(os.path.join(folder, name)) for name in names)
If you have Python 3.4 or newer you can use pathlib (or a third-party backport if you have an older Python version):
import pathlib

# rglob('*') is the recursive form of glob: it behaves like glob('**/*').
base = pathlib.Path(directory)
for filepath in base.rglob('*'):
    print(filepath.absolute())
If the argument given to os.walk is absolute, then the root dir names yielded during iteration will also be absolute. So, you only need to join them with the filenames:
import os

# An absolute starting point makes every `root` yielded by os.walk() absolute too.
start = os.path.abspath("../path/to/dir/")
for root, dirs, files in os.walk(start):
    for filename in files:
        print(os.path.join(root, filename))
Try:
import os

# For each file below the current directory print its relative path, its
# absolute path, and a blank separator line.
for root, dirs, files in os.walk('.'):
    for file in files:
        p = os.path.join(root, file)
        # print is a function on Python 3; the statement form is a syntax error there.
        print(p)
        print(os.path.abspath(p))
        print()
You can use os.path.abspath() to turn relative paths into absolute paths:
# Absolute path of every file found below rootdir.
file_paths = [
    os.path.abspath(os.path.join(folder, filename))
    for folder, subs, files in os.walk(rootdir)
    for filename in files
]
Starting with python 3.5 the idiomatic solution would be:
import os
def absolute_file_paths(directory):
    """Return absolute paths of the files located directly in ``directory``.

    Sub-directories are not descended into.
    """
    base = os.path.abspath(directory)
    result = []
    for entry in os.scandir(base):
        if entry.is_file():
            result.append(entry.path)
    return result
This not just reads nicer but also is faster in many cases.
For more details (like ignoring symlinks) see original python docs:
https://docs.python.org/3/library/os.html#os.scandir
All files and folders:
# Absolute path of every entry (files and folders) directly inside `directory`.
x = [os.path.abspath(os.path.join(directory, p)) for p in os.listdir(directory)]
Images (.jpg | .png):
# Absolute path of every .jpg / .png entry directly inside `directory`.
# The suffixes include the dot so that names merely ending in the letters
# "jpg" or "png" (e.g. "notajpg") are not treated as images.
x = [os.path.abspath(os.path.join(directory, p)) for p in os.listdir(directory) if p.endswith(('.jpg', '.png'))]
import os
from glob import escape, glob


def absolute_file_paths(directory):
    """Return the absolute paths of all files below ``directory``, recursively.

    Directories themselves are not included. As with any glob pattern,
    entries whose names start with a dot are not matched.
    """
    # escape() keeps glob metacharacters in the directory name literal.
    base = escape(os.path.abspath(directory))
    # "**" only descends into sub-directories when recursive=True is given.
    pattern = os.path.join(base, "**")
    return [path for path in glob(pattern, recursive=True) if os.path.isfile(path)]
Try:
from pathlib import Path

path = 'Desktop'
# Lazily select the immediate children of `path` that are regular files.
files = (candidate for candidate in Path(path).glob('*') if candidate.is_file())
for file in files:
    print(file.absolute())
I wanted to keep the subdirectory details and not the files and wanted only subdirs with one xml file in them. I can do it this way:
# Report every sub-directory below eventDirectory that holds exactly one .xml file.
for rootDirectory, subDirectories, files in os.walk(eventDirectory):
    for subDirectory in subDirectories:
        absSubDir = os.path.join(rootDirectory, subDirectory)
        if len(glob.glob(os.path.join(absSubDir, "*.xml"))) == 1:
            # print() with a single argument behaves the same on Python 2 and 3.
            print("Parsing information in " + absSubDir)
# Print every directory below `directory`, and the name and full path of
# every file whose name ends in ".JPG" (case-sensitive).
for root, directories, filenames in os.walk(directory):
    # A separate loop name keeps `directory` (the walk's starting point) intact.
    for subdirectory in directories:
        print(os.path.join(root, subdirectory))
    for filename in filenames:
        if filename.endswith(".JPG"):
            print(filename)
            print(os.path.join(root, filename))
Try This
pth = ''
# The accumulator has to exist before it can be extended in the loop below.
train_folder = []
types = os.listdir(pth)
for type_ in types:
    # Paths of the entries inside each first-level sub-directory of `pth`.
    file_names = [f'{pth}/{type_}/{x}' for x in os.listdir(f'{pth}/{type_}')]
    train_folder += file_names

Pass through the file system from a given directory and print all subfolders and file names (Python)

I need to write a program that passes through the file system from a given directory and prints all subfolders and files names.
os.walk() is your friend.
This works:
#!/usr/bin/env python
import os
def walklevel(some_dir, level):
    """Walk ``some_dir`` like os.walk(), but descend at most ``level`` levels.

    Yields the same ``(root, dirs, files)`` tuples as os.walk(). Depth is
    measured by counting path separators relative to ``some_dir``.
    """
    base = some_dir.rstrip(os.path.sep)
    base_depth = base.count(os.path.sep)
    for root, dirs, files in os.walk(base):
        yield root, dirs, files
        depth = root.count(os.path.sep)
        if depth - base_depth >= level:
            # Emptying the list in place stops os.walk() from descending further.
            dirs.clear()
# Print the directory names up to three levels below the current directory,
# indented by one tab per path separator in the parent path.
for root, dirs, files in walklevel('.', 3):
    # The indentation depends only on `root`, so compute it once per directory.
    tabs = '\t' * root.count(os.path.sep)
    # `dir` is a builtin function; a distinct loop name avoids shadowing it.
    for dirname in dirs:
        print(tabs, dirname)
This Python example should help you solve your problem.
# Walk "/mypath", printing file names and sub-directory lists.
# NOTE(review): uses Python 2 print statements; the original nesting of the
# last two statements was lost, so confirm the indentation before running.
for ROOT,DIR,FILES in os.walk("/mypath"):
for file in FILES:
print file
# DIR is the list of sub-directory names that os.walk() yields for the current ROOT.
print DIR
# NOTE(review): performOtherPythonTasks is not defined in this snippet; it looks like a placeholder for the caller's own work.
rc = performOtherPythonTasks()

Traversing File Directory

this is the first question I am posting on stackoverflow so excuse me if I did something out of the norm.
I am trying to create a python program which traverses a user selected directory to display all file contents of the folders selected. For example: Documents folders has several folders with files inside of them, I am trying to save all files in the Documents folder to an array.
The method below is what I am using to traverse a directory (hoping it is a simple problem)
# Question code: tries to gather every file below `dir` by recursing into sub-directories.
def saveFilesToArray(dir):
allFiles = []
# NOTE(review): chdir() changes the process-wide working directory and it is
# not restored after a recursive call returns.
os.chdir(dir)
for file in glob.glob("*"):
print(file)
if (os.path.isfile(file)):
allFiles.append(file)
elif(os.path.isdir(file)):
print(dir + "/" + file + " is a directory")
# NOTE(review): append() stores the returned list as one nested element
# instead of merging its items into allFiles.
allFiles.append(saveFilesToArray(dir + "/" + file))
return allFiles
This will give you just the files:
import os
def list_files(root):
    """Return the full path of every file below ``root``.

    Symbolic links to directories are followed during the walk.
    """
    return [
        os.path.join(folder, name)
        for folder, _dirs, names in os.walk(root, followlinks=True)
        for name in names
    ]
I hope this is helpful:
import os
def saveFilesToList(theDir):
    """Return the paths of all regular files found below ``theDir``.

    Names reported by os.walk() that do not resolve to an existing regular
    file are left out.
    """
    candidates = (
        os.path.join(root, name)
        for root, dirs, files in os.walk(theDir)
        for name in files
    )
    return [npath for npath in candidates if os.path.isfile(npath)]
Traverses all directories and stores the path to files (that are not directories) in the list. It seems much easier to use this than glob.

Categories