reading content of a directory and its subdirectories with python

reading content of a directory and its subdirectories with python - python

I have a directory path/to/dir that contains 10 subdirectories. There are files in all of the subdirectories. I want to append the files from every subdirectory into one single list.
How can I do this?

What i understood is you just wanted to list all the filenames from a particular directory and its sub-directory.
list1=[] #Empty list
for root, dirs, files in os.walk(fr"path\\to\\directory", topdown=False):
#Listing Directory
for name in files:
a=os.path.join(root, name)
print(a)
list1.append(a)

import os

path = "C://Users//<user_name>//Desktop"

# Scan the directory named by `path`: calling os.scandir() with no argument
# would list the current working directory instead, which would not match
# the header printed below. The `with` block closes the scandir handle.
with os.scandir(path) as entries:
    print("Files and Directories in '% s':" % path)
    for entry in entries:
        # Only regular files and directories are reported; anything else
        # (for example a dangling symlink) is silently skipped.
        if entry.is_dir() or entry.is_file():
            print(entry.name)
For listing whole directory:
import os
def list_dirs(path):
    """Recursively print every directory and regular file found under *path*.

    Each directory is printed as ``1 <path of the directory>`` and is then
    descended into; each regular file is printed as ``2 <file name>``.
    Entries that are neither (for example dangling symlinks) are skipped
    without interrupting the listing of the remaining entries.

    Args:
        path: Directory to list; anything accepted by ``os.scandir``.

    Returns:
        None. The listing is written to standard output.
    """
    # The context manager releases the directory handle even if an error
    # is raised part-way through the listing.
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_dir():
                print(1, entry.path)
                list_dirs(entry.path)
            elif entry.is_file():
                print(2, entry.name)
            # No `else: break` here: one unusual entry must not hide the
            # rest of the directory's content.
list_dirs(path)
It basically uses the os.scandir method, which provides both speed and additional benefits such as file metadata, combined with recursion to obtain a listing of the whole directory tree.
also a list comprehension method for #FaraazKurawle 's answer:
def list_dir(path):
    """Return the joined path of every file found at or below *path*.

    The tree is walked bottom-up (``topdown=False``), so files located in
    sub-directories appear in the result before the files of their parent.

    Args:
        path: Root directory of the walk; it is formatted to a string first.

    Returns:
        A list of ``os.path.join(root, name)`` strings, one per file.
    """
    top = "{}".format(path)
    collected = []
    for folder, _subdirs, filenames in os.walk(top, topdown=False):
        collected.extend(os.path.join(folder, filename) for filename in filenames)
    return collected
some helpful links:
List all directory
Os.scandir
os.scandir vs os.listdir vs os.walk

import glob
path = '\\path\\to\\dir\\'
files = [f for f in glob.glob(path + "**/*", recursive=True)]

Related

Recursive file walk only going one depth down

I am trying to create code for an assignment that can walk down a directory and return all files
I am having trouble with multilevel folders, such as
folder1
---> folder2
------->foo.txt
I have the following code
def find_larger(path, max_n_results=10):
    """Collect the names found under *path* into a flat list, recursing into
    entries that are detected as directories.

    NOTE(review): ``max_n_results`` is accepted but never used in this body.
    """
    files = []
    print(path)
    path_files = os.listdir(path)
    for file in path_files:
        # NOTE(review): `file` is the bare name returned by os.listdir(); it
        # is not joined with `path` before the isdir() test, so the check is
        # resolved against the current working directory, not against `path`.
        if os.path.isdir(file):
            files += find_larger(os.path.join(path, file))
        # Every entry (file or directory) is recorded by its bare name.
        files.append(file)
    return files
print(find_larger('.'))
However, If I were to run that code I would get the following result
[folder1, folder2]
I have run this through a debugger, and the program is not detecting the second directory to actually be a directory.
How can I get the program to walk all the way through the directory.
Note, I am not allowed to use os.walk

os.path.isdir() takes a full path, you are only giving it the relative name. Create the path first, then test that:
def find_larger(path, max_n_results=10):
    """Return the joined paths of everything found below *path*.

    Each directory is printed as it is visited. A sub-directory's content is
    added to the result before the sub-directory's own path. The
    ``max_n_results`` argument is accepted but not used.
    """
    collected = []
    print(path)
    for entry in os.listdir(path):
        full = os.path.join(path, entry)
        if os.path.isdir(full):
            # Descend first so nested entries precede their parent directory.
            collected.extend(find_larger(full))
        collected.append(full)
    return collected
However, you are re-inventing a wheel here, just use the os.walk() function to list directory contents :
def find_larger(path, max_n_results=10):
    """Return the joined path of every file found at or below *path*.

    The starting path is printed once. Only files are returned, not
    directories. ``max_n_results`` is accepted for signature compatibility
    but is not used.

    Args:
        path: Root directory handed to ``os.walk``.
        max_n_results: Unused.

    Returns:
        A list of ``os.path.join(dirpath, filename)`` strings.
    """
    files = []
    print(path)
    for dirpath, dirnames, filenames in os.walk(path):
        # os.path.join is the path-joining helper; the `os` module itself
        # has no `join` attribute.
        files += (os.path.join(dirpath, filename) for filename in filenames)
    return files

Python folder names in the directory

how can i get the folder names existing in a directory using Python ?
I want to save all the subfolders into a list to work with the names after that but i dont know how to read the subfolder names ?
Thanks for you help

You can use os.walk()
# !/usr/bin/python
import os
directory_list = list()
for root, dirs, files in os.walk("/path/to/your/dir", topdown=False):
for name in dirs:
directory_list.append(os.path.join(root, name))
print directory_list
EDIT
If you only want the first level and not actually "walk" through the subdirectories, it is even less code:
import os
root, dirs, files = os.walk("/path/to/your/dir").next()
print dirs
This is not really what os.walk is made for. If you really only want one level of subdirectories, you can also use os.listdir() like Yannik Ammann suggested:
root='/path/to/my/dir'
dirlist = [ item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item)) ]
print dirlist

Starting with Python 3.4, you can also use the new pathlib module:
from pathlib import Path
p = Path('some/folder')
subdirectories = [x for x in p.iterdir() if x.is_dir()]
print(subdirectories)

You can use os.listdir() here a link to the docs
Warning returns files and directories
example:
import os
path = 'pyth/to/dir/'
dir_list = os.listdir(path)
update: you need to check if the returned names are directories or files
import os
path = 'pyth/to/dir/'
# list of all content in a directory, filtered so only directories are returned
dir_list = [directory for directory in os.listdir(path) if os.path.isdir(path+directory)]

You should import os first.
import os
files=[]
files = [f for f in sorted(os.listdir(FileDirectoryPath))]
This would give you list with all files in the FileDirectoryPath sorted.

I use os.listdir
Get all folder names of a directory
folder_names = []
for entry_name in os.listdir(MYDIR):
entry_path = os.path.join(MYDIR, entry_name)
if os.path.isdir(entry_path):
folder_names.append(entry_name)
Get all folder paths of a directory
folder_paths = []
for entry_name in os.listdir(MYDIR):
entry_path = os.path.join(MYDIR, entry_name)
if os.path.isdir(entry_path):
folder_paths.append(entry_path)
Get all file names of a directory
file_names = []
for file_name in os.listdir(MYDIR):
file_path = os.path.join(MYDIR, file_name)
if os.path.isfile(file_path):
file_names.append(file_name)
Get all file paths of a directory
file_paths = []
for file_name in os.listdir(MYDIR):
file_path = os.path.join(MYDIR, file_name)
if os.path.isfile(file_path):
file_paths.append(file_path)

For python 3 I'm using this script
import os
root='./'
dirlist = [ item for item in os.listdir(root) if os.path.isdir(os.path.join(root, item)) ]
for dir in dirlist:
print(dir)

Use os.walk(path)
import os
path = 'C:\\'
for root, directories, files in os.walk(path):
for directory in directories:
print os.path.join(root, directory)

Python 3.x: If you want only the directories in a given directory, try:
import os
search_path = '.' # set your path here.
root, dirs, files = next(os.walk(search_path), ([],[],[]))
print(dirs)
The above example will print out a list of the directories in the current directory like this:
['dir1', 'dir2', 'dir3']
The output contains only the sub-directory names.
If the directory does not have sub-directories, it will print:
[]
os.walk() is a generator, so use next() to take only its first result. The 3-tuple of empty lists is the default returned when the generator yields nothing at all, which happens when search_path does not exist or cannot be read (os.walk() ignores such errors by default); without that default, next() would raise a StopIteration exception. A directory that exists but has no sub-directories still yields one 3-tuple, whose dirs list is simply empty.
For a more compact version:
dirs = next(os.walk(search_path), ([],[],[]))[1]

Get absolute paths of all files in a directory

How do I get the absolute paths of all the files in a directory that could have many sub-folders in Python?
I know os.walk() recursively gives me a list of directories and files, but that doesn't seem to get me what I want.

os.path.abspath makes sure a path is absolute. Use the following helper function:
import os
def absoluteFilePaths(directory):
    """Lazily yield the absolute path of each file at or below *directory*.

    This is a generator: paths are produced one at a time, in the order in
    which ``os.walk`` reports them.
    """
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            joined = os.path.join(folder, name)
            yield os.path.abspath(joined)

If you have Python 3.4 or newer you can use pathlib (or a third-party backport if you have an older Python version):
import pathlib
for filepath in pathlib.Path(directory).glob('**/*'):
print(filepath.absolute())

If the argument given to os.walk is absolute, then the root dir names yielded during iteration will also be absolute. So, you only need to join them with the filenames:
import os
for root, dirs, files in os.walk(os.path.abspath("../path/to/dir/")):
for file in files:
print(os.path.join(root, file))

Try:
import os
for root, dirs, files in os.walk('.'):
for file in files:
p=os.path.join(root,file)
print p
print os.path.abspath(p)
print

You can use os.path.abspath() to turn relative paths into absolute paths:
file_paths = []
for folder, subs, files in os.walk(rootdir):
for filename in files:
file_paths.append(os.path.abspath(os.path.join(folder, filename)))

Starting with python 3.5 the idiomatic solution would be:
import os
def absolute_file_paths(directory):
    """Return the absolute paths of the files located directly in *directory*.

    Only the given directory itself is scanned; sub-directories are neither
    returned nor descended into.
    """
    base = os.path.abspath(directory)
    found = []
    for entry in os.scandir(base):
        if entry.is_file():
            found.append(entry.path)
    return found
This not just reads nicer but also is faster in many cases.
For more details (like ignoring symlinks) see original python docs:
https://docs.python.org/3/library/os.html#os.scandir

All files and folders:
x = [os.path.abspath(os.path.join(directory, p)) for p in os.listdir(directory)]
Images (.jpg | .png):
x = [os.path.abspath(os.path.join(directory, p)) for p in os.listdir(directory) if p.endswith(('jpg', 'png'))]

import os
from glob import glob


def absolute_file_paths(directory):
    """Return the absolute paths of all files at or below *directory*.

    The ``**`` pattern only recurses when ``recursive=True`` is passed, so it
    is given explicitly. Directories matched by the pattern are filtered out
    and every remaining match is made absolute.

    Note that glob does not match names beginning with a dot by default, so
    hidden files are not included.

    Args:
        directory: Root directory to search; may be relative.

    Returns:
        A list of absolute file paths.
    """
    pattern = os.path.join(directory, "**")
    return [
        os.path.abspath(match)
        for match in glob(pattern, recursive=True)
        if os.path.isfile(match)
    ]

Try:
from pathlib import Path
path = 'Desktop'
files = filter(lambda filepath: filepath.is_file(), Path(path).glob('*'))
for file in files:
print(file.absolute())

I wanted to keep the subdirectory details and not the files and wanted only subdirs with one xml file in them. I can do it this way:
for rootDirectory, subDirectories, files in os.walk(eventDirectory):
for subDirectory in subDirectories:
absSubDir = os.path.join(rootDirectory, subDirectory)
if len(glob.glob(os.path.join(absSubDir, "*.xml"))) == 1:
print "Parsing information in " + absSubDir

for root, directories, filenames in os.walk(directory):
for directory in directories:
print os.path.join(root, directory)
for filename in filenames:
if filename.endswith(".JPG"):
print filename
print os.path.join(root,filename)

Try This
pth=''
types=os.listdir(pth)
for type_ in types:
file_names=os.listdir(f'{pth}/{type_}')
file_names=list(map(lambda x:f'{pth}/{type_}/{x}',file_names))
train_folder+=file_names

Pass through the file system from a given directory and print all subfolders and file names (Python)

I need to write a program that passes through the file system from a given directory and prints all subfolders and files names.

os.walk() is your friend.

This works:
#!/usr/bin/env python
import os
def walklevel(some_dir, level):
    """Walk *some_dir* like ``os.walk`` but stop descending after *level* levels.

    Yields the same ``(root, dirs, files)`` triples as ``os.walk``. Depth is
    measured by counting path separators in each root relative to the
    starting directory (trailing separators on *some_dir* are stripped first).
    """
    sep = os.path.sep
    some_dir = some_dir.rstrip(sep)
    base_depth = some_dir.count(sep)
    for current, subdirs, filenames in os.walk(some_dir):
        yield current, subdirs, filenames
        # Emptying the list in place after the yield stops the top-down walk
        # from descending any further below this directory.
        if current.count(sep) - base_depth >= level:
            subdirs[:] = []
for root,dirs,files in walklevel('.',3):
for dir in dirs:
tabs='\t'*root.count(os.path.sep)
print tabs,dir

This Python example should help you solve your problem.
for ROOT,DIR,FILES in os.walk("/mypath"):
for file in FILES:
print file
print DIR
rc = performOtherPythonTasks()

Traversing File Directory

this is the first question I am posting on stackoverflow so excuse me if I did something out of the norm.
I am trying to create a python program which traverses a user selected directory to display all file contents of the folders selected. For example: Documents folders has several folders with files inside of them, I am trying to save all files in the Documents folder to an array.
The method below is what I am using to traverse a directory (hoping it is a simple problem)
def saveFilesToArray(dir):
    """Collect the entries of *dir* into a list, recursing into directories.

    NOTE(review): this changes the process working directory via os.chdir()
    and never changes it back, so after a recursive call the remaining names
    of the outer loop are tested against the sub-directory instead of *dir*.
    """
    allFiles = []
    os.chdir(dir)
    # "*" is expanded relative to the working directory set just above.
    for file in glob.glob("*"):
        print(file)
        if (os.path.isfile(file)):
            allFiles.append(file)
        elif(os.path.isdir(file)):
            print(dir + "/" + file + " is a directory")
            # NOTE(review): append() stores the recursive result as one nested
            # list element rather than merging its items into allFiles.
            allFiles.append(saveFilesToArray(dir + "/" + file))
    return allFiles

This will give you just the files:
import os
def list_files(root):
    """Return the joined path of every file at or below *root*.

    Symbolic links to directories are followed during the walk
    (``followlinks=True``).
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(root, followlinks=True)
        for filename in filenames
    ]

I hope this is helpful:
import os
def saveFilesToList(theDir):
    """Return the paths of all regular files found at or below *theDir*.

    Every name reported by ``os.walk`` is joined with its folder and kept
    only if ``os.path.isfile`` confirms it.
    """
    collected = []
    for folder, _subdirs, names in os.walk(theDir):
        candidates = (os.path.join(folder, name) for name in names)
        collected.extend(path for path in candidates if os.path.isfile(path))
    return collected
Traverses all directories and stores the path to files (that are not directories) in the list. It seems much easier to use this than glob.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

reading content of a directory and its subdirectories with python - python

I have a directory path/to/dir that contains 10 subdirectories. There are files in all of the subdirectories. I want to append the files from every subdirectory into one single list. How can I do this?

What i understood is you just wanted to list all the filenames from a particular directory and its sub-directory. list1=[] #Empty list for root, dirs, files in os.walk(fr"path\\to\\directory", topdown=False): #Listing Directory for name in files: a=os.path.join(root, name) print(a) list1.append(a)

import glob path = '\\path\\to\\dir\\' files = [f for f in glob.glob(path + "**/*", recursive=True)]

Related

Recursive file walk only going one depth down

Python folder names in the directory

Get absolute paths of all files in a directory

Pass through the file system from a given directory and print all subfolders and file names (Python)

Traversing File Directory

Categories

Resources