Get absolute path of files in sub-directory - python

I have a directory that consists of other directories. Each of those sub-directories have files that I need the absolute path for. For example, let's say the parent directory is /home/Documents and each of the sub-directories is 1, 2,..., 10. I have tried something like files = [os.path.abspath(f) for d in os.listdir('/home/Documents') for f in os.listdir(d)], but that gets me something like (for a file) /home/Documents/file1, when it should be /home/Documents/1/file1. Is there a way to do this with the sub-directory in there?

Yes. You can try os.walk.
Consider the following path which has 3 sub directories: '1', '2', '3'.
- '1' has a file ("123.txt")
- '2' is empty
- '3' has 2 files ("123.txt", "1234.txt")
import os

path = r"C:\Users\hvasala\Documents\Udemy Course\project\del"

# os.walk visits `path` and every directory below it. `dirname` is the
# directory currently being visited, so joining it with each file name keeps
# the sub-directory in the printed path.
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        print(os.path.join(dirname, filename))
Output:
C:\Users\hvasala\Documents\Udemy Course\project\del\1\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\1234.txt

Use os.path.join:
import os

root = '/tmp/project'
# Join root + sub-directory + file name so the sub-directory stays in the
# result. Entries of root that are not directories are skipped, because
# os.listdir() would raise NotADirectoryError on them.
files = [
    os.path.join(root, d, f)
    for d in os.listdir(root)
    if os.path.isdir(os.path.join(root, d))
    for f in os.listdir(os.path.join(root, d))
]
print(files)
Output:
['/tmp/project/auth/__init__.py', '/tmp/project/controllers/__init__.py']

Try this code below:
import os
def find_file_name(path=None):
    """Return the paths of all files below *path*, searching recursively.

    Args:
        path: Directory to search. The current working directory is used
            when it is omitted or empty.

    Returns:
        A list of paths, each built by joining the containing directory
        with the file name (absolute when *path* is absolute).
    """
    if not path:
        path = os.getcwd()
    paths = []
    for element in os.listdir(path):
        full_path = os.path.join(path, element)
        if os.path.isdir(full_path):
            # Recurse so files in nested sub-directories are included.
            paths.extend(find_file_name(path=full_path))
        else:
            paths.append(full_path)
    return paths
def find_file_name(path=None, extention=".pdf"):
    """Return the names of all files below *path* that end in *extention*.

    Args:
        path: Directory to search recursively. The current working
            directory is used when it is omitted or empty.
        extention: File-name suffix to look for, including the dot.

    Returns:
        A list of bare file names (without their directory).
    """
    if not path:
        path = os.getcwd()
    pdf_files = []
    for element in os.listdir(path):
        full_path = os.path.join(path, element)
        if os.path.isdir(full_path):
            # Pass the suffix along; otherwise nested directories would
            # always be searched with the default ".pdf".
            pdf_files.extend(
                find_file_name(path=full_path, extention=extention)
            )
        elif element.endswith(extention):
            # Suffix test rather than substring test, so "a.pdf.bak" is
            # not reported as a PDF.
            pdf_files.append(element)
    return pdf_files

Related

how to merge folders in python?

How to remove part of a tree but keep the files and directories in python?
I have paths like this:
r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\A\file1.txt"
r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\B\C\file2.txt"
r"C:\User\Desktop\g1sr56g41f2d3s1gf\file3.txt"
r"C:\User\Desktop\F2F31DS5FDSF1S2F3DS2F1D23\file4.txt"
r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\B\C\file5.txt"
r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\D\E\file6.txt"
I want to move them to:
r"C:\User\Desktop\Document\A\file1.txt"
r"C:\User\Desktop\Document\B\C\file2.txt"
r"C:\User\Desktop\file3.txt"
r"C:\User\Desktop\file4.txt"
r"C:\User\Desktop\Document\B\C\file5.txt"
r"C:\User\Desktop\Document\D\E\file6.txt"
Here is a simple, quick-and-dirty way to do it:
import os
from pathlib import PureWindowsPath

paths = r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\D\E\file6.txt"
# PureWindowsPath parses Windows-style paths on any operating system, so the
# result does not depend on the separator of the machine running the script.
parts = PureWindowsPath(paths).parts
# parts[0] is the drive root ("C:\"), so index 3 is the folder to drop.
trim = parts[3]
print(trim)
# Drop that one component by position; a textual replace would also remove
# any later folder that happens to carry the same name.
final_path = str(PureWindowsPath(*parts[:3], *parts[4:]))
print(final_path)
output
C:\User\Desktop\Document\D\E\file6.txt
Solution 2
import os
import re
from pathlib import PureWindowsPath

paths = r"C:\User\Desktop\g1sr56g41f2d3s1gf\Document\D\E\file6.txt"
# Split with PureWindowsPath so the backslash-separated path is understood
# regardless of the operating system the script runs on.
parts = list(PureWindowsPath(paths).parts)
# parts[0] is the drive root ("C:\"); index 3 is the folder to remove.
del parts[3]
final_path = str(PureWindowsPath(*parts))
print(final_path)
output
C:\User\Desktop\Document\D\E\file6.txt
Here is my code:
#!/usr/bin/python3
import os, shutil
DST = 'Desktop'
toDel = []

for folder_name in os.listdir(DST):
    folder = os.path.join(DST, folder_name)
    if not os.path.isdir(folder):
        continue
    for path, _, files in os.walk(folder):
        # Same location as `path`, but with the top-level folder removed.
        relpath = os.path.join(DST, os.path.relpath(path, folder))
        if files:
            # makedirs rather than mkdir: parent directories that hold no
            # files of their own have not been created at this point.
            os.makedirs(relpath, exist_ok=True)
        for file in files:
            os.replace(os.path.join(path, file), os.path.join(relpath, file))
        # Remember only the outermost walked directories; nested ones are
        # removed together with their parent.
        if not toDel or not path.startswith(toDel[-1] + os.sep):
            toDel.append(path)

for folder in toDel:
    shutil.rmtree(folder)

Find all files with os.walk for a specific directory only

A directory tree looks like this:
DirA--
|
-- Map
|
-- Fig--
|
--file.png
|
-- Data--
|
-- file.xls
|
-- file.csv
There are multiple directories, containing multiple files. I would like to get full path to those files that are found in Data directory only.
This is what I have so far:
import os

# Asker's attempt, logic kept as posted: it collects the paths of the
# directories whose name starts with "Data", not the files inside them.
dirlist = []
thisdir = os.getcwd()
for root, dirs, files in os.walk(thisdir):
    for d in dirs:
        if d.startswith("Data"):
            dirlist.append(os.path.join(root, d))
To get only Data directory files, you will need to combine root and files.
for root, dirs, files in os.walk(thisdir):
    # Test the name of the directory being visited, not the whole path: a
    # substring check on `root` would also match unrelated folders such as
    # "Metadata", or every folder when thisdir itself contains "Data".
    if os.path.basename(root).startswith("Data"):
        dirlist.extend(os.path.join(root, f) for f in files)
Trying to do it using 'dirs'
In case of 'dirs', you don't have access to the files. For example, when root is DirA, you will have --Data-- in your dirs list but you will have no access to the files of --Data-- folder.
import os
from os import listdir
from os.path import isfile, join
rootdir = os.getcwd()
folder_name = "Data"
def get_files(path):
    """Return the names of the regular files directly inside *path*."""
    return [f for f in listdir(path) if isfile(join(path, f))]


def get_search_files(start_path, folder_name):
    """Collect the full paths of the files in every folder named *folder_name*.

    Args:
        start_path: Either the directory to search, or an iterable of
            ``(dirpath, dirnames, filenames)`` triples such as the one
            produced by ``os.walk``.
        folder_name: Exact name of the folders whose files are wanted.

    Returns:
        A list of file paths (folder path joined with file name). The list
        is empty when no folder with that name exists.
    """
    if isinstance(start_path, (str, bytes, os.PathLike)):
        start_path = os.walk(start_path)
    dirlist = []
    for subdir, dirs, _ in start_path:
        for x in dirs:
            if x == folder_name:
                data_folder_path = os.path.join(subdir, x)
                # Accumulate instead of overwriting, so several matching
                # folders all contribute their files.
                dirlist.extend(
                    join(data_folder_path, f)
                    for f in get_files(data_folder_path)
                )
    return dirlist
dirlist = get_search_files(os.walk(rootdir), folder_name)

How to create a python list with the number of file in each sub directory of a directory

I have a main directory (root) which contains 6 subdirectories.
I would like to count the number of files present in each sub directory and add all to a simple python list.
For this result : mylist = [497643, 5976, 3698, 12, 456, 745]
I'm blocked on that code:
import os, sys
# Asker's non-working attempt, logic kept as posted (only the indentation
# is restored).
list = []
# Open a file
path = "c://root"
dirs = os.listdir( path )
# This would print all the files and directories
for file in dirs:
    print (file)
#fill a list with each sub directory number of elements
for sub_dir in dirs:
    # len(sub_dir) is the length of the entry's name, and append() returns
    # None, so `list` does not end up holding the file counts.
    list = dirs.append(len(sub_dir))
My attempt at filling the list doesn't work, and I'm afraid this is the best I can do...
Finding a way to iterate sub-directory of a main directory and fill a list with a function applied on each sub directory would sky rocket the speed of my actual data science project!
Thanks for your help
Abel
You can use os.path.isfile and os.path.isdir
# Count only regular files: every entry is joined with its directory before
# os.path.isfile() tests it, and the True results are summed. Taking len()
# of the mapped results would count every entry, files or not.
res = [
    sum(
        os.path.isfile(os.path.join(path, name, entry))
        for entry in os.listdir(os.path.join(path, name))
    )
    for name in os.listdir(path)
    if os.path.isdir(os.path.join(path, name))
]
print(res)
Using the for loop
res = []
for name in os.listdir(path):
    dir_path = os.path.join(path, name)
    if os.path.isdir(dir_path):
        # Join each entry with dir_path so isfile() checks the real
        # location, then sum the True results to count files only.
        res.append(
            sum(
                os.path.isfile(os.path.join(dir_path, entry))
                for entry in os.listdir(dir_path)
            )
        )
You need to use os.listdir on each subdirectory. The current code simply takes the length of a filepath.
import os, sys
# Named mylist rather than list, so the built-in list type is not shadowed.
mylist = []
# List the entries of the main directory
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
    print(file)
# Fill the list with the number of elements of each sub directory
for sub_dir in dirs:
    # os.listdir() returns bare names, so join with the parent first.
    sub_path = os.path.join(path, sub_dir)
    if os.path.isdir(sub_path):
        temp = os.listdir(sub_path)
        # Append to the result list; appending to `dirs` would grow the
        # list being iterated, and append() itself returns None.
        mylist.append(len(temp))
Adding the os.listdir call to the loop lists the contents of each subdirectory.
You were almost there:
import os, sys
# Named mylist rather than list, so the built-in list type is not shadowed.
mylist = []
# List the entries of the main directory
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
    print(file)
for sub_dir in dirs:
    sub_path = os.path.join(path, sub_dir)
    # Test the joined path: a bare name would be resolved against the
    # current working directory instead of `path`.
    if os.path.isdir(sub_path):
        mylist.append(len(os.listdir(sub_path)))
print(mylist)
As an alternative, you can also utilize glob module for this and other related tasks.
I have created a test directory containing 3 subdirectories l,m and k containing 3 test files each.
import os, glob
# Named mylist rather than list, so the built-in list type is not shadowed.
mylist = []
path = "test" # you can leave this "." if you want files in the current directory
# Look only at the immediate sub directories of `path`; walking the whole
# tree would add one extra count for every nested directory.
for sub_dir in sorted(glob.glob(os.path.join(path, '*'))):
    if os.path.isdir(sub_dir):
        # '*' also matches directories, so keep regular files only.
        # Note that '*' does not match names starting with a dot.
        entries = glob.glob(os.path.join(sub_dir, '*'))
        mylist.append(sum(map(os.path.isfile, entries)))
print(mylist)
Output :
[3, 3, 3]

Is there an efficient way to recurse in a directory?

I want to perform:
iterate over the content of the folder
if content is file, append to list
if content is folder, goto 1
if folder name is "depth" or "ir", ignore
I am using python. Can you help?
ended up doing something like:
import os

_files = []
folder = "path/to/folder"
# Walk top-down: removing a name from `dirs` in place stops os.walk from
# descending into it, so "depth" and "ir" are skipped together with
# everything below them.
for root, dirs, files in os.walk(folder):
    dirs[:] = [d for d in dirs if d not in ("depth", "ir")]
    _files.extend(os.path.join(root, name) for name in files)
print(_files)
The os.walk() will recurse for you.
import os
res = []
for (root, dirs, files) in os.walk('/path/to/dir'):
    # Prune the ignored folders in place. os.walk (top-down by default)
    # then never enters them, whereas a `continue` would only skip their
    # own files and still visit their sub-folders.
    dirs[:] = [d for d in dirs if d not in ("depth", "ir")]
    # files is a list of the file names found in root
    res.extend(files)
print(res)
Try this
import os
basepath ="<path>"
files = []


def loopover(path, ignored=("depth", "ir")):
    """Append the names of all files below *path* to the module-level ``files``.

    Args:
        path: Directory to scan recursively.
        ignored: Names of sub-directories that are not entered.

    Returns:
        The module-level ``files`` list, for convenience.
    """
    for c in os.listdir(path):
        d = os.path.join(path, c)
        if os.path.isfile(d):
            files.append(c)
        elif os.path.isdir(d) and c not in ignored:
            loopover(d, ignored)
    return files
loopover(basepath)

Python, copy only directories

I have a program that has a list of some files. I have to copy only the directories and the subdirectories from the list to a specified directories and don't need to copy the files. I tried this, but it doesn't work.
def copiarDirs():
    # Asker's attempt, reported as not working; the logic is kept as posted
    # and only the indentation is restored.
    # NOTE(review): `list` looks like a Tkinter Listbox here, and `root`
    # and `i` are not defined anywhere in this snippet -- confirm.
    items = list.curselection()
    desti = tkFileDialog.askdirectory()
    # os.walk yields (dirpath, dirnames, filenames) tuples, so `dirs` is
    # one such tuple and `name` iterates over its three members.
    for dirs in os.walk(items, topdown=False):
        for name in dirs:
            #for i in items :
            aux=root+"/"+list.get(i)
            tryhard=("cp "+str(aux)+" "+str(desti))
            os.system(tryhard)
Try this:
import os
def copyDirs(source, destination):
    """Recreate the directory tree of *source* below *destination*, without files.

    Every directory visited by ``os.walk(source)``, including empty ones,
    is created under *destination* using its own path with the drive
    letter (if any) and leading separators removed.

    Args:
        source: Root of the directory tree to mirror.
        destination: Directory under which the tree is recreated.
    """
    separators = os.sep + (os.altsep or "")
    for subdir, _, _ in os.walk(source):
        # splitdrive drops "D:" on Windows and changes nothing elsewhere.
        # The leading separators must go as well, otherwise os.path.join
        # would discard `destination`.
        tail = os.path.splitdrive(subdir)[1].lstrip(separators)
        os.makedirs(os.path.join(destination, tail), exist_ok=True)
sourceDir = 'D:\\Work\\'
destDir = 'D:\\Dest\\'
copyDirs(sourceDir, destDir) #calling function

Categories