Related
I have a directory that consists of other directories. Each of those sub-directories have files that I need the absolute path for. For example, let's say the parent directory is /home/Documents and each of the sub-directories is 1, 2,..., 10. I have tried something like files = [os.path.abspath(f) for d in os.listdir('/home/Documents') for f in os.listdir(d)], but that gets me something like (for a file) /home/Documents/file1, when it should be /home/Documents/1/file1. Is there a way to do this with the sub-directory in there?
Yes. You can try os.walk.
Consider the following path which has 3 sub directories: '1', '2', '3'.
- '1' has a file ("123.txt")
- '2' is empty
- '3' has 2 files ("123.txt", "1234.txt")
path = r"C:\Users\hvasala\Documents\Udemy Course\project\del"
import os
for dirname, _, filenames in os.walk(path):
for filename in filenames:
print(os.path.join(dirname, filename))
Output:
C:\Users\hvasala\Documents\Udemy Course\project\del\1\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\123.txt
C:\Users\hvasala\Documents\Udemy Course\project\del\3\1234.txt
Use os.path.join:
root = '/tmp/project'
files = [os.path.join(root, d, f) for d in os.listdir(root) for f in os.listdir(os.path.join(root, d))]
print files
Output:
['/tmp/project/auth/__init__.py', '/tmp/project/controllers/__init__.py']
Try this code below:
import os
def find_file_name(path=None):
paths = []
if not path:
path = os.getcwd()
for element in os.listdir(path):
full_path = os.path.join(path, element)
if os.path.isdir(full_path):
paths += find_file_name(path=full_path)
else:
paths.append(full_path)
else:
return paths
def find_file_name(path=None, extention=".pdf"):
pdf_files = []
if not path:
path = os.getcwd()
for element in os.listdir(path):
full_path = os.path.join(path, element)
file = full_path.split("/")[-1]
if os.path.isdir(full_path):
pdf_files += find_file_name(path=full_path)
else:
if extention in file:
pdf_files.append(file)
return pdf_files
I want to rename all the files in test folder as 1, 2, 3 and so on
import os, sys, path
path = r"F:\test"
dirs = os.listdir(path)
print(dirs)
count = 1
for files in dirs:
str1 = str(count)
os.rename(files, str1)
count += 1
but my code giving me this error:
WindowsError: [Error 2] The system cannot find the file specified
dirs is a list of paths, and iterating through it won't give you the contents of the directories. You would need another os.listdir for that.
Also, to rename the files, you have to go through each of them.
A better solution would've been:
import os
count = 1
path = r"F:\test"
for root, dirs, files in os.walk(path):
for filename in files:
os.rename(os.path.join(root, filename), os.path.join(root, str(count)))
count += 1
Just add one line to change bthe current working directory.
import os, sys, path
path = r"F:\test"
dirs = os.listdir(path)
os.chdir(path) # Change the current working directory
print(dirs)
count = 1
for files in dirs:
str1 = str(count)
os.rename(files, str1)
count += 1
Is there an inbuilt module to search for a file in the current directory, as well as all the super-directories?
Without the module, I'll have to list all the files in the current directory, search for the file in question, and recursively move up if the file isn't present. Is there an easier way to do this?
Well this is not so well implemented, but will work
use listdir to get list of files/folders in current directory and then in the list search for you file.
If it exists loop breaks but if it doesn't it goes to parent directory using os.path.dirname and listdir.
if cur_dir == '/' the parent dir for "/" is returned as "/" so if cur_dir == parent_dir it breaks the loop
import os
import os.path
file_name = "test.txt" #file to be searched
cur_dir = os.getcwd() # Dir from where search starts can be replaced with any path
while True:
file_list = os.listdir(cur_dir)
parent_dir = os.path.dirname(cur_dir)
if file_name in file_list:
print "File Exists in: ", cur_dir
break
else:
if cur_dir == parent_dir: #if dir is root dir
print "File not found"
break
else:
cur_dir = parent_dir
Here's another one, using pathlib:
from pathlib import Path
def find_upwards(cwd: Path, filename: str) -> Path | None:
if cwd == Path(cwd.root) or cwd == cwd.parent:
return None
fullpath = cwd / filename
return fullpath if fullpath.exists() else find_upwards(cwd.parent, filename)
# usage example:
find_upwards(Path.cwd(), "helloworld.txt")
(using some Python 3.10 typing syntax here, you can safely skip that if you are using an earlier version)
Another option, using pathlib:
from pathlib import Path
def search_upwards_for_file(filename):
"""Search in the current directory and all directories above it
for a file of a particular name.
Arguments:
---------
filename :: string, the filename to look for.
Returns
-------
pathlib.Path, the location of the first file found or
None, if none was found
"""
d = Path.cwd()
root = Path(d.root)
while d != root:
attempt = d / filename
if attempt.exists():
return attempt
d = d.parent
return None
The parent question was to walk parent directories (not descend into children like the find command):
# walk PARENT directories looking for `filename`:
f = 'filename'
d = os.getcwd()
while d != "/" and f not in os.listdir(d):
d = os.path.abspath(d + "/../")
if os.path.isfile(os.path.join(d,f)):
do_something(f)
Here's a version that uses shell globbing to match multiple files:
# walk PARENT directories looking for any *.csv files,
# stopping when a directory that contains any:
f = '*.csv'
d = os.getcwd()
while d != "/" and not glob.glob(os.path.join(d, f)):
d = os.path.abspath(d + "/../")
files = glob.glob(os.path.join(d,f))
for filename in files:
do_something(filename)
Here a function that does an upward search:
import sys, os, os.path
def up_dir(match,start=None):
"""
Find a parent path producing a match on one of its entries.
Without match an empty string is returned.
:param match: a function returning a bool on a directory entry
:param start: absolute path or None
:return: directory with a match on one of its entries
>>> up_dir(lambda x: False)
''
"""
if start is None:
start = os.getcwd()
if any(match(x) for x in os.listdir(start)):
return start
parent = os.path.dirname(start)
if start == parent:
rootres = start.replace('\\','/').strip('/').replace(':','')
if len(rootres)==1 and sys.platform=='win32':
rootres = ''
return rootres
return up_dir(match,start=parent)
Here is an example that will find all the .csv files in a specified directory "path" and all its root directories and print them:
import os
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".csv"):
path_file = os.path.join(root,file)
print(path_file)
If you want to start at one directory and work your way through the parents then this would work for finding all the .csv files (for example):
import os
import glob
last_dir = ''
dir = r'c:\temp\starting_dir'
os.chdir(dir)
while last_dir != dir:
dir = os.getcwd()
print(glob.glob('*.csv'))
os.chdir('..')
last_dir = os.getcwd()
I was looking for this too, since os.walk is exactly the opposite of what I wanted. That searches subdirectories. I wanted to search backwards through parent directories until I hit the drive root.
Bumming some inspiration from previous answers, below is what I am using. It doesn't require changing the working directory and it has a place for you to do something when you find a match. And you can change how the match is found. I'm using regex but a basic string compare would work fine too.
# Looking for a file with the string 'lowda' in it (like beltalowda or inyalowda)
import os
import re # only if you want to use regex
# Setup initial directories
starting_dir = 'C:\\Users\\AvasaralaC\\Documents\\Projects'
last_dir = ''
curr_dir = starting_dir
filename = ''
# Loop through parent directories until you hit the end or find a match
while last_dir != curr_dir:
for item in os.listdir(curr_dir):
if re.compile('.*lowda.*').search(item): # Here you can do your own comparison
filename = (curr_dir + os.path.sep + item)
break
if filename:
break
last_dir = curr_dir
curr_dir = os.path.abspath(curr_dir + os.path.sep + os.pardir)
Other comparisons you could do are item.lower().endswith('.txt') or some other string comparison.
Just wrote this to find the "images" directory, note '/' is Linux style
dir = os.getcwd()
while dir != '/' and not glob.glob( dir + '/images' ):
dir = os.path.dirname(dir)
I'm stuck. I'm moving folders around on our network which all have a unique ID into a central location. There are a few folders with typos and therefore do not match a unique ID in the central location. I have found the correct IDS but I need to rename these folders before I move them. For example, I have created an excel spreadsheet with the wrong unique ID and in a separate column have the correct ID. Now, I want to rename the folders with the correct ID and then transfer those folders to the central location. My code is....rough because I can't think of a good way to do it. I feel like using a list is the way to go, but since my code is iterating through a folder I'm not sure how to achieve this
Edit: I think something like this may be what I'm looking for
Ex:
In Folder A : A file named 12334 SHOULD be renamed 1234. Then moved to the base directory with in folder 1234.
Heres my code:
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
subfolder = "\Private Drain Connections"
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
#file name and extension
ARN, extension = os.path.splitext(filename)
print ARN
#Location of the corresponding folder in the new directory
link = os.path.join(basedir, ARN)
if not os.path.exists(link):
newname = re.sub(372911000002001,372911000003100,ARN)
newname =re.sub(372809000001400,372909000001400,ARN)
newname =re.sub(372809000001500,372909000001500,ARN)
newname =re.sub(372809000001700,372909000001700,ARN)
newname = re.sub(372812000006800,372912000006800,ARN)
newname =re.sub(372812000006900,372912000006900,ARN)
newname =re.sub(372812000007000,372912000007000,ARN)
newname =re.sub(372812000007100,372912000007100,ARN)
newname =re.sub(372812000007200,372912000007200,ARN)
newname =re.sub(372812000007300,372912000007300,ARN)
newname =re.sub(372812000007400,372912000007400,ARN)
newname =re.sub(372812000007500,372912000007500,ARN)
newname =re.sub(372812000007600,372912000007600,ARN)
newname =re.sub(372812000007700,372912000007700,ARN)
newname =re.sub(372812000011100,372912000011100,ARN)
os.rename(os.path.join(movdir, ARN, extension ),
os.path.join(movdir, newname, extension))
oldpath = os.path.join(root, newname)
print ARN, "to", newname
newpath = basedir + "\\" + newname + subfolder
shutil.copy(oldpath, newpath)
print "Copied"
except:
print ("Error occurred")
Thanks to the answers below here is my final code:
import arcpy
import os
import re
import sys
import traceback
import collections
import shutil
movdir = r"C:\Scans"
basedir = r"C:\Links"
subfolder = "\Private Drain Connections"
import string
l = ['372911000002001',
'372809000001400',
'372809000001500',
'372809000001700',
'37292200000800'
]
l2 = ['372911000003100',
'372909000001400',
'372909000001500',
'372909000001700',
'372922000000800'
]
try:
#Walk through all files in the directory that contains the files to copy
for root, dirs, files in os.walk(movdir):
for filename in files:
#find the name location and name of files
path = os.path.join(root, filename)
#file name and extension
ARN, extension = os.path.splitext(filename)
oldname = str(ARN)
#Location of the corresponding folder in the new directory
link = os.path.join(basedir, ARN)
if not os.path.exists(link):
for ii, jj in zip(l, l2):
newname = re.sub(ii,jj, ARN)
newname = str(newname)
print path
newpath = os.path.join(root, oldname) + extension
print "new name", newpath
os.rename(path, newpath)
print "Renaming"
newpath2 = basedir + "\\" + newname + subfolder
shutil.copy(newpath, newpath2)
print "Copied"
if newname != ARN:
break
else:
continue
except:
print ("Error occurred")
tb = sys.exc_info()[2]
tbinfo = traceback.format_tb(tb)[0]
pymsg = "PYTHON ERRORS:\nTraceback Info:\n" + tbinfo + "\nError Info:\n " + \
str(sys.exc_type)+ ": " + str(sys.exc_value) + "\n"
msgs = "GP ERRORS:\n" + arcpy.GetMessages(2 )+ "\n"
print (pymsg)
print (msgs)
For me the way to go is to read both lists into list objects:
list1 = ["372911000002001", "372809000001400", "372809000001500"]
list2 = ["372911000003100", "372909000001400", "372909000001500"]
for ii, jj in zip(list1, list2):
newname = re.sub(ii,jj,ARN) #re.sub returns ARN if no substitution done
if newname != ARN:
break
An idea: try to convert the id's to strings. I mean:
newname = re.sub('372911000002001','372911000003100',ARN)
Hope it helps!
I'm looking for a way to include/exclude files patterns and exclude directories from a os.walk() call.
Here's what I'm doing by now:
import fnmatch
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']
def _filter(paths):
for path in paths:
if os.path.isdir(path) and not path in excludes:
yield path
for pattern in (includes + excludes):
if not os.path.isdir(path) and fnmatch.fnmatch(path, pattern):
yield path
for root, dirs, files in os.walk('/home/paulo-freitas'):
dirs[:] = _filter(map(lambda d: os.path.join(root, d), dirs))
files[:] = _filter(map(lambda f: os.path.join(root, f), files))
for filename in files:
filename = os.path.join(root, filename)
print(filename)
Is there a better way to do this? How?
This solution uses fnmatch.translate to convert glob patterns to regular expressions (it assumes the includes only is used for files):
import fnmatch
import os
import os.path
import re
includes = ['*.doc', '*.odt'] # for files only
excludes = ['/home/paulo-freitas/Documents'] # for dirs and files
# transform glob patterns to regular expressions
includes = r'|'.join([fnmatch.translate(x) for x in includes])
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'
for root, dirs, files in os.walk('/home/paulo-freitas'):
# exclude dirs
dirs[:] = [os.path.join(root, d) for d in dirs]
dirs[:] = [d for d in dirs if not re.match(excludes, d)]
# exclude/include files
files = [os.path.join(root, f) for f in files]
files = [f for f in files if not re.match(excludes, f)]
files = [f for f in files if re.match(includes, f)]
for fname in files:
print fname
From docs.python.org:
os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]])
When topdown is True, the caller can modify the dirnames list in-place … this can be used to prune the search …
for root, dirs, files in os.walk('/home/paulo-freitas', topdown=True):
# excludes can be done with fnmatch.filter and complementary set,
# but it's more annoying to read.
dirs[:] = [d for d in dirs if d not in excludes]
for pat in includes:
for f in fnmatch.filter(files, pat):
print os.path.join(root, f)
I should point out that the above code assumes excludes is a pattern, not a full path. You would need to adjust the list comprehension to filter if os.path.join(root, d) not in excludes to match the OP case.
why fnmatch?
import os
excludes=....
for ROOT,DIR,FILES in os.walk("/path"):
for file in FILES:
if file.endswith(('doc','odt')):
print file
for directory in DIR:
if not directory in excludes :
print directory
not exhaustively tested
dirtools is perfect for your use-case:
from dirtools import Dir
print(Dir('.', exclude_file='.gitignore').files())
Here is one way to do that
import fnmatch
import os
excludes = ['/home/paulo-freitas/Documents']
matches = []
for path, dirs, files in os.walk(os.getcwd()):
for eachpath in excludes:
if eachpath in path:
continue
else:
for result in [os.path.abspath(os.path.join(path, filename)) for
filename in files if fnmatch.fnmatch(filename,'*.doc') or fnmatch.fnmatch(filename,'*.odt')]:
matches.append(result)
print matches
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']
def file_search(path, exe):
for x,y,z in os.walk(path):
for a in z:
if a[-4:] == exe:
print os.path.join(x,a)
for x in includes:
file_search(excludes[0],x)
This is an example of excluding directories and files with os.walk():
ignoreDirPatterns=[".git"]
ignoreFilePatterns=[".php"]
def copyTree(src, dest, onerror=None):
src = os.path.abspath(src)
src_prefix = len(src) + len(os.path.sep)
for root, dirs, files in os.walk(src, onerror=onerror):
for pattern in ignoreDirPatterns:
if pattern in root:
break
else:
#If the above break didn't work, this part will be executed
for file in files:
for pattern in ignoreFilePatterns:
if pattern in file:
break
else:
#If the above break didn't work, this part will be executed
dirpath = os.path.join(dest, root[src_prefix:])
try:
os.makedirs(dirpath,exist_ok=True)
except OSError as e:
if onerror is not None:
onerror(e)
filepath=os.path.join(root,file)
shutil.copy(filepath,dirpath)
continue;#If the above else didn't executed, this will be reached
continue;#If the above else didn't executed, this will be reached
python >=3.2 due to exist_ok in makedirs
The above methods had not worked for me.
So, This is what I came up with an expansion of my original answer to another question.
What worked for me was:
if (not (str(root) + '/').startswith(tuple(exclude_foldr)))
which compiled a path and excluded the tuple of my listed folders.
This gave me the exact result I was looking for.
My goal for this was to keep my mac organized.
I can Search any folder by path, locate & move specific file.types, ignore subfolders and i preemptively prompt the user if they want to move the files.
NOTE: the Prompt is only one time per run and is NOT per file
By Default the prompt defaults to NO when you hit enter instead of [y/N], and will just list the Potential files to be moved.
This is only a snippet of my GitHub Please visit for the total script.
HINT: Read the script below as I added info per line as to what I had done.
#!/usr/bin/env python3
# =============================================================================
# Created On : MAC OSX High Sierra 10.13.6 (17G65)
# Created On : Python 3.7.0
# Created By : Jeromie Kirchoff
# =============================================================================
"""THE MODULE HAS BEEN BUILD FOR KEEPING YOUR FILES ORGANIZED."""
# =============================================================================
from os import walk
from os import path
from shutil import move
import getpass
import click
mac_username = getpass.getuser()
includes_file_extensn = ([".jpg", ".gif", ".png", ".jpeg", ])
search_dir = path.dirname('/Users/' + mac_username + '/Documents/')
target_foldr = path.dirname('/Users/' + mac_username + '/Pictures/Archive/')
exclude_foldr = set([target_foldr,
path.dirname('/Users/' + mac_username +
'/Documents/GitHub/'),
path.dirname('/Users/' + mac_username +
'/Documents/Random/'),
path.dirname('/Users/' + mac_username +
'/Documents/Stupid_Folder/'),
])
if click.confirm("Would you like to move files?",
default=False):
question_moving = True
else:
question_moving = False
def organize_files():
"""THE MODULE HAS BEEN BUILD FOR KEEPING YOUR FILES ORGANIZED."""
# topdown=True required for filtering.
# "Root" had all info i needed to filter folders not dir...
for root, dir, files in walk(search_dir, topdown=True):
for file in files:
# creating a directory to str and excluding folders that start with
if (not (str(root) + '/').startswith(tuple(exclude_foldr))):
# showcase only the file types looking for
if (file.endswith(tuple(includes_file_extensn))):
# using path.normpath as i found an issue with double //
# in file paths.
filetomove = path.normpath(str(root) + '/' +
str(file))
# forward slash required for both to split
movingfileto = path.normpath(str(target_foldr) + '/' +
str(file))
# Answering "NO" this only prints the files "TO BE Moved"
print('Files To Move: ' + str(filetomove))
# This is using the prompt you answered at the beginning
if question_moving is True:
print('Moving File: ' + str(filetomove) +
"\n To:" + str(movingfileto))
# This is the command that moves the file
move(filetomove, movingfileto)
pass
# The rest is ignoring explicitly and continuing
else:
pass
pass
else:
pass
else:
pass
if __name__ == '__main__':
organize_files()
Example of running my script from terminal:
$ python3 organize_files.py
Exclude list: {'/Users/jkirchoff/Pictures/Archive', '/Users/jkirchoff/Documents/Stupid_Folder', '/Users/jkirchoff/Documents/Random', '/Users/jkirchoff/Documents/GitHub'}
Files found will be moved to this folder:/Users/jkirchoff/Pictures/Archive
Would you like to move files?
No? This will just list the files.
Yes? This will Move your files to the target folder.
[y/N]:
Example of listing files:
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
...etc
Example of moving files:
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
To: /Users/jkirchoff/Pictures/Archive/1.custom-award-768x512.jpg
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
To: /Users/jkirchoff/Pictures/Archive/10351458_318162838331056_9023492155204267542_n.jpg
...