search folder for subfolders containing certain string, exclude lagging values - python

How do i search a string in python for a value and only return if the string does not contain any leading/lagging values from the search string. white spacing is aceptable, i.e.
I would like to search a folder with sub folders below for "WO1" or "WO1 BLD2 L1 H3 Fitout"
Subfolder:
WO1
WO123
SP10152100 WO137
WO1 BLD2 L1 H3 Fitout
the code i am using returns "SP10152100 WO137"
temp_WO_dir1 = "WO" + wo
path = root + "\\"+ dir
for root, dirs, files in os.walk(path):
for dir in dirs:
print dir
if temp_WO_dir1 in dir:
find = True
sp_path = root + "\\" + dir + "\\3 Finance\\Telstra Invoicing"
print sp_path
break
if root.count(os.sep) - path.count(os.sep) == 0:
del dirs[:]
Update:
Tried the following but not working
if re.match(temp_WO_dir1+"\s" , dir):

WO1 flanked by a word boundry, proceeded by anything, followed by anything.
.*\bWO1\b.*

Related

How to create a python list with the number of file in each sub directory of a directory

I have a main directory(root) which countain 6 sub directory.
I would like to count the number of files present in each sub directory and add all to a simple python list.
For this result : mylist = [497643, 5976, 3698, 12, 456, 745]
I'm blocked on that code:
import os, sys
list = []
# Open a file
path = "c://root"
dirs = os.listdir( path )
# This would print all the files and directories
for file in dirs:
print (file)
#fill a list with each sub directory number of elements
for sub_dir in dirs:
list = dirs.append(len(sub_dir))
My trying for the list fill doesn't work and i'm dramaticaly at my best...
Finding a way to iterate sub-directory of a main directory and fill a list with a function applied on each sub directory would sky rocket the speed of my actual data science project!
Thanks for your help
Abel
You can use os.path.isfile and os.path.isdir
res = [len(list(map(os.path.isfile, os.listdir(os.path.join(path, name))))) for name in os.listdir(path) if os.path.isdir(os.path.join(path, name))]
print(res)
Using the for loop
res = []
for name in os.listdir(path):
dir_path = os.path.join(path, name)
if os.path.isdir(dir_path):
res.append(len(list(map(os.path.isfile, os.listdir(dir_path)))))
You need to use os.listdir on each subdirectory. The current code simply takes the length of a filepath.
import os, sys
list = []
# Open a file
path = "c://root"
dirs = os.listdir( path )
# This would print all the files and directories
for file in dirs:
print (file)
#fill a list with each sub directory number of elements
for sub_dir in dirs:
temp = os.listdir(sub_dir)
list = dirs.append(len(temp))
Adding this line to the code will list out the subdirectory
You were almost there:
import os, sys
list = []
# Open a file
path = "c://root"
dirs = os.listdir(path)
# This would print all the files and directories
for file in dirs:
print(file)
for sub_dir in dirs:
if os.path.isdir(sub_dir):
list.append(len(os.listdir(os.path.join(path, sub_dir))))
print(list)
As an alternative, you can also utilize glob module for this and other related tasks.
I have created a test directory containing 3 subdirectories l,m and k containing 3 test files each.
import os, glob
list = []
path = "test" # you can leave this "." if you want files in the current directory
for root, dirs, files in os.walk(path, topdown=True):
for name in dirs:
list.append(len(glob.glob(root + '/' + name + '/*')))
print(list)
Output :
[3, 3, 3]

How to move batches of files in Python?

I have a folder with 1092 files. I need to move those files to a new directory in batches of 10 (each new folder will have only 10 files each, so max. of 110 folders).
I tried this code, and now the folders have been created, but I can't find any of original files (???). They are neither in the original and newly created folders...
path = "/home/user/Documents/MSc/Imagens/Dataset"
paths = []
for root, dirs, file in os.walk(path):
for name in file:
paths.append(os.path.join(root,name))
start = 0
end = 10
while end <= 1100:
dest = str(os.mkdir("Dataset_" + str(start) + "_" + str(end)))
for i in paths[start:end]:
shutil.move(i, dest)
start += 10
end += 10
Any ideas?
With your move command, you are moving all 10 files to one single folder - but not into that folder as the filenames are missing. And dest is none, since os.mkdir() doesn't return anything.
You need to append the filename to dest:
dataset_dirname = "Dataset_" + str(start) + "_" + str(end)
os.mkdir(dataset_dirname)
dataset_fullpath = os.path.join(path, dataset_dirname)
for i in paths[start:end]:
# append filename to dataset_fullpath and move the file
shutil.move(i, os.path.join(dataset_fullpath , os.path.basename(i)))

Move files to newly created folders more efficiently

I want to group a list of files into sub-folders based on some substring in their name
The files are of the form
pie_riverside_10.png
stack_oak_20.png
scatter_mountain_10.png
and I want to use the starting substring (e.g. pie, stack, scatter) and the integer substring (e.g. 10,20) as the sub-directory name for grouping the files..
The code below is only example- if I actually do that approach I have to create at least 75-80 folders manually with elif statements, which is inefficient.
I am just curious if there is a better way to do this?
EDIT: The current code assumes there is already a folder created, but in real scenario I do not have the folders created and I do not want to have to create 70-80 subfolders- I am trying to make script to create those folders for me.
import shutil
import os
source = 'C:/Users/Xx/Documents/plots/'
pie_charts_10= 'C:/Users/Xx/Documents/pie_charts_10/'
pie_charts_20= 'C:/Users/Xx/Documents/pie_charts_20/'
stack_charts_10 = 'C:/Users/Xx/Documents/stack_charts_10 /'
scatter_charts_10 = 'C:/Users/Xx/Documents/scatter_charts_10 /'
files = os.listdir(source)
for f in files:
if (f.startswith("pie") and f.endswith("10.png")):
shutil.move(os.path.join(source, f), pie_charts_10)
elif (f.startswith("pie") and f.endswith("20.png")):
shutil.move(os.path.join(source, f), pie_charts_20 )
elif (f.startswith("stack") and f.endswith("10.png")):
shutil.move(os.path.join(source, f), stack_charts_10 )
elif (f.startswith("scatter ") and f.endswith("10.png")):
shutil.move(os.path.join(source, f), scatter_charts_10 )
else:
print("No file")
When you are looking to move files of the format prefix_suffix.png into folders prefix_charts_suffix/:
base = "C:/Users/Xx/Documents"
moved_types = ['png']
for f in files:
pf = f.rsplit('.', 1) # filename, prefix
sf = pf[0].split("_") # prefix, whatever, suffix
if len(sf) >= len(pf) > 1 and pf[1] in moved_types:
new_dir = "%s_charts_%s" % (sf[0], sf[-1])
if not os.path.exists(os.path.join(base, new_dir):
os.mkdirs(os.path.join(base, new_dir)
shutil.move(os.path.join(source, f), os.path.join(base, new_dir, f)
Which will work for the general case, grabbing and moving only files which end in moved_types and contain a _ (which allows for splitting of a prefix and suffix).
See the relevant logic on repl.it:
>>>['prefix_garbage_suffix.png', 'bob.sh', 'bob.bill.png', "pie_23.png", "scatter_big_1.png"]
Move prefix_garbage_suffix.png to prefix_charts_suffix
Move pie_23.png to pie_charts_23
Move scatter_big_1.png to scatter_charts_1
EDIT: I've preserved the original answer in case others need a solution where not every file should be moved or you can't infer the folder name from the file names.
If you need I would do something like:
identity_tuples = \
[('pie', '16.png', 'C:/Users/Xx/Documents/pie_charts/'),
('stack', '14.png', 'C:/Users/Xx/Documents/stack_charts/'),
('scatter', '12.png', 'C:/Users/Xx/Documents/scatter_charts/')]
files = os.listdir(source)
for f in files:
for identity_tuple in identity_tuples:
if f.startswith(identity_tuple[0]) and f.endswith(identity_tuple[1]):
shutil.move(os.path.join(source, f), identity_tuple[2])
break
else:
print("No file")
Now you just have to add a new identity tuple: (prefix, suffix, destination) for each type. If the path is common for all the destinations, you can change it to:
identity_tuples = \
[('pie', '16.png', 'pie_charts/'),
('stack', '14.png', 'stack_charts/'),
('scatter', '12.png', 'scatter_charts/')]
files = os.listdir(source)
for f in files:
for identity_tuple in identity_tuples:
if f.startswith(identity_tuple[0]) and f.endswith(identity_tuple[1]):
shutil.move(os.path.join(source, f), "C:/Users/Xx/Documents/" + identity_tuple[2])
break
else:
print("No file")
Note: This is using a for/else loop, in which else is only called if you don't hit a break.
If you need to make the directories, add this in before the shutil.move():
if not os.path.exists(identity_tuple[2]):
os.mkdirs(identity_tuple[2]) # Or "C:/Users/Xx/Documents/" + ...
How about this
# assume you have files in a folder
source = './files' # some directory
files = os.listdir(source)
print files
#['pie_river_1.png', 'pie_mountain_11.png', 'scatter_grass_12.png', 'stack_field_30.png']
Now you want to group them into subfolders based on what they start with and what number they have before the extension
subdir_root = './subfolders'
for f in files:
fig_type = f.split('_')[0]
fig_num = f.split('.png')[0].split('_')[-1]
subdir_name = '%s_charts_%s'%(fig_type, fig_num) # name of dir, e.g. pie_charts_10
subdir = os.path.join( subdir_root, subdir_name ) # path to dir
if not os.path.exists(subdir): # if the dir does not exist , create it
os.makedirs(subdir)
f_src = os.path.join( source, f) # full path to source file
f_dest = os.path.join( subdir, f) # full path to new destination file
shutil.copy( f_src, f_dest ) # I changed to copy so you dont screw up your original files
on my compurer
$ ls ./files:
pie_mountain_11.png pie_river_1.png scatter_grass_12.png stack_field_30.png
$ ls -R ./subfolders
pie_charts_1 pie_charts_11 scatter_charts_12 stack_charts_30
subfolders//pie_charts_1:
pie_river_1.png
subfolders//pie_charts_11:
pie_mountain_11.png
subfolders//scatter_charts_12:
scatter_grass_12.png
subfolders//stack_charts_30:
stack_field_30.png
Obviously, you might have to change the code if edge cases arise.. but this should give you a good start...

Python: search for a file in current directory and all it's parents

Is there an inbuilt module to search for a file in the current directory, as well as all the super-directories?
Without the module, I'll have to list all the files in the current directory, search for the file in question, and recursively move up if the file isn't present. Is there an easier way to do this?
Well this is not so well implemented, but will work
use listdir to get list of files/folders in current directory and then in the list search for you file.
If it exists loop breaks but if it doesn't it goes to parent directory using os.path.dirname and listdir.
if cur_dir == '/' the parent dir for "/" is returned as "/" so if cur_dir == parent_dir it breaks the loop
import os
import os.path
file_name = "test.txt" #file to be searched
cur_dir = os.getcwd() # Dir from where search starts can be replaced with any path
while True:
file_list = os.listdir(cur_dir)
parent_dir = os.path.dirname(cur_dir)
if file_name in file_list:
print "File Exists in: ", cur_dir
break
else:
if cur_dir == parent_dir: #if dir is root dir
print "File not found"
break
else:
cur_dir = parent_dir
Here's another one, using pathlib:
from pathlib import Path
def find_upwards(cwd: Path, filename: str) -> Path | None:
if cwd == Path(cwd.root) or cwd == cwd.parent:
return None
fullpath = cwd / filename
return fullpath if fullpath.exists() else find_upwards(cwd.parent, filename)
# usage example:
find_upwards(Path.cwd(), "helloworld.txt")
(using some Python 3.10 typing syntax here, you can safely skip that if you are using an earlier version)
Another option, using pathlib:
from pathlib import Path
def search_upwards_for_file(filename):
"""Search in the current directory and all directories above it
for a file of a particular name.
Arguments:
---------
filename :: string, the filename to look for.
Returns
-------
pathlib.Path, the location of the first file found or
None, if none was found
"""
d = Path.cwd()
root = Path(d.root)
while d != root:
attempt = d / filename
if attempt.exists():
return attempt
d = d.parent
return None
The parent question was to walk parent directories (not descend into children like the find command):
# walk PARENT directories looking for `filename`:
f = 'filename'
d = os.getcwd()
while d != "/" and f not in os.listdir(d):
d = os.path.abspath(d + "/../")
if os.path.isfile(os.path.join(d,f)):
do_something(f)
Here's a version that uses shell globbing to match multiple files:
# walk PARENT directories looking for any *.csv files,
# stopping when a directory that contains any:
f = '*.csv'
d = os.getcwd()
while d != "/" and not glob.glob(os.path.join(d, f)):
d = os.path.abspath(d + "/../")
files = glob.glob(os.path.join(d,f))
for filename in files:
do_something(filename)
Here a function that does an upward search:
import sys, os, os.path
def up_dir(match,start=None):
"""
Find a parent path producing a match on one of its entries.
Without match an empty string is returned.
:param match: a function returning a bool on a directory entry
:param start: absolute path or None
:return: directory with a match on one of its entries
>>> up_dir(lambda x: False)
''
"""
if start is None:
start = os.getcwd()
if any(match(x) for x in os.listdir(start)):
return start
parent = os.path.dirname(start)
if start == parent:
rootres = start.replace('\\','/').strip('/').replace(':','')
if len(rootres)==1 and sys.platform=='win32':
rootres = ''
return rootres
return up_dir(match,start=parent)
Here is an example that will find all the .csv files in a specified directory "path" and all its root directories and print them:
import os
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".csv"):
path_file = os.path.join(root,file)
print(path_file)
If you want to start at one directory and work your way through the parents then this would work for finding all the .csv files (for example):
import os
import glob
last_dir = ''
dir = r'c:\temp\starting_dir'
os.chdir(dir)
while last_dir != dir:
dir = os.getcwd()
print(glob.glob('*.csv'))
os.chdir('..')
last_dir = os.getcwd()
I was looking for this too, since os.walk is exactly the opposite of what I wanted. That searches subdirectories. I wanted to search backwards through parent directories until I hit the drive root.
Bumming some inspiration from previous answers, below is what I am using. It doesn't require changing the working directory and it has a place for you to do something when you find a match. And you can change how the match is found. I'm using regex but a basic string compare would work fine too.
# Looking for a file with the string 'lowda' in it (like beltalowda or inyalowda)
import os
import re # only if you want to use regex
# Setup initial directories
starting_dir = 'C:\\Users\\AvasaralaC\\Documents\\Projects'
last_dir = ''
curr_dir = starting_dir
filename = ''
# Loop through parent directories until you hit the end or find a match
while last_dir != curr_dir:
for item in os.listdir(curr_dir):
if re.compile('.*lowda.*').search(item): # Here you can do your own comparison
filename = (curr_dir + os.path.sep + item)
break
if filename:
break
last_dir = curr_dir
curr_dir = os.path.abspath(curr_dir + os.path.sep + os.pardir)
Other comparisons you could do are item.lower().endswith('.txt') or some other string comparison.
Just wrote this to find the "images" directory, note '/' is Linux style
dir = os.getcwd()
while dir != '/' and not glob.glob( dir + '/images' ):
dir = os.path.dirname(dir)

Filtering os.walk() dirs and files

I'm looking for a way to include/exclude files patterns and exclude directories from a os.walk() call.
Here's what I'm doing by now:
import fnmatch
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']
def _filter(paths):
for path in paths:
if os.path.isdir(path) and not path in excludes:
yield path
for pattern in (includes + excludes):
if not os.path.isdir(path) and fnmatch.fnmatch(path, pattern):
yield path
for root, dirs, files in os.walk('/home/paulo-freitas'):
dirs[:] = _filter(map(lambda d: os.path.join(root, d), dirs))
files[:] = _filter(map(lambda f: os.path.join(root, f), files))
for filename in files:
filename = os.path.join(root, filename)
print(filename)
Is there a better way to do this? How?
This solution uses fnmatch.translate to convert glob patterns to regular expressions (it assumes the includes only is used for files):
import fnmatch
import os
import os.path
import re
includes = ['*.doc', '*.odt'] # for files only
excludes = ['/home/paulo-freitas/Documents'] # for dirs and files
# transform glob patterns to regular expressions
includes = r'|'.join([fnmatch.translate(x) for x in includes])
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'
for root, dirs, files in os.walk('/home/paulo-freitas'):
# exclude dirs
dirs[:] = [os.path.join(root, d) for d in dirs]
dirs[:] = [d for d in dirs if not re.match(excludes, d)]
# exclude/include files
files = [os.path.join(root, f) for f in files]
files = [f for f in files if not re.match(excludes, f)]
files = [f for f in files if re.match(includes, f)]
for fname in files:
print fname
From docs.python.org:
os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]])
When topdown is True, the caller can modify the dirnames list in-place … this can be used to prune the search …
for root, dirs, files in os.walk('/home/paulo-freitas', topdown=True):
# excludes can be done with fnmatch.filter and complementary set,
# but it's more annoying to read.
dirs[:] = [d for d in dirs if d not in excludes]
for pat in includes:
for f in fnmatch.filter(files, pat):
print os.path.join(root, f)
I should point out that the above code assumes excludes is a pattern, not a full path. You would need to adjust the list comprehension to filter if os.path.join(root, d) not in excludes to match the OP case.
why fnmatch?
import os
excludes=....
for ROOT,DIR,FILES in os.walk("/path"):
for file in FILES:
if file.endswith(('doc','odt')):
print file
for directory in DIR:
if not directory in excludes :
print directory
not exhaustively tested
dirtools is perfect for your use-case:
from dirtools import Dir
print(Dir('.', exclude_file='.gitignore').files())
Here is one way to do that
import fnmatch
import os
excludes = ['/home/paulo-freitas/Documents']
matches = []
for path, dirs, files in os.walk(os.getcwd()):
for eachpath in excludes:
if eachpath in path:
continue
else:
for result in [os.path.abspath(os.path.join(path, filename)) for
filename in files if fnmatch.fnmatch(filename,'*.doc') or fnmatch.fnmatch(filename,'*.odt')]:
matches.append(result)
print matches
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']
def file_search(path, exe):
for x,y,z in os.walk(path):
for a in z:
if a[-4:] == exe:
print os.path.join(x,a)
for x in includes:
file_search(excludes[0],x)
This is an example of excluding directories and files with os.walk():
ignoreDirPatterns=[".git"]
ignoreFilePatterns=[".php"]
def copyTree(src, dest, onerror=None):
src = os.path.abspath(src)
src_prefix = len(src) + len(os.path.sep)
for root, dirs, files in os.walk(src, onerror=onerror):
for pattern in ignoreDirPatterns:
if pattern in root:
break
else:
#If the above break didn't work, this part will be executed
for file in files:
for pattern in ignoreFilePatterns:
if pattern in file:
break
else:
#If the above break didn't work, this part will be executed
dirpath = os.path.join(dest, root[src_prefix:])
try:
os.makedirs(dirpath,exist_ok=True)
except OSError as e:
if onerror is not None:
onerror(e)
filepath=os.path.join(root,file)
shutil.copy(filepath,dirpath)
continue;#If the above else didn't executed, this will be reached
continue;#If the above else didn't executed, this will be reached
python >=3.2 due to exist_ok in makedirs
The above methods had not worked for me.
So, This is what I came up with an expansion of my original answer to another question.
What worked for me was:
if (not (str(root) + '/').startswith(tuple(exclude_foldr)))
which compiled a path and excluded the tuple of my listed folders.
This gave me the exact result I was looking for.
My goal for this was to keep my mac organized.
I can Search any folder by path, locate & move specific file.types, ignore subfolders and i preemptively prompt the user if they want to move the files.
NOTE: the Prompt is only one time per run and is NOT per file
By Default the prompt defaults to NO when you hit enter instead of [y/N], and will just list the Potential files to be moved.
This is only a snippet of my GitHub Please visit for the total script.
HINT: Read the script below as I added info per line as to what I had done.
#!/usr/bin/env python3
# =============================================================================
# Created On : MAC OSX High Sierra 10.13.6 (17G65)
# Created On : Python 3.7.0
# Created By : Jeromie Kirchoff
# =============================================================================
"""THE MODULE HAS BEEN BUILD FOR KEEPING YOUR FILES ORGANIZED."""
# =============================================================================
from os import walk
from os import path
from shutil import move
import getpass
import click
mac_username = getpass.getuser()
includes_file_extensn = ([".jpg", ".gif", ".png", ".jpeg", ])
search_dir = path.dirname('/Users/' + mac_username + '/Documents/')
target_foldr = path.dirname('/Users/' + mac_username + '/Pictures/Archive/')
exclude_foldr = set([target_foldr,
path.dirname('/Users/' + mac_username +
'/Documents/GitHub/'),
path.dirname('/Users/' + mac_username +
'/Documents/Random/'),
path.dirname('/Users/' + mac_username +
'/Documents/Stupid_Folder/'),
])
if click.confirm("Would you like to move files?",
default=False):
question_moving = True
else:
question_moving = False
def organize_files():
"""THE MODULE HAS BEEN BUILD FOR KEEPING YOUR FILES ORGANIZED."""
# topdown=True required for filtering.
# "Root" had all info i needed to filter folders not dir...
for root, dir, files in walk(search_dir, topdown=True):
for file in files:
# creating a directory to str and excluding folders that start with
if (not (str(root) + '/').startswith(tuple(exclude_foldr))):
# showcase only the file types looking for
if (file.endswith(tuple(includes_file_extensn))):
# using path.normpath as i found an issue with double //
# in file paths.
filetomove = path.normpath(str(root) + '/' +
str(file))
# forward slash required for both to split
movingfileto = path.normpath(str(target_foldr) + '/' +
str(file))
# Answering "NO" this only prints the files "TO BE Moved"
print('Files To Move: ' + str(filetomove))
# This is using the prompt you answered at the beginning
if question_moving is True:
print('Moving File: ' + str(filetomove) +
"\n To:" + str(movingfileto))
# This is the command that moves the file
move(filetomove, movingfileto)
pass
# The rest is ignoring explicitly and continuing
else:
pass
pass
else:
pass
else:
pass
if __name__ == '__main__':
organize_files()
Example of running my script from terminal:
$ python3 organize_files.py
Exclude list: {'/Users/jkirchoff/Pictures/Archive', '/Users/jkirchoff/Documents/Stupid_Folder', '/Users/jkirchoff/Documents/Random', '/Users/jkirchoff/Documents/GitHub'}
Files found will be moved to this folder:/Users/jkirchoff/Pictures/Archive
Would you like to move files?
No? This will just list the files.
Yes? This will Move your files to the target folder.
[y/N]:
Example of listing files:
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
...etc
Example of moving files:
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
To: /Users/jkirchoff/Pictures/Archive/1.custom-award-768x512.jpg
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
To: /Users/jkirchoff/Pictures/Archive/10351458_318162838331056_9023492155204267542_n.jpg
...

Categories