separate filename after underlining _ os.path - python

In my path /volume1/xx/ are several files with this character A_test1.pdf, B_test2.pdf, ...I want to seperate the test1 part without path and .pdf.
Im newbie so I tried first with full name
but I got only the "*.pdf" as a text.
What is wrong with the path oder placeholder * ?
splitname = os.path.basename('/volume1/xx/*.pdf')
Edit
I got 2019-01-18_RG-Telekom[] from orign ReT_march - I want 2019-01-18_RG-Telekom_march (text after underlining) xx is a folder
here is the whole code:
#!/usr/bin/env python3
import datetime
import glob
import os
import os.path
SOURCE_PATH = '/volume1/xx'
TARGET_PATH = os.path.join(SOURCE_PATH, 'DMS')
def main():
today = datetime.date.today()
splitnames = [os.path.basename(fpath) for fpath in glob.glob("./xx/*.pdf")]
for prefix, name_part in [
('ReA', 'RG-Amazon'),
('GsA', 'GS-Amazon'),
('ReT', 'RG-Telekom'),
('NoE', 'Notiz-EDV'),
]:
filenames = glob.iglob(os.path.join(SOURCE_PATH, prefix + '*.pdf'))
for old_filename in filenames:
new_filename = os.path.join(TARGET_PATH, '{}_{}_{}.pdf'.format(today, name_part, splitnames))
os.rename(old_filename, new_filename)
if __name__ == '__main__':
main()

Use glob, os.path don't know how to process masks, but glob.glob works:
splitnames = [os.path.basename(fpath) for fpath in glob.glob("./**/*.txt")]
splitnames
Out:
['A_test1.pdf', 'B_test2.pdf']
Output of the glob:
glob.glob("./**/*.txt")
Out:
['./some_folder/A_test1.pdf', './another_folder/B_test2.pdf']
Apply os.path.basename to this list and extract basenames, as it shown above.
Edit
If xx in the path volume1/xx/ is just a folder name, not a mask, you should use following expression:
splitnames = [os.path.basename(fpath) for fpath in glob.glob("./xx/*.txt")]
because ./**/ is expression which masks a folder name and it's unnecessary that case.

Related

importing filename and using regex to change the filename and save back

I have files with filenames as "lsud-ifgufib-1234568789.png" I want to rename this file as digits only which are followed by last "-" and then save them back in the folder.
Basically I want the final filename to be the digits that are followed by "-".
~ path = 'C:/Users/abc/downloads'
for filename in os.listdir(path):
r = re.compile("(\d+)")
newlist = filter(r.match, filename)
print(newlist)
~
How do I proceed further?
Assumptions:
You want to rename files if the file has a hyphen before the number.
The file may or may not have an extention.
If the file has an extention, preserve it.
Then would you please try the following:
import re, os
path = 'C:/Users/abc/downloads'
for filename in os.listdir(path):
m = re.search(r'.*-(\d+.*)', filename)
if m:
os.rename(os.path.join(path, filename), os.path.join(path, m.group(1)))
You could try a regex search followed by a path join:
import re
import os
path = 'C:/Users/abc/downloads'
for filename in os.listdir(path):
os.rename(filename, os.path.join(path, re.search("\d+(?=\D+?$)", filename).group()))
import re
import pathlib
fileName = "lsud-ifgufib-1234568789.png"
_extn = pathlib.Path(fileName).suffix
_digObj = re.compile(r'\d+')
digFileName = ''.join(_digObj.findall(fileName))
replFileName = digFileName + _extn

Wildcard for read_template [duplicate]

This question's answers are a community effort. Edit existing answers to improve this post. It is not currently accepting new answers or interactions.
How can I find all the files in a directory having the extension .txt in python?
You can use glob:
import glob, os
os.chdir("/mydir")
for file in glob.glob("*.txt"):
print(file)
or simply os.listdir:
import os
for file in os.listdir("/mydir"):
if file.endswith(".txt"):
print(os.path.join("/mydir", file))
or if you want to traverse directory, use os.walk:
import os
for root, dirs, files in os.walk("/mydir"):
for file in files:
if file.endswith(".txt"):
print(os.path.join(root, file))
Use glob.
>>> import glob
>>> glob.glob('./*.txt')
['./outline.txt', './pip-log.txt', './test.txt', './testingvim.txt']
Something like that should do the job
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith('.txt'):
print(file)
You can simply use pathlibs glob 1:
import pathlib
list(pathlib.Path('your_directory').glob('*.txt'))
or in a loop:
for txt_file in pathlib.Path('your_directory').glob('*.txt'):
# do something with "txt_file"
If you want it recursive you can use .glob('**/*.txt')
1The pathlib module was included in the standard library in python 3.4. But you can install back-ports of that module even on older Python versions (i.e. using conda or pip): pathlib and pathlib2.
Something like this will work:
>>> import os
>>> path = '/usr/share/cups/charmaps'
>>> text_files = [f for f in os.listdir(path) if f.endswith('.txt')]
>>> text_files
['euc-cn.txt', 'euc-jp.txt', 'euc-kr.txt', 'euc-tw.txt', ... 'windows-950.txt']
import os
path = 'mypath/path'
files = os.listdir(path)
files_txt = [i for i in files if i.endswith('.txt')]
I like os.walk():
import os
for root, dirs, files in os.walk(dir):
for f in files:
if os.path.splitext(f)[1] == '.txt':
fullpath = os.path.join(root, f)
print(fullpath)
Or with generators:
import os
fileiter = (os.path.join(root, f)
for root, _, files in os.walk(dir)
for f in files)
txtfileiter = (f for f in fileiter if os.path.splitext(f)[1] == '.txt')
for txt in txtfileiter:
print(txt)
Here's more versions of the same that produce slightly different results:
glob.iglob()
import glob
for f in glob.iglob("/mydir/*/*.txt"): # generator, search immediate subdirectories
print f
glob.glob1()
print glob.glob1("/mydir", "*.tx?") # literal_directory, basename_pattern
fnmatch.filter()
import fnmatch, os
print fnmatch.filter(os.listdir("/mydir"), "*.tx?") # include dot-files
Try this this will find all your files recursively:
import glob, os
os.chdir("H:\\wallpaper")# use whatever directory you want
#double\\ no single \
for file in glob.glob("**/*.txt", recursive = True):
print(file)
Python v3.5+
Fast method using os.scandir in a recursive function. Searches for all files with a specified extension in folder and sub-folders. It is fast, even for finding 10,000s of files.
I have also included a function to convert the output to a Pandas Dataframe.
import os
import re
import pandas as pd
import numpy as np
def findFilesInFolderYield(path, extension, containsTxt='', subFolders = True, excludeText = ''):
""" Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)
path: Base directory to find files
extension: File extension to find. e.g. 'txt'. Regular expression. Or 'ls\d' to match ls1, ls2, ls3 etc
containsTxt: List of Strings, only finds file if it contains this text. Ignore if '' (or blank)
subFolders: Bool. If True, find files in all subfolders under path. If False, only searches files in the specified folder
excludeText: Text string. Ignore if ''. Will exclude if text string is in path.
"""
if type(containsTxt) == str: # if a string and not in a list
containsTxt = [containsTxt]
myregexobj = re.compile('\.' + extension + '$') # Makes sure the file extension is at the end and is preceded by a .
try: # Trapping a OSError or FileNotFoundError: File permissions problem I believe
for entry in os.scandir(path):
if entry.is_file() and myregexobj.search(entry.path): #
bools = [True for txt in containsTxt if txt in entry.path and (excludeText == '' or excludeText not in entry.path)]
if len(bools)== len(containsTxt):
yield entry.stat().st_size, entry.stat().st_atime_ns, entry.stat().st_mtime_ns, entry.stat().st_ctime_ns, entry.path
elif entry.is_dir() and subFolders: # if its a directory, then repeat process as a nested function
yield from findFilesInFolderYield(entry.path, extension, containsTxt, subFolders)
except OSError as ose:
print('Cannot access ' + path +'. Probably a permissions error ', ose)
except FileNotFoundError as fnf:
print(path +' not found ', fnf)
def findFilesInFolderYieldandGetDf(path, extension, containsTxt, subFolders = True, excludeText = ''):
""" Converts returned data from findFilesInFolderYield and creates and Pandas Dataframe.
Recursive function to find all files of an extension type in a folder (and optionally in all subfolders too)
path: Base directory to find files
extension: File extension to find. e.g. 'txt'. Regular expression. Or 'ls\d' to match ls1, ls2, ls3 etc
containsTxt: List of Strings, only finds file if it contains this text. Ignore if '' (or blank)
subFolders: Bool. If True, find files in all subfolders under path. If False, only searches files in the specified folder
excludeText: Text string. Ignore if ''. Will exclude if text string is in path.
"""
fileSizes, accessTimes, modificationTimes, creationTimes , paths = zip(*findFilesInFolderYield(path, extension, containsTxt, subFolders))
df = pd.DataFrame({
'FLS_File_Size':fileSizes,
'FLS_File_Access_Date':accessTimes,
'FLS_File_Modification_Date':np.array(modificationTimes).astype('timedelta64[ns]'),
'FLS_File_Creation_Date':creationTimes,
'FLS_File_PathName':paths,
})
df['FLS_File_Modification_Date'] = pd.to_datetime(df['FLS_File_Modification_Date'],infer_datetime_format=True)
df['FLS_File_Creation_Date'] = pd.to_datetime(df['FLS_File_Creation_Date'],infer_datetime_format=True)
df['FLS_File_Access_Date'] = pd.to_datetime(df['FLS_File_Access_Date'],infer_datetime_format=True)
return df
ext = 'txt' # regular expression
containsTxt=[]
path = 'C:\myFolder'
df = findFilesInFolderYieldandGetDf(path, ext, containsTxt, subFolders = True)
path.py is another alternative: https://github.com/jaraco/path.py
from path import path
p = path('/path/to/the/directory')
for f in p.files(pattern='*.txt'):
print f
To get all '.txt' file names inside 'dataPath' folder as a list in a Pythonic way:
from os import listdir
from os.path import isfile, join
path = "/dataPath/"
onlyTxtFiles = [f for f in listdir(path) if isfile(join(path, f)) and f.endswith(".txt")]
print onlyTxtFiles
Python has all tools to do this:
import os
the_dir = 'the_dir_that_want_to_search_in'
all_txt_files = filter(lambda x: x.endswith('.txt'), os.listdir(the_dir))
I did a test (Python 3.6.4, W7x64) to see which solution is the fastest for one folder, no subdirectories, to get a list of complete file paths for files with a specific extension.
To make it short, for this task os.listdir() is the fastest and is 1.7x as fast as the next best: os.walk() (with a break!), 2.7x as fast as pathlib, 3.2x faster than os.scandir() and 3.3x faster than glob.
Please keep in mind, that those results will change when you need recursive results. If you copy/paste one method below, please add a .lower() otherwise .EXT would not be found when searching for .ext.
import os
import pathlib
import timeit
import glob
def a():
path = pathlib.Path().cwd()
list_sqlite_files = [str(f) for f in path.glob("*.sqlite")]
def b():
path = os.getcwd()
list_sqlite_files = [f.path for f in os.scandir(path) if os.path.splitext(f)[1] == ".sqlite"]
def c():
path = os.getcwd()
list_sqlite_files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(".sqlite")]
def d():
path = os.getcwd()
os.chdir(path)
list_sqlite_files = [os.path.join(path, f) for f in glob.glob("*.sqlite")]
def e():
path = os.getcwd()
list_sqlite_files = [os.path.join(path, f) for f in glob.glob1(str(path), "*.sqlite")]
def f():
path = os.getcwd()
list_sqlite_files = []
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(".sqlite"):
list_sqlite_files.append( os.path.join(root, file) )
break
print(timeit.timeit(a, number=1000))
print(timeit.timeit(b, number=1000))
print(timeit.timeit(c, number=1000))
print(timeit.timeit(d, number=1000))
print(timeit.timeit(e, number=1000))
print(timeit.timeit(f, number=1000))
Results:
# Python 3.6.4
0.431
0.515
0.161
0.548
0.537
0.274
import os
import sys
if len(sys.argv)==2:
print('no params')
sys.exit(1)
dir = sys.argv[1]
mask= sys.argv[2]
files = os.listdir(dir);
res = filter(lambda x: x.endswith(mask), files);
print res
To get an array of ".txt" file names from a folder called "data" in the same directory I usually use this simple line of code:
import os
fileNames = [fileName for fileName in os.listdir("data") if fileName.endswith(".txt")]
This code makes my life simpler.
import os
fnames = ([file for root, dirs, files in os.walk(dir)
for file in files
if file.endswith('.txt') #or file.endswith('.png') or file.endswith('.pdf')
])
for fname in fnames: print(fname)
Use fnmatch: https://docs.python.org/2/library/fnmatch.html
import fnmatch
import os
for file in os.listdir('.'):
if fnmatch.fnmatch(file, '*.txt'):
print file
A copy-pastable solution similar to the one of ghostdog:
def get_all_filepaths(root_path, ext):
"""
Search all files which have a given extension within root_path.
This ignores the case of the extension and searches subdirectories, too.
Parameters
----------
root_path : str
ext : str
Returns
-------
list of str
Examples
--------
>>> get_all_filepaths('/run', '.lock')
['/run/unattended-upgrades.lock',
'/run/mlocate.daily.lock',
'/run/xtables.lock',
'/run/mysqld/mysqld.sock.lock',
'/run/postgresql/.s.PGSQL.5432.lock',
'/run/network/.ifstate.lock',
'/run/lock/asound.state.lock']
"""
import os
all_files = []
for root, dirs, files in os.walk(root_path):
for filename in files:
if filename.lower().endswith(ext):
all_files.append(os.path.join(root, filename))
return all_files
You can also use yield to create a generator and thus avoid assembling the complete list:
def get_all_filepaths(root_path, ext):
import os
for root, dirs, files in os.walk(root_path):
for filename in files:
if filename.lower().endswith(ext):
yield os.path.join(root, filename)
I suggest you to use fnmatch and the upper method. In this way you can find any of the following:
Name.txt;
Name.TXT;
Name.Txt
.
import fnmatch
import os
for file in os.listdir("/Users/Johnny/Desktop/MyTXTfolder"):
if fnmatch.fnmatch(file.upper(), '*.TXT'):
print(file)
Here's one with extend()
types = ('*.jpg', '*.png')
images_list = []
for files in types:
images_list.extend(glob.glob(os.path.join(path, files)))
Functional solution with sub-directories:
from fnmatch import filter
from functools import partial
from itertools import chain
from os import path, walk
print(*chain(*(map(partial(path.join, root), filter(filenames, "*.txt")) for root, _, filenames in walk("mydir"))))
In case the folder contains a lot of files or memory is an constraint, consider using generators:
def yield_files_with_extensions(folder_path, file_extension):
for _, _, files in os.walk(folder_path):
for file in files:
if file.endswith(file_extension):
yield file
Option A: Iterate
for f in yield_files_with_extensions('.', '.txt'):
print(f)
Option B: Get all
files = [f for f in yield_files_with_extensions('.', '.txt')]
use Python OS module to find files with specific extension.
the simple example is here :
import os
# This is the path where you want to search
path = r'd:'
# this is extension you want to detect
extension = '.txt' # this can be : .jpg .png .xls .log .....
for root, dirs_list, files_list in os.walk(path):
for file_name in files_list:
if os.path.splitext(file_name)[-1] == extension:
file_name_path = os.path.join(root, file_name)
print file_name
print file_name_path # This is the full path of the filter file
Many users have replied with os.walk answers, which includes all files but also all directories and subdirectories and their files.
import os
def files_in_dir(path, extension=''):
"""
Generator: yields all of the files in <path> ending with
<extension>
\param path Absolute or relative path to inspect,
\param extension [optional] Only yield files matching this,
\yield [filenames]
"""
for _, dirs, files in os.walk(path):
dirs[:] = [] # do not recurse directories.
yield from [f for f in files if f.endswith(extension)]
# Example: print all the .py files in './python'
for filename in files_in_dir('./python', '*.py'):
print("-", filename)
Or for a one off where you don't need a generator:
path, ext = "./python", ext = ".py"
for _, _, dirfiles in os.walk(path):
matches = (f for f in dirfiles if f.endswith(ext))
break
for filename in matches:
print("-", filename)
If you are going to use matches for something else, you may want to make it a list rather than a generator expression:
matches = [f for f in dirfiles if f.endswith(ext)]

Renaming of files for a given pattern

I need help.
there is a folder "C:\TEMP" in this folder are formatted files "IN_ + 7123456789.amr"
It is necessary to make renaming of files for a given pattern.
"IN_ NAME _ DATE-CREATE _ Phone number.amr"
Correspondingly, if a file called "OUT_ + 7123456789.amr" the result format "OUT_ NAME_DATE-CREATE_Phone number.amr"
The question is how to specify the file name has been checked before os.rename and depending on the file name to use the template
import os
path = "C:/TEMP"
for i, filename in enumerate(os.listdir(path)):
os.chdir(path)
os.rename(filename, 'name'+str(i) +'.txt')
i = i+1
Sorry but none of your examples are consistent in your question, I still don't understand what your C:\temp contains...
Well, assuming it would look like:
>>> os.listdir(path)
['IN_ + 7123456789.amr', 'OUT_ + 7123456789.amr']
The example:
import datetime
import re
import os
os.chdir(path)
for filename in os.listdir(path):
match = re.match(r'(IN|OUT)_ \+ (\d+).amr', filename)
if match:
file_date = datetime.datetime.fromtimestamp(os.stat(filename).st_mtime)
destination = '%s_%s_%s_Phone number.amr' % (
match.group(1), # either IN or OUT
match.group(2),
file_date.strftime('%Y%m%d%H%M%S'), # adjust the format at your convenience
)
os.rename(filename, destination)
Will produce:
IN_7123456789_20150721094227_Phone number.amr
OUT_7123456789_20150721094227_Phone number.amr
Other files won't match the re.match pattern and be ignored.

Renaming multiple images with .rename and .endswith

I've been trying to get this to work, but I feel like I'm missing something. There is a large collection of images in a folder that I need to rename just part of the filename. For example, I'm trying to rename the "RJ_200", "RJ_600", and "RJ_60"1 all to the same "RJ_500", while keeping the rest of the filename intact.
Image01.Food.RJ_200.jpg
Image02.Food.RJ_200.jpg
Image03.Basket.RJ_600.jpg
Image04.Basket.RJ_600.jpg
Image05.Cup.RJ_601.jpg
Image06.Cup.RJ_602.jpg
This is what I have so far, but it keeps just giving me the "else" instead of actually renaming any of them:
import os
import fnmatch
import sys
user_profile = os.environ['USERPROFILE']
dir = user_profile + "\Desktop" + "\Working"
print (os.listdir(dir))
for images in dir:
if images.endswith("RJ_***.jpg"):
os.rename("RJ_***.jpg", "RJ_500.jpg")
else:
print ("Arg!")
The Python string method endswith does not do pattern-matching with *, so you're looking for filenames which explicitly include the asterisk character and not finding any.
Try using regular expressions to match your filenames and then building your target filename explicitly:
import os
import re
patt = r'RJ_\d\d\d'
user_profile = os.environ['USERPROFILE']
path = os.path.join(user_profile, "Desktop", "Working")
image_files = os.listdir(path)
for filename in image_files:
flds = filename.split('.')
try:
frag = flds[2]
except IndexError:
continue
if re.match(patt, flds[2]):
from_name = os.path.join(path, filename)
to_name = '.'.join([flds[0], flds[1], 'RJ_500', 'jpg'])
os.rename(from_name, os.path.join(path, to_name))
Note that you need to do your matching with the file's basename and join on the rest of the path later.
You don't need to use .endswith. You can split the image file name up using .split and check the results. Since there are several suffix strings involved, I've put them all into a set for fast membership testing.
import os
import re
import sys
suffixes = {"RJ_200", "RJ_600", "RJ_601"}
new_suffix = "RJ_500"
user_profile = os.environ["USERPROFILE"]
dir = os.path.join(user_profile, "Desktop", "Working")
for image_name in os.listdir(dir):
pieces = image_name.split(".")
if pieces[2] in suffixes:
from_path = os.path.join(dir, image_name)
new_name = ".".join([pieces[0], pieces[1], new_suffix, pieces[3]])
to_path = os.path.join(dir, new_name)
print("renaming {} to {}".format(from_path, to_path))
os.rename(from_path, to_path)

how to get list of files from a directory into text file using python

This question is how to get list of files from a directory into text file using python.
Result in the text file should exactly be like this:
E:\AA\a.jpg
E:\AA\b.jpg
...
How to correct the code below:
WD = "E:\\AA"
import glob
files = glob.glob ('*.jpg')
with open ('infiles.txt', 'w') as in_files:
in_files.write(files +'\n')
glob.glob() returns a list. You have to iterate through it.
WD = "E:\\AA"
import glob
files = glob.glob ('*.jpg')
with open ('infiles.txt', 'w') as in_files:
for eachfile in files: in_files.write(eachfile+'\n')
Input directory path : WD = "E://AA"
You can assign specific file extention that you needed eg: path = WD+'/*.jpg',
if you need all file list then give '' eg: path = WD+'/'
import glob
w_dir = WD + "/*.jpg"
with open("infiles.txt","wb")as fp:
for path in [filepath for filepath in glob.glob(w_dir)]:
fp.write(path+"\n")
Without path, glob.glob returns list of filename (No directory part). To get full path you need to call os.path.abspath(filename) / os.path.realpath(filename) / os.path.join(WD, filename)
>>> glob.glob('*.png')
['gnome-html.png', 'gnome-windows.png', 'gnome-set-time.png', ...]
>>> os.path.abspath('gnome-html.png')
'/usr/share/pixmaps/gnome-html.png'
With path, glob.glob return list of filename with directory part.
>>> glob.glob('/usr/share/pixmaps/*.png')
['/usr/share/pixmaps/gnome-html.png', '/usr/share/pixmaps/gnome-windows.png', '/usr/share/pixmaps/gnome-set-time.png', ...]
import glob
import os
WD = r'E:\AA'
files = glob.glob(os.path.join(WD, '*.jpg'))
with open('infiles.txt', 'w') as in_files:
in_files.writelines(fn + '\n' for fn in files)
or
import glob
import os
WD = r'E:\AA'
os.chdir(WD)
files = glob.glob('*.jpg')
with open('infiles.txt', 'w') as in_files:
in_files.writelines(os.path.join(WD, fn) + '\n' for fn in files)
Here is a two line simple solution:
import os
filee = open('all_names.txt','w')
given_dir = 'the_dierctory'
[filee.write(os.path.join(os.path.dirname(os.path.abspath(__file__)),given_dir,i)+'\n') for i in os.listdir(given_dir)]
where given_dir is the directory name. The output is a text file (all_names.txt) where each line in the file is the full path to all files and directories in the given_dir.

Categories