Python: change part (single directory name) of path - python

What would be the best way to change a single directory name (only the first occurrence) within a path?
Example:
source_path = "/path/to/a/directory/or/file.txt"
target_path = "/path/to/different/directory/or/file.txt"
In this case, the instruction would be: "replace the first directory of the name 'a' with a directory of the name 'different'"
I can think of methods where I would split up the path in its single parts first, then find the first "a", replace it and join it again. But I wonder if there is a more elegant way to deal with this. Maybe a built-in python function.

There is a function called os.path.split that can split a path into the final part and everything leading up to it, but that's the closest you're going to get. Therefore the most elegant thing we can do is create a function that calls it repeatedly:
import os, sys
def splitall(path):
    """Split *path* into a list of all of its components.

    ``os.path.split`` only separates the last component from everything
    before it, so it is applied repeatedly until nothing more can be
    split off.

    For an absolute path the first element of the result is the root
    (e.g. ``'/'``); for a relative path it is the first name.

    >>> splitall('/path/to/file')
    ['/', 'path', 'to', 'file']
    >>> splitall('path/to/file')
    ['path', 'to', 'file']
    """
    # Components are discovered last-to-first; collect them with append()
    # and reverse once at the end instead of paying for insert(0, ...) on
    # every step.
    allparts = []
    while True:
        head, tail = os.path.split(path)
        if head == path:  # sentinel for absolute paths: only the root is left
            allparts.append(head)
            break
        if tail == path:  # sentinel for relative paths: a single name is left
            allparts.append(tail)
            break
        path = head
        allparts.append(tail)
    allparts.reverse()
    return allparts
Then you could use it like this, joining back together with os.path.join:
>>> source_path = '/path/to/a/directory/or/file'
>>> temp = splitall(source_path)
>>> temp
['/', 'path', 'to', 'a', 'directory', 'or', 'file']
>>> temp[temp.index('a')] = 'different'
>>> target_path = os.path.join(*temp)
>>> target_path
'/path/to/different/directory/or/file'

If I understand what you want to say, you want this:
source_path = "/path/to/a/directory/or/file.txt"
# The count argument (1) limits the substitution to the first "/a/" only.
# Matching on the surrounding slashes keeps names that merely contain "a"
# (such as "path") untouched.
target_path = source_path.replace("/a/", "/different/", 1)
print(target_path)

Use https://docs.python.org/3/library/pathlib.html#module-pathlib:
>>> from pathlib import PurePath
>>> import os
>>> path = PurePath("/path/to/a/directory/or/file.txt")
>>> path.parts
('/', 'path', 'to', 'a', 'directory', 'or', 'file.txt')
>>> a_idx = -1
>>> for idx,part in enumerate(path.parts):
...     if part == 'a':
...         a_idx = idx
...         break
...
>>> a_idx
3
>>> pre_path = os.path.join(*path.parts[:a_idx])
>>> post_path = os.path.join(*path.parts[a_idx+1:])
>>> new_path = os.path.join(pre_path, 'different', post_path)
>>> new_path
'/path/to/different/directory/or/file.txt'

In case you don't know the name of the directory, but only its index:
from pathlib import Path

source_path = Path("/path/to/a/directory/or/file.txt")
unknown_index = 3  # position in .parts, counting the root as index 0
unknown_name = source_path.parts[unknown_index]

# Replace only the component at the known index. Comparing by name instead
# would also rename any other component that happens to share that name.
parts = list(source_path.parts)
parts[unknown_index] = "different"
target_path = Path(*parts).as_posix()
In case you know the name of the directory but not its index, almost the same:
from pathlib import Path

source = Path("/path/to/a/directory/or/file.txt")
src_parts = source.parts
unknown_index = src_parts.index('a')  # index of the first component named 'a'

# Swap in the new name at the located position and keep every other
# component as it is.
renamed = [
    "different" if position == unknown_index else component
    for position, component in enumerate(src_parts)
]
# Joining with "/" and then dropping the first character removes the extra
# separator produced in front of the root component.
target_path = "/".join(renamed)[1:]

Related

Copy to a new directory and Remove files from common directories

I have a dictionary which contains keys and values as below :
defaultdict(, {'A': ['hello.c', 'Aa/hello1.c', 'Aa/diff.c', 'Aa/hello2.h'], 'C': ['Aa/hello1.c', 'Aa/sample.h'], 'B': ['Aa/hello1.c', 'Aa/hello2.h']})
From the above dictionary I extracted the common files present in more than one key:
Aa/hello1.c set(['A', 'C', 'B'])
Aa/hello2.h set(['A', 'B'])
These files are stored in a list along with their paths as mentioned above:
['Aa/hello1.c', 'Aa/hello2.h']
Now I have created a directory using subprocess and I wanted to 'copy' the common files obtained from those directories to newly created directory and remove the files from actual paths after copying into new directory.
I'm new to python, could anyone help me out solving the problem as how to traverse into a list when we have files along with common paths and copy from the absolute path and remove from respective paths.
Thanks in advance!!
import os
from itertools import chain
import collections
from collections import defaultdict
import pdb
import re
import subprocess
from subprocess import call
import shlex
# Group files by their top-level ("platform") directory. Each value holds
# the file paths relative to that platform directory.
d1 = defaultdict(list)
for path, subdirs, files in os.walk('.'):
    for name in files:
        var1 = os.path.join(path, name)
        if '.py' in var1:
            continue
        # Expect "./<platform>/<rest>", where <platform> is one character:
        # a letter or "-". (The original class "A-z" also admitted the
        # punctuation between "Z" and "a".)
        matchobj = re.match(r'\./([a-zA-Z\-])/(.*)', var1, re.M | re.I)
        if matchobj:
            platform = matchobj.group(1)
            without_platform = matchobj.group(2)
            d1[platform].append(without_platform)
        else:
            print('not matching')
print(d1)

# Invert the mapping: relative file path -> set of platforms that contain it.
rev_dict = {}
for key, value in d1.items():
    for i in value:
        rev_dict.setdefault(i, set()).add(key)

# A file found under more than one platform is a "common" file; collect
# those paths instead of re-using the stale `value` left over from the
# loop above.
var3 = []
for i, j in rev_dict.items():
    if len(j) > 1:
        print('{} {}'.format(i, j))
        var3.append(i)
print(var3)

var2 = os.path.join(os.getcwd(), "common_files/")
# NOTE(review): the command below creates "sample", not the "common_files/"
# path held in var2 -- confirm which directory is intended.
cmd_exe = subprocess.Popen("mkdir sample", stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, universal_newlines=True)
cmd_out, cmd_err = cmd_exe.communicate()
You can use the os module.
import os
# create a single directory (the parent directory must already exist)
os.mkdir(path)
# delete a file; os.remove() does not delete directories
# (use os.rmdir() or shutil.rmtree() for those)
os.remove(path)
# move to destination in one step instead of copying and then deleting
os.rename(source, destination)
You could also use the shutil module, described here
Edit:
From what I understand, you can do this:
.
.
import shutil
.
.
.
# Build "<platform>/<relative path>" for every file that exists under more
# than one platform directory.
path_list = []
for i, j in rev_dict.items():
    if len(j) > 1:
        # j is a set; sort it so the resulting order is deterministic.
        for each in sorted(j):
            fullpath = each + "/" + i
            path_list.append(fullpath)
print(path_list)
# path_list should now contain the full path
. . .
destination = # new destination path
for source in path_list:
dest = shutil.move(source, destination)
Try it out.
print(path_list) gave me this:
['A/Aa/hello1.c',
'B/Aa/hello1.c',
'C/Aa/hello1.c',
'A/Aa/hello2.h',
'B/Aa/hello2.h']

How to get filename without some special extensions in python

I have a file that has some special extension. Sometimes it is '.exe', or 'exe.gz' or 'exe.tar.gz'... I want to get the filename only. I am using the code below to get the filename abc, but it does not work for all cases:
import os

# Show what os.path.splitext() leaves behind for each sample name: only the
# last extension is removed, so a double extension keeps its first half.
for filename in ('abc.exe', 'abc.exe.gz'):
    base = os.path.basename(filename)
    stem, _extension = os.path.splitext(base)
    print(stem)
Note that, I knew the list of extensions such as ['.exe','exe.gz','exe.tar.gz', '.gz']
You can just split with the . char and take the first element:
>>> filename = 'abc.exe'
>>> filename.split('.')[0]
'abc'
>>> filename = 'abc.exe.gz'
>>> filename.split('.')[0]
'abc'
How about a workaround like this?
suffixes = ['.exe', '.exe.gz', '.exe.tar.gz', '.gz']


def get_basename(filename):
    """Return *filename* with the longest matching entry of ``suffixes`` removed.

    If no suffix matches, *filename* is returned unchanged.
    """
    # Try longer suffixes first so the result does not depend on the order
    # of ``suffixes`` (otherwise '.gz' listed first would shadow '.exe.gz').
    for suffix in sorted(suffixes, key=len, reverse=True):
        # Skip empty suffixes: filename[:-0] would return an empty string.
        if suffix and filename.endswith(suffix):
            return filename[:-len(suffix)]
    return filename

Python Changing File path name

I want to replace part of my folder path with a string, but I am getting an error.
I tried this :
a="ram"
my_list.to_csv(r'E:\'+str(a)+'\4\mani.csv' )
You made a string concatenation mistake. Try str.format to avoid such mistakes.
import os
a = "ram"
file_path = r'E:\{a}\4\mani.csv'.format(a=a)
directory = os.path.dirname(file_path)
# Make sure the target folder exists before writing into it; exist_ok=True
# keeps this from failing when the folder is already there.
os.makedirs(directory, exist_ok=True)
my_list.to_csv(file_path)
>>> a = "ram"
>>> filename = 'mani.csv'
>>> absolute_path = os.path.join('E:\\', a, '4', filename)
>>> my_list.to_csv(absolute_path)

Printing a value from a randint

import os
import random
# Names of the entries in the directory (not full paths).
path = os.listdir(r"file path here")
# NOTE(review): this name shadows the built-in `list`, and the value is not
# used by the lines below.
list = [os.listdir(r"life path here")]
print(len(path))
for i in range(len(path)):
# Plain string concatenation: the directory string must already end with a
# path separator for this to produce a valid path.
full_path = (r"file path here" + path[i])
print(full_path)
# NOTE(review): len(path[i]) is the number of characters in one entry's name,
# not the number of entries, and random.randint() includes both end points --
# presumably an index into `path` was intended; confirm.
print_random_items = random.randint(0, len(path[i]))
print(print_random_items)
So Hi I would like to know how I can print the name of the file associated with the value return to print(print_random_items)
ex: If the value is 15 I would like to print the 15th files name
First time asking a question here sorry if the format is wrong.
Don't bother with the random numbers. Just use random.choice:
random.choice(paths)
For example:
>>> import random
>>> paths = os.listdir('./exampledir')
>>> paths
['a.txt', 'b.txt', 'c.txt', 'd.txt', 'e.txt', 'f.txt']
>>> random.choice(paths)
'e.txt'
>>> random.choice(paths)
'c.txt'
Note: I see in your code that you are not familiar with python style iteration.
This
for i in range(len(path)):
full_path = (r"file path here" + path[i])
print(full_path)
is better written as
for partial_path in path:
full_path = r"file path here" + partial_path
print(full_path)
So rather than using range(len(path)) to get an index you can just iterate over path directly.

Python Os.walk Filter Filenames (What extensions not to include and skip dirs)

filetypes = ("*.jpg","*.txt","*.csv")
filelist = []
for root, dirnames, filenames in os.walk("c:\\"):
for ft in filetypes:
for f in fnmatch.filter(filenames, ft):
filelist.append(os.path.join(root, f))
I have this code which will add to my list only files with the extensions I provide,
1) I want to do the opposite add all file extensions "*.*" and filter some of them I don't need for example "*.dat","*.dll","*.log","*.exe"
2) Also I do not need files from c:\\windows c:\\program files c:\\else can I filter it too?
3) I need it to be fast found this example code from other answer it seems to be faster but what is main speed issue in this type of function os.walk? If so there is scandir github project 7-20 times faster os.walk improved function, or if it's the filtering of file matches by extensions i want to filter 20+ extensions any suggestions?
import os
extns = ('.jpg', '.jpeg', '.png', '.tif', '.tiff')
matches = []
for root, dirnames, fns in os.walk("C:\\"):
matches.extend(os.path.join(root, fn) for fn in fns if fn.lower().endswith(extns))
Your help is very much appreciated
#!/usr/bin/python2.7
import os
import sys
import re
import fnmatch
def findit(root, exclude_files=(), exclude_dirs=()):
    """Yield the paths of files below *root*, honouring the exclusions.

    Parameters
    ----------
    root : str
        Directory to start walking from.
    exclude_files : iterable of str, optional
        ``fnmatch``-style patterns (e.g. ``'*.dll'``); a file whose name
        matches any of them is skipped. Empty means "exclude nothing".
    exclude_dirs : iterable of str, optional
        Directories to skip. They are compared after ``os.path.normpath``
        and ``os.path.normcase``. An excluded directory is pruned from the
        walk, so nothing beneath it is visited or returned.

    Yields
    ------
    str
        ``os.path.join(directory, filename)`` for every file that is kept.
    """
    patterns = [fnmatch.translate(p) for p in exclude_files]
    # Only build the combined regex when there is something to exclude: an
    # empty alternation would match every name and hide all files.
    is_excluded = None
    if patterns:
        combined = '|'.join('(?:%s)' % p for p in patterns)
        is_excluded = re.compile(combined).match

    skip_dirs = set(
        os.path.normcase(os.path.normpath(d)) for d in exclude_dirs
    )

    def _is_skipped(directory):
        return os.path.normcase(os.path.normpath(directory)) in skip_dirs

    for current, dirnames, filenames in os.walk(root):
        if _is_skipped(current):
            # Only reachable for root itself; stop the walk right here.
            dirnames[:] = []
            continue
        # Prune in place so os.walk never descends into excluded trees.
        dirnames[:] = [
            d for d in dirnames
            if not _is_skipped(os.path.join(current, d))
        ]
        for name in filenames:
            if is_excluded is None or not is_excluded(os.path.normcase(name)):
                yield os.path.join(current, name)
if __name__ == '__main__':
    # If you need the entire list in memory at once
    filelist = list(findit('c:/',
                           exclude_files=['*.dll', '*.dat', '*.log', '*.exe'],
                           exclude_dirs=['c:/windows', 'c:/program files', 'c:/else'],
                           ))
    # Or this, if you need the items one at a time (saves memory):
    for filename in findit('c:/',
                           exclude_files=['*.dll', '*.dat', '*.log', '*.exe'],
                           exclude_dirs=['c:/windows', 'c:/program files', 'c:/else'],
                           ):
        # print() with one argument works on both Python 2.7 and Python 3.
        print(filename)  # or stat() or open() the file, or whatever.
I've tried to write a more flexible os.walk to filter dirs and files with a regex, but using pathlib2 functions.
Here it is.
import regex
from pathlib2 import Path
def _recursedir_(self, depth=-1, exclude=None, invert=False):
'''
Parameters
----------
depth : int, optional
depth to stop at, if less than 0 then don't stop. The default is -1.
exclude : compiled regex expression, or dict of regex expressions, keys 'dir', 'file', optional
regex to match current dir/file name against. The default is None.
invert : bool or dict of bools, keys 'dir', 'file', optional
invert the sense of the filter, default is to skip filter matches. The default is False.
Yields
------
Path
Current dir path.
List of dirs
SubDirs in current path.
List of files
Files in current path.
'''
if type(exclude) is dict:
dfilt = exclude['dir']
ffilt = exclude['file']
else:
dfilt = exclude
ffilt = None # means show all files in current path
if type(invert) is dict:
dsens = invert['dir']
fsens = invert['file']
else:
dsens = invert
fsens = False # means skip files that match
if dfilt is None:
dfun = lambda x : True
elif dsens is False:
dfun = lambda x : not dfilt.match(x.name) # filter match EXCLUDES
else:
dfun = lambda x : dfilt.match(x.name) # filter match INCLUDES
if ffilt is None:
ffun = lambda x: True
elif fsens is False:
ffun = lambda x : not ffilt.match(x.name) # filter match EXCLUDES
else:
ffun = lambda x : ffilt.match(x.name) # filter match INCLUDES
d = self.resolve()
dd = [ x for x in d.iterdir() if x.is_dir()]
f = [ x for x in d.iterdir() if x.is_file()]
dd[:] = [ x for x in filter(dfun, dd )]
f[:] = [ x for x in filter(ffun, f)]
yield (d, dd, f)
if depth > 0 or depth < 0:
for xd in dd:
yield from _recursedir_( d / xd, depth=depth-1, exclude=exclude, invert=invert)
Path.recursedir = _recursedir_
So the call to this method is, given a Path variable e.g.
import re

# Bind the yielded directory to `d`: that is the name the f-string prints,
# and it avoids shadowing the built-in `dir`.
for d, dlist, flist in Path.home().recursedir(depth=3, exclude=re.compile(r'\d+'), invert=True):
    print(f"{d} : {dlist} : {flist}")
This will (should!) limit the depth to 3 and, because invert=True, visit only the DIRS whose names begin with digits (with invert=False those DIRS would be excluded instead).
So for multiple file extension matching, one would use,
exclude = { 'dir':None, 'file':re.compile(r'.*\.(?:jpg|jpeg|txt)')}

Categories