I would like to write a Python function that is capable of taking a file path, like:
/abs/path/to/my/file/file.txt
And returning three string variables:
/abs - the root directory, plus the "top-most" directory in the path
file - the "bottom-most" directory in the path; the parent of file.txt
path/to/my - everything in between the top- and bottom-most directories in the path
So something with the following pseudo-code:
def extract_path_segments(file):
absPath = get_abs_path(file)
top = substring(absPath, 0, str_post(absPath, "/", FIRST))
bottom = substring(absPath, 0, str_post(absPath, "/", LAST))
middle = str_diff(absPath, top, bottom)
return (top, middle, bottom)
Thanks in advance for any help here!
You are looking for os.sep, together with various os.path module functions. Simply split the path by that character, then re-assemble the parts you want to use. Something like:
import os
def extract_path_segments(path, sep=os.sep):
    """Split the directory part of *path* into ``(top, middle, bottom)``.

    The path is made absolute and its final component (the file name) is
    discarded. Of the remaining directory:

    * ``top`` is the leading separator plus the first directory,
    * ``bottom`` is the last directory (the file's parent),
    * ``middle`` is everything in between, without surrounding separators.

    Components that do not exist are returned as empty strings, e.g.
    ``/abs/file.txt`` gives ``('/abs', '', '')``.

    ``sep`` is the separator used to peel off the first directory. As the
    surrounding answer notes, Windows drive letters and mixed separators are
    not handled.
    """
    directory, _filename = os.path.split(os.path.abspath(path))
    # Skip the leading separator, then cut at the first one that follows.
    # partition() never fails, whereas unpacking split(sep, 1) raised
    # ValueError when the directory had a single component.
    first, _, rest = directory[1:].partition(sep)
    top = sep + first
    middle, bottom = os.path.split(rest)
    return (top, middle, bottom)
This does not deal very well with Windows paths, where both \ and / are legal path separators. In that case you also have a drive letter, so you'd have to special-case that as well anyway.
Output:
>>> extract_path_segments('/abs/path/to/my/file/file.txt')
('/abs', 'path/to/my', 'file')
use os.path.split:
import os.path
def split_path(path):
    """
    Returns a 2-tuple of the form `root, list_of_path_parts`

    `root` is whatever os.path.split() cannot split any further ('/' for an
    absolute POSIX path, '' for a relative one). `list_of_path_parts` holds
    the components in their original left-to-right order.

    A trailing separator is ignored, so '/foo/bar/' and '/foo/bar' give the
    same result.
    """
    head, tail = os.path.split(path)
    if not tail and head != path:
        # A trailing separator leaves the tail empty on the first split.
        # Without this retry the loop below never runs and the whole path
        # would be returned as the "root".
        head, tail = os.path.split(head)
    parts = []
    while tail:
        parts.append(tail)
        head, tail = os.path.split(head)
    # Components were collected leaf-first.
    parts.reverse()
    return head, parts
def get_parts(path):
    """Return ``(head, path_to, parentdir)`` for *path*.

    * ``head`` is the root joined with the first component,
    * ``parentdir`` is the second-to-last component (the parent of the final
      component, which is treated as the file name and not returned),
    * ``path_to`` is everything between those two, or '' when there is
      nothing in between.

    Raises IndexError when *path* has fewer than two components.
    """
    root, path_parts = split_path(path)
    head = os.path.join(root, path_parts[0])
    middle = path_parts[1:-2]
    # os.path.join() requires at least one argument, so an empty middle
    # (e.g. '/a/b/c') must be handled explicitly instead of raising TypeError.
    path_to = os.path.join(*middle) if middle else ''
    parentdir = path_parts[-2]
    return head, path_to, parentdir
# Example: the final component ('baz') is treated as the file name and is not returned.
head,path_to,parentdir = get_parts('/foo/path/to/bar/baz')
print (head) #/foo  (root '/' joined with the first component)
print (path_to) #path/to
print (parentdir) #bar
Using os.path.split() and os.path.join() as we are supposed to
>>> import os
>>> pth = "/abs/path/to/my/file/file.txt"
>>> parts = []
>>> while True:
... pth, last = os.path.split(pth)
... if not last:
... break
... parts.append(last)
...
>>> pth + parts[-1]
'/abs'
>>> parts[1]
'file'
>>> os.path.join(*parts[-2:1:-1])
'path/to/my'
As a function
import os
def extract_path_segments(pth):
    """Return ``(top, bottom, middle)`` for the path *pth*.

    The path is taken apart with os.path.split(); its last component is
    treated as the file name and is not returned.

    * ``top`` is the unsplittable root plus the first directory ('/abs'),
    * ``bottom`` is the file's parent directory,
    * ``middle`` is the directories in between joined with os.path.join(),
      or '' when there are none.

    Note the return order: bottom comes before middle.
    Raises IndexError when *pth* has fewer than two components.
    """
    parts = []
    while True:
        pth, last = os.path.split(pth)
        if not last:
            break
        parts.append(last)
    # parts is leaf-first: [file, parent, ..., top]. The slice walks from the
    # directory just below the top down to the one just above the parent.
    middle = parts[-2:1:-1]
    # os.path.join() needs at least one argument; an empty middle used to
    # raise TypeError (e.g. for '/abs/file/file.txt').
    return pth + parts[-1], parts[1], (os.path.join(*middle) if middle else '')
>>> p = '/abs/path/to/my/file/file.txt'
>>> r = p.split('/')
>>> r[1],'/'.join(r[2:-2]),r[-2]
('abs', 'path/to/my', 'file')
Related
I have a folder with more than 1.000 files that's updated constantly. I'm using a script to add a random number to it based on the total of the files, like this:
Before
file_a
file_b
After
1_file_a
2_file_b
I would like to add leading zeros so that the files are sorted correctly. Like this:
0001_file_a
0010_file_b
0100_file_c
Here's the random number script:
import os
import random
# Numbers already handed out, so that no two files receive the same prefix.
used_random = []
os.chdir('c:/test')
# os.listdir() returns a list, so renaming inside the loop does not disturb the iteration.
for filename in os.listdir():
# Draw a number between 1 and the number of directory entries (inclusive).
n = random.randint(1, len(os.listdir()))
# Re-draw until an unused number comes up.
while n in used_random:
n = random.randint(1, len(os.listdir()))
used_random.append(n)
# The prefix is not zero-padded here, so the names do not sort numerically.
os.rename(filename, f"{n}_{filename}")
I would suggest using f-strings to accomplish this.
>>> num = 2
>>> f"{num:04}_file"
'0002_file'
>>> num = 123
>>> f"{num:04}_file"
'0123_file'
I would also replace the following with a list comprehension.
# Keep only the names that do not start with '.' or '_'.
cleaned_files = []
for item in folder_files:
# item[0] raises IndexError if a name is the empty string.
if item[0] == '.' or item[0] == '_':
pass
else:
cleaned_files.append(item)
# Keep only the names that do not start with '.' or '_'. str.startswith()
# accepts a tuple of prefixes and, unlike item[0], is safe for an empty name.
cleaned_files = [item for item in folder_files if not item.startswith(('.', '_'))]
You should use the first element of the list obtained after split:
def getFiles(files):
    """Return the zero-padded names for files called ``<number>_<rest>``.

    Each name is split at its first underscore; the number in front of it is
    left-padded with zeros to four characters and re-joined with the rest of
    the name. Nothing is renamed on disk: the new names are returned in input
    order so the caller can rename or store them.

    Raises ValueError for a name without an underscore.
    """
    new_names = []
    for file in files:
        # Split on the first underscore only: the rest of the name may
        # contain underscores itself (e.g. '1_file_a').
        file_number, file_end = file.split('_', 1)
        # num is 4 characters long with leading 0
        num = file_number.split()[0].zfill(4)
        new_names.append("{}_{}".format(num, file_end))
    return new_names
Something like this should work ... I hope this helps ...
import re
import glob
import os
import shutil
os.chdir('/tmp')  # I played in the /tmp directory
# Group 1: the leading digits; group 2: the rest of the name from the first underscore on.
numbered = re.compile(r'(^[0-9]+)(_.*)$')
for filename in glob.glob('[0-9]*_file_*'):
    found = numbered.match(filename)
    if not found:
        continue
    digits, remainder = found.groups()
    # e.g. '23' is converted to int and then formatted as '0023'
    new_filename = f"{int(digits):04}" + remainder  # 0023_file_a
    print(filename + " " + new_filename)
    # Not sure if you like to rename the files, if yes:
    # shutil.move(filename, new_filename)
Thanks to user https://stackoverflow.com/users/15261315/chris I updated the random number script to add leading zeros:
import os
import random
# Numbers handed out so far (kept for inspection after the run).
used_random = []
os.chdir('c:/Test')
# List the directory once; every entry receives a distinct number in 1..N.
filenames = os.listdir()
# random.sample() draws N distinct values in one pass, replacing the
# draw-and-retry loop with its repeated os.listdir() calls and list lookups.
numbers = random.sample(range(1, len(filenames) + 1), len(filenames))
# Pad to at least four digits, and wider when the directory holds more than
# 9999 entries, so the names always sort numerically.
width = max(4, len(str(len(filenames))))
for filename, n in zip(filenames, numbers):
    used_random.append(n)
    os.rename(filename, f"{n:0{width}}_{filename}")
I have a python script for Editorial on iOS that I've modified, and I would like help tweaking it further.
I have .taskpaper files in a Dropbox folder that Editorial is pointed at. When I run this workflow, the script searches all the files and returns a list of lines that include "#hardware". This works well, but the final list includes #hardware items that I have already finished and marked with #done. How can I exclude #hardware lines that also contain #done?
There are seven files that run. These two seem to be the ones that need to be modified:
Generate the list of hashtags
import editor
import console
import os
import re
import sys
import codecs
import workflow
# A hashtag is a '#' preceded by whitespace; group 1 is the tag text without the '#'.
pattern = re.compile(r'\s#{1}(\w+)', re.I|re.U)
p = editor.get_path()
from urllib import quote
# Search the directory that contains the document open in the editor.
dir = os.path.dirname(p)
valid_extensions = set(['.taskpaper'])
tags = ['#hardware']
for dir_path, _subdirs, filenames in os.walk(dir):
    for name in filenames:
        full_path = os.path.join(dir_path, name)
        if os.path.splitext(full_path)[1].lower() not in valid_extensions:
            continue
        try:
            with codecs.open(full_path, 'r', 'utf-8') as f:
                for line in f:
                    tags.extend(found.group(1) for found in pattern.finditer(line))
        except UnicodeDecodeError:
            # Stop reading a file that is not valid UTF-8; tags found so far are kept.
            pass
# Output the distinct tags, sorted, one per line.
workflow.set_output('\n'.join(sorted(set(tags))))
and
Search documents with hashtags
import editor
import console
import os
import re
import sys
import codecs
import workflow
from StringIO import StringIO
theme = editor.get_theme()
# Pick the stylesheet variable that matches the editor theme.
workflow.set_variable('CSS', workflow.get_variable('CSS Dark' if theme == 'Dark' else 'CSS Light'))
p = editor.get_path()
searchterm = workflow.get_variable('Search Term')
term = '#' + searchterm
# Literal, case-insensitive match of the hashtag.
pattern = re.compile(re.escape(term), flags=re.IGNORECASE)
from urllib import quote
# Search the directory that contains the document open in the editor.
dir = os.path.dirname(p)
valid_extensions = set(['.taskpaper'])
html = StringIO()
match_count = 0
for dir_path, _subdirs, filenames in os.walk(dir):
    for name in filenames:
        full_path = os.path.join(dir_path, name)
        if os.path.splitext(full_path)[1].lower() not in valid_extensions:
            continue
        found_snippets = []
        # Number of characters read before the current line; turns a
        # position within the line into a position within the file.
        offset = 0
        try:
            with codecs.open(full_path, 'r', 'utf-8') as f:
                for line in f:
                    for match in pattern.finditer(line):
                        hit_start, hit_end = match.span(0)
                        # Up to 100 characters of context on either side of the hit.
                        context_start = max(0, hit_start - 100)
                        context_end = min(len(line)-1, hit_end + 100)
                        found_snippets.append((line[context_start:hit_start],
                                               match.group(0),
                                               line[hit_end:context_end],
                                               hit_start + offset,
                                               hit_end + offset))
                    offset += len(line)
        except UnicodeDecodeError:
            # Stop reading a file that is not valid UTF-8; hits found so far are kept.
            pass
        if not found_snippets:
            continue
        match_count += 1
        root, rel_path = editor.to_relative_path(full_path)
        ed_url = 'editorial://open/' + quote(rel_path.encode('utf-8')) + '?root=' + root
        html.write('<h2>' + name + '</h2>')
        for before, hit, after, sel_start, sel_end in found_snippets:
            # Link that opens the document with the hit selected.
            select_url = 'editorial://open/' + quote(rel_path.encode('utf-8')) + '?root=' + root
            select_url += '&selection=' + str(sel_start) + '-' + str(sel_end)
            html.write('<a class="result-box" href="' + select_url + '">' + before + '<span class="highlight">' + hit + '</span>' + after + '</a>')
if match_count == 0:
    html.write('<p>No matches found.</p>')
workflow.set_output(html.getvalue())
Thank you.
Since the matching lines are stored in a list, you can use a list comprehension to exclude the ones you don't want. Something like this:
l = ['#hardware ttuff', 'stuff #hardware', 'things #hardware sett #done', '#hardware', '#hardware# #done']
print(l)
['#hardware ttuff', 'stuff #hardware', 'things #hardware sett #done', '#hardware', '#hardware# #done']
m = [ s for s in l if '#done' not in s]
print(m)
['#hardware ttuff', 'stuff #hardware', '#hardware']
A friend solved it for me.
We added:
if not "#done" in line:
in the "Search documents with hashtags" file after
for line in f:
Works great
Is there a builtin function to get url like this: ../images.html given a base url like this: http://www.example.com/faq/index.html and a target url such as http://www.example.com/images.html
I checked urlparse module. What I want is counterpart of the urljoin() function.
You could use urlparse.urlparse to find the paths, and the posixpath version of os.path.relpath to find the relative path.
(Warning: This works for Linux, but may not for Windows):
import urlparse
import sys
import posixpath
def relurl(target,base):
    """Return *target* as a URL relative to *base*.

    Both arguments are absolute URLs. The final path component of *base* is
    treated as a document, so the result is relative to the directory that
    contains it. The parameters, query string and fragment of *target* are
    carried over to the result.

    Raises ValueError when the two URLs differ in scheme or network location,
    because a relative URL cannot express either difference.
    """
    base_parts=urlparse.urlparse(base)
    target_parts=urlparse.urlparse(target)
    if base_parts.scheme != target_parts.scheme:
        raise ValueError('target and base schemes do not match')
    if base_parts.netloc != target_parts.netloc:
        raise ValueError('target and base netlocs do not match')
    # Prefix both paths with '.' so posixpath.relpath() sees two relative
    # paths (even when a URL path is empty) and compares them as siblings.
    base_dir='.'+posixpath.dirname(base_parts.path)
    rel_path=posixpath.relpath('.'+target_parts.path,start=base_dir)
    # Re-attach what identifies the resource beyond its path.
    return urlparse.urlunparse(
        ('','',rel_path,target_parts.params,target_parts.query,target_parts.fragment))
# Each entry is (target, base, expected relative URL).
tests=[
    ('http://www.example.com/images.html','http://www.example.com/faq/index.html','../images.html'),
    ('http://google.com','http://google.com','.'),
    ('http://google.com','http://google.com/','.'),
    ('http://google.com/','http://google.com','.'),
    ('http://google.com/','http://google.com/','.'),
    ('http://google.com/index.html','http://google.com/','index.html'),
    ('http://google.com/index.html','http://google.com/index.html','index.html'),
    ]
# Print one line per case: PASS, the mismatch, or the ValueError that was raised.
for target,base,answer in tests:
    try:
        result=relurl(target,base)
    except ValueError as err:
        report='{t!r},{b!r} --> {e}'.format(t=target,b=base,e=err)
    else:
        if result==answer:
            report='{t!r},{b!r} --> PASS'.format(t=target,b=base)
        else:
            report='{t!r},{b!r} --> {r!r} != {a!r}'.format(
                t=target,b=base,r=result,a=answer)
    print(report)
The first solution that comes to mind is:
>>> os.path.relpath('/images.html', os.path.dirname('/faq/index.html'))
'../images.html'
Of course, this requires URL parsing -> domain name comparison (!!) -> path rewriting if that's the case -> re-adding query and fragment.
Edit: a more complete version
import urlparse
import posixpath
def relative_url(destination, source):
    """Return *destination* as a URL relative to *source*.

    The final path component of *source* is treated as a document, so the
    result is relative to the directory that contains it. The query string
    and fragment of *destination* are preserved.

    If the two URLs differ in scheme or network location, *destination* is
    returned unchanged, since no relative URL can reach it.
    """
    u_dest = urlparse.urlsplit(destination)
    u_src = urlparse.urlsplit(source)
    # The first two fields of a split URL are (scheme, netloc).
    if u_dest[:2] != u_src[:2]:
        ## This is a different domain
        return destination
    # An empty URL path means the site root. Without the '/' fallback,
    # relpath() raises ValueError for an empty destination path and resolves
    # an empty source path against the process's working directory.
    dest_path = u_dest.path or '/'
    src_dir = posixpath.dirname(u_src.path) or '/'
    _relpath = posixpath.relpath(dest_path, src_dir)
    return urlparse.urlunsplit(('', '', _relpath, u_dest.query, u_dest.fragment))
Then
>>> relative_url('http://www.example.com/images.html', 'http://www.example.com/faq/index.html')
'../images.html'
>>> relative_url('http://www.example.com/images.html?my=query&string=here#fragment', 'http://www.example.com/faq/index.html')
'../images.html?my=query&string=here#fragment'
>>> relative_url('http://www.example.com/images.html', 'http://www2.example.com/faq/index.html')
'http://www.example.com/images.html'
>>> relative_url('https://www.example.com/images.html', 'http://www.example.com/faq/index.html')
'https://www.example.com/images.html'
Edit: now using the posixpath implementation of os.path to make it work under windows too.
import itertools
import urlparse
def makeRelativeUrl(sourceUrl, targetUrl):
    '''
    Express targetUrl relative to sourceUrl.

    Three cases are distinguished when both URLs share a net location:

    * target at or below the source path: './<remainder>' (or '.' when
      the two paths are equal);
    * target and source share the same parent path: './<last segment>';
    * otherwise: one '../' per source segment after the common prefix,
      followed by the remaining target segments.

    NOTE(review): the first and third cases treat the source path as a
    directory while the second treats it as a document. This mirrors the
    original behaviour; confirm which convention callers expect.

    :param sourceUrl: a string
    :param targetUrl: a string
    :return: the path to target url relative to first or targetUrl if at different net location
    '''
    parsedSource = urlparse.urlparse(sourceUrl)
    parsedTarget = urlparse.urlparse(targetUrl)
    if parsedSource.netloc != parsedTarget.netloc:
        return targetUrl

    # A trailing slash on the source does not change which directory it names.
    sourcePath = parsedSource.path.rstrip('/')
    # An empty URL path means the site root.
    targetPath = parsedTarget.path or '/'

    # if target on same path but lower than source url.
    # The prefix must end on a segment boundary ('/foo' is not a parent of
    # '/foobar'), and only the leading prefix is removed. str.replace() would
    # rewrite every occurrence, and every character gap for an empty prefix.
    if targetPath == sourcePath or targetPath.startswith(sourcePath + '/'):
        return '.' + targetPath[len(sourcePath):]

    # on same path
    if targetPath.rsplit('/', 1)[0] == parsedSource.path.rsplit('/', 1)[0]:
        return './' + targetPath.rsplit('/', 1)[1]

    # same netloc, varying paths: climb out of everything after the common
    # prefix, then descend into the rest of the target. Once the paths
    # diverge, later segments that happen to be equal are not shared.
    sourceParts = sourcePath.split('/')
    targetParts = targetPath.split('/')
    common = 0
    for sourcePart, targetPart in zip(sourceParts, targetParts):
        if sourcePart != targetPart:
            break
        common += 1
    upCount = len(sourceParts) - common
    # join() adds no trailing slash unless the target itself ends with one.
    return upCount * '../' + '/'.join(targetParts[common:])
if __name__ == '__main__':
    # "tests" :p
    # Print each (source, target) pair followed by the target expressed
    # relative to the source, for visual inspection.
    url_pairs = [
        ('http://coolwebsite.com/questions/bobobo/bo/bo/1663807/how-can-i-iterate-through-two-lists-in-parallel-in-python',
         'http://coolwebsite.com/questions/126524/iterate-a-list-with-indexes-in-python'),
        ('http://coolwebsite.com/questions/1663807/how-can-i-iterate-through-two-lists-in-parallel-in-python',
         'http://coolwebsite.com/questions/1663807/bananas'),
        ('http://coolwebsite.com/questions/1663807/fruits',
         'http://coolwebsite.com/questions/1663807/fruits/berries/bananas'),
    ]
    for url1, url2 in url_pairs:
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(url1)
        print(url2)
        print('second relative to first:')
        print(makeRelativeUrl(url1, url2))
Run 'tests' to see if it works :P
i know i can do this to get the effect of tab completion in python sure.
import readline
COMMANDS = ['extra', 'extension', 'stuff', 'errors',
            'email', 'foobar', 'foo']


def complete(text, state):
    """Return the state-th command that starts with *text*.

    Returns None when there is no such candidate.
    """
    candidates = [cmd for cmd in COMMANDS if cmd.startswith(text)]
    if 0 <= state < len(candidates):
        return candidates[state]
    return None
# Bind the Tab key to readline's completion function.
readline.parse_and_bind("tab: complete")
# Install complete() as the callback that supplies the candidates.
readline.set_completer(complete)
# Prompt for input; the entered value is not stored.
raw_input('Enter section name: ')
I am now interested in doing tab completion with directories. (/home/user/doc >tab)
How would i go about doing such a task?
Here is a quick example of how to perform incremental completion of file system paths. I've modified your example, organizing it into a class where methods named complete_[name] indicate top-level commands.
I've switched the completion function to use the internal readline buffer to determine the state of the overall completion, which makes the state logic a bit simpler. The path completion is in the _complete_path(path) method, and I've hooked up the extra command to perform path completions on its arguments.
I'm sure the code could be further simplified but it should provide you a decent starting point:
import os
import re
import readline
COMMANDS = ['extra', 'extension', 'stuff', 'errors',
'email', 'foobar', 'foo']
RE_SPACE = re.compile('.*\s+$', re.M)
class Completer(object):
def _listdir(self, root):
"List directory 'root' appending the path separator to subdirs."
res = []
for name in os.listdir(root):
path = os.path.join(root, name)
if os.path.isdir(path):
name += os.sep
res.append(name)
return res
def _complete_path(self, path=None):
"Perform completion of filesystem path."
if not path:
return self._listdir('.')
dirname, rest = os.path.split(path)
tmp = dirname if dirname else '.'
res = [os.path.join(dirname, p)
for p in self._listdir(tmp) if p.startswith(rest)]
# more than one match, or single match which does not exist (typo)
if len(res) > 1 or not os.path.exists(path):
return res
# resolved to a single directory, so return list of files below it
if os.path.isdir(path):
return [os.path.join(path, p) for p in self._listdir(path)]
# exact file match terminates this completion
return [path + ' ']
def complete_extra(self, args):
"Completions for the 'extra' command."
if not args:
return self._complete_path('.')
# treat the last arg as a path and complete it
return self._complete_path(args[-1])
def complete(self, text, state):
"Generic readline completion entry point."
buffer = readline.get_line_buffer()
line = readline.get_line_buffer().split()
# show all commands
if not line:
return [c + ' ' for c in COMMANDS][state]
# account for last argument ending in a space
if RE_SPACE.match(buffer):
line.append('')
# resolve command to the implementation function
cmd = line[0].strip()
if cmd in COMMANDS:
impl = getattr(self, 'complete_%s' % cmd)
args = line[1:]
if args:
return (impl(args) + [None])[state]
return [cmd + ' '][state]
results = [c + ' ' for c in COMMANDS if c.startswith(cmd)] + [None]
return results[state]
comp = Completer()
# we want to treat '/' as part of a word, so override the delimiters
readline.set_completer_delims(' \t\n;')
# Bind the Tab key to readline's completion function.
readline.parse_and_bind("tab: complete")
# Install the bound method as the callback that supplies the candidates.
readline.set_completer(comp.complete)
# Prompt for input; the entered value is not stored.
raw_input('Enter section name: ')
Usage:
% python complete.py
Enter section name: ext<tab>
extension extra
Enter section name: extra foo<tab>
foo.py foo.txt foo/
Enter section name: extra foo/<tab>
foo/bar.txt foo/baz.txt
Enter section name: extra foo/bar.txt
Update It will complete paths from the root if the user types /:
% python complete.py
Enter section name: extra /Use<tab>
/Users/.localized /Users/Shared/ /Users/user1 /Users/user2
Enter section name: extra /Users/use<tab>
/Users/user1 /Users/user2
This is enough to enable built in directory tab completion with raw_input():
import readline
readline.parse_and_bind("tab: complete")
This version is for Python 3 and uses pathlib. It is a minimalistic completer based on some of the answers above, and it only tab-completes files and directories.
#!/usr/bin/python
import pathlib
import readline
def complete_path(text, state):
    """readline completer for filesystem paths.

    Returns the state-th completion of *text* as a string, or None when
    there is no such candidate:

    * *text* names a directory: the entries of that directory;
    * *text* names an existing file: *text* itself;
    * otherwise: the entries of the deepest existing directory along *text*
      whose path starts with *text*.

    Candidates are sorted so that successive calls with increasing *state*
    see them in the same order.
    """
    incomplete_path = pathlib.Path(text)
    if incomplete_path.is_dir():
        completions = [p.as_posix() for p in incomplete_path.iterdir()]
    elif incomplete_path.exists():
        # Return a string (a Path object was returned here before) and keep
        # the text exactly as typed.
        completions = [text]
    else:
        # Walk down the path for as long as the parts exist.
        exists_parts = pathlib.Path('.')
        for part in incomplete_path.parts:
            test_next_part = exists_parts / part
            if not test_next_part.exists():
                break
            exists_parts = test_next_part
        completions = []
        # The walk can stop on a file (e.g. 'file.txt/x'); only a directory
        # can be listed.
        if exists_parts.is_dir():
            for p in exists_parts.iterdir():
                p_str = p.as_posix()
                if p_str.startswith(text):
                    completions.append(p_str)
    completions.sort()
    # Out-of-range states yield None rather than raising IndexError.
    if 0 <= state < len(completions):
        return completions[state]
    return None
# we want to treat '/' as part of a word, so override the delimiters
readline.set_completer_delims(' \t\n;')
# Bind the Tab key to readline's completion function.
readline.parse_and_bind("tab: complete")
# Install complete_path() as the callback that supplies the candidates.
readline.set_completer(complete_path)
# Prompt for a file name and echo what was entered.
print(input('tab complete a filename: '))
For path completion
import os
import sys
import readline
import glob
def path_completer(text, state):
    """
    This is the tab completer for systems paths.
    Only tested on *nix systems

    Returns the state-th path (in sorted order) that starts with *text*, or
    None when there is no such candidate. A leading '~' or '~user' in *text*
    is expanded to the corresponding home directory first.
    """
    if '~' in text:
        # Expand the whole text: expanduser('~') alone would throw away
        # everything typed after the tilde.
        text = os.path.expanduser(text)
    # Sorted so that successive calls with increasing state see the
    # candidates in the same order.
    matches = sorted(glob.glob(text + '*'))
    # Out-of-range states yield None rather than raising IndexError.
    if 0 <= state < len(matches):
        return matches[state]
    return None
# Script entry point: enable Tab completion of paths, then prompt for a file.
if __name__=="__main__":
# Only a tab delimits words, so spaces and '/' stay part of the text being completed.
readline.set_completer_delims('\t')
# Bind the Tab key to readline's completion function.
readline.parse_and_bind("tab: complete")
# Install path_completer() as the callback that supplies the candidates.
readline.set_completer(path_completer)
ans = input("What file do you want? ")
print(ans)
Note that I've refined the code found at https://gist.github.com/iamatypeofwalrus/5637895
i know i can do this to get the effect of tab completion in python sure.
import readline
COMMANDS = ['extra', 'extension', 'stuff', 'errors',
            'email', 'foobar', 'foo']


def complete(text, state):
    """Return the state-th command that starts with *text*.

    Returns None when there is no such candidate.
    """
    candidates = [cmd for cmd in COMMANDS if cmd.startswith(text)]
    if 0 <= state < len(candidates):
        return candidates[state]
    return None
# Bind the Tab key to readline's completion function.
readline.parse_and_bind("tab: complete")
# Install complete() as the callback that supplies the candidates.
readline.set_completer(complete)
# Prompt for input; the entered value is not stored.
raw_input('Enter section name: ')
I am now interested in doing tab completion with directories. (/home/user/doc >tab)
How would i go about doing such a task?
Here is a quick example of how to perform incremental completion of file system paths. I've modified your example, organizing it into a class where methods named complete_[name] indicate top-level commands.
I've switched the completion function to use the internal readline buffer to determine the state of the overall completion, which makes the state logic a bit simpler. The path completion is in the _complete_path(path) method, and I've hooked up the extra command to perform path completions on its arguments.
I'm sure the code could be further simplified but it should provide you a decent starting point:
import os
import re
import readline
COMMANDS = ['extra', 'extension', 'stuff', 'errors',
'email', 'foobar', 'foo']
RE_SPACE = re.compile('.*\s+$', re.M)
class Completer(object):
def _listdir(self, root):
"List directory 'root' appending the path separator to subdirs."
res = []
for name in os.listdir(root):
path = os.path.join(root, name)
if os.path.isdir(path):
name += os.sep
res.append(name)
return res
def _complete_path(self, path=None):
"Perform completion of filesystem path."
if not path:
return self._listdir('.')
dirname, rest = os.path.split(path)
tmp = dirname if dirname else '.'
res = [os.path.join(dirname, p)
for p in self._listdir(tmp) if p.startswith(rest)]
# more than one match, or single match which does not exist (typo)
if len(res) > 1 or not os.path.exists(path):
return res
# resolved to a single directory, so return list of files below it
if os.path.isdir(path):
return [os.path.join(path, p) for p in self._listdir(path)]
# exact file match terminates this completion
return [path + ' ']
def complete_extra(self, args):
"Completions for the 'extra' command."
if not args:
return self._complete_path('.')
# treat the last arg as a path and complete it
return self._complete_path(args[-1])
def complete(self, text, state):
"Generic readline completion entry point."
buffer = readline.get_line_buffer()
line = readline.get_line_buffer().split()
# show all commands
if not line:
return [c + ' ' for c in COMMANDS][state]
# account for last argument ending in a space
if RE_SPACE.match(buffer):
line.append('')
# resolve command to the implementation function
cmd = line[0].strip()
if cmd in COMMANDS:
impl = getattr(self, 'complete_%s' % cmd)
args = line[1:]
if args:
return (impl(args) + [None])[state]
return [cmd + ' '][state]
results = [c + ' ' for c in COMMANDS if c.startswith(cmd)] + [None]
return results[state]
comp = Completer()
# we want to treat '/' as part of a word, so override the delimiters
readline.set_completer_delims(' \t\n;')
# Bind the Tab key to readline's completion function.
readline.parse_and_bind("tab: complete")
# Install the bound method as the callback that supplies the candidates.
readline.set_completer(comp.complete)
# Prompt for input; the entered value is not stored.
raw_input('Enter section name: ')
Usage:
% python complete.py
Enter section name: ext<tab>
extension extra
Enter section name: extra foo<tab>
foo.py foo.txt foo/
Enter section name: extra foo/<tab>
foo/bar.txt foo/baz.txt
Enter section name: extra foo/bar.txt
Update It will complete paths from the root if the user types /:
% python complete.py
Enter section name: extra /Use<tab>
/Users/.localized /Users/Shared/ /Users/user1 /Users/user2
Enter section name: extra /Users/use<tab>
/Users/user1 /Users/user2
This is enough to enable built in directory tab completion with raw_input():
import readline
readline.parse_and_bind("tab: complete")
This version is for Python 3 and uses pathlib. It is a minimalistic completer based on some of the answers above, and it only tab-completes files and directories.
#!/usr/bin/python
import pathlib
import readline
def complete_path(text, state):
    """readline completer for filesystem paths.

    Returns the state-th completion of *text* as a string, or None when
    there is no such candidate:

    * *text* names a directory: the entries of that directory;
    * *text* names an existing file: *text* itself;
    * otherwise: the entries of the deepest existing directory along *text*
      whose path starts with *text*.

    Candidates are sorted so that successive calls with increasing *state*
    see them in the same order.
    """
    incomplete_path = pathlib.Path(text)
    if incomplete_path.is_dir():
        completions = [p.as_posix() for p in incomplete_path.iterdir()]
    elif incomplete_path.exists():
        # Return a string (a Path object was returned here before) and keep
        # the text exactly as typed.
        completions = [text]
    else:
        # Walk down the path for as long as the parts exist.
        exists_parts = pathlib.Path('.')
        for part in incomplete_path.parts:
            test_next_part = exists_parts / part
            if not test_next_part.exists():
                break
            exists_parts = test_next_part
        completions = []
        # The walk can stop on a file (e.g. 'file.txt/x'); only a directory
        # can be listed.
        if exists_parts.is_dir():
            for p in exists_parts.iterdir():
                p_str = p.as_posix()
                if p_str.startswith(text):
                    completions.append(p_str)
    completions.sort()
    # Out-of-range states yield None rather than raising IndexError.
    if 0 <= state < len(completions):
        return completions[state]
    return None
# we want to treat '/' as part of a word, so override the delimiters
readline.set_completer_delims(' \t\n;')
# Bind the Tab key to readline's completion function.
readline.parse_and_bind("tab: complete")
# Install complete_path() as the callback that supplies the candidates.
readline.set_completer(complete_path)
# Prompt for a file name and echo what was entered.
print(input('tab complete a filename: '))
For path completion
import os
import sys
import readline
import glob
def path_completer(text, state):
    """
    This is the tab completer for systems paths.
    Only tested on *nix systems

    Returns the state-th path (in sorted order) that starts with *text*, or
    None when there is no such candidate. A leading '~' or '~user' in *text*
    is expanded to the corresponding home directory first.
    """
    if '~' in text:
        # Expand the whole text: expanduser('~') alone would throw away
        # everything typed after the tilde.
        text = os.path.expanduser(text)
    # Sorted so that successive calls with increasing state see the
    # candidates in the same order.
    matches = sorted(glob.glob(text + '*'))
    # Out-of-range states yield None rather than raising IndexError.
    if 0 <= state < len(matches):
        return matches[state]
    return None
# Script entry point: enable Tab completion of paths, then prompt for a file.
if __name__=="__main__":
# Only a tab delimits words, so spaces and '/' stay part of the text being completed.
readline.set_completer_delims('\t')
# Bind the Tab key to readline's completion function.
readline.parse_and_bind("tab: complete")
# Install path_completer() as the callback that supplies the candidates.
readline.set_completer(path_completer)
ans = input("What file do you want? ")
print(ans)
Note that I've refined the code found at https://gist.github.com/iamatypeofwalrus/5637895