Extract a part of the filepath (a directory) in Python - python

I need to extract the name of the parent directory of a certain path. This is what it looks like:
C:\stuff\directory_i_need\subdir\file.jpg
I would like to extract directory_i_need.

import os
## first file in current dir (with full path)
file = os.path.join(os.getcwd(), os.listdir(os.getcwd())[0])
file
os.path.dirname(file) ## directory of file
os.path.dirname(os.path.dirname(file)) ## directory of directory of file
...
And you can continue doing this as many times as necessary...
Edit: from os.path, you can use either os.path.split or os.path.basename:
dir = os.path.dirname(os.path.dirname(file)) ## dir of dir of file
## once you're at the directory level you want, with the desired directory as the final path node:
dirname1 = os.path.basename(dir)
dirname2 = os.path.split(dir)[1] ## if you look at the documentation, this is exactly what os.path.basename does.

For Python 3.4+, try the pathlib module:
>>> from pathlib import Path
>>> p = Path('C:\\Program Files\\Internet Explorer\\iexplore.exe')
>>> str(p.parent)
'C:\\Program Files\\Internet Explorer'
>>> p.name
'iexplore.exe'
>>> p.suffix
'.exe'
>>> p.parts
('C:\\', 'Program Files', 'Internet Explorer', 'iexplore.exe')
>>> p.relative_to('C:\\Program Files')
WindowsPath('Internet Explorer/iexplore.exe')
>>> p.exists()
True

All you need is parent part if you use pathlib.
from pathlib import Path
p = Path(r'C:\Program Files\Internet Explorer\iexplore.exe')
print(p.parent)
Will output:
C:\Program Files\Internet Explorer
Case you need all parts (already covered in other answers) use parts:
p = Path(r'C:\Program Files\Internet Explorer\iexplore.exe')
print(p.parts)
Then you will get a list:
('C:\\', 'Program Files', 'Internet Explorer', 'iexplore.exe')
Saves tone of time.

First, see if you have splitunc() as an available function within os.path. The first item returned should be what you want... but I am on Linux and I do not have this function when I import os and try to use it.
Otherwise, one semi-ugly way that gets the job done is to use:
>>> pathname = "\\C:\\mystuff\\project\\file.py"
>>> pathname
'\\C:\\mystuff\\project\\file.py'
>>> print pathname
\C:\mystuff\project\file.py
>>> "\\".join(pathname.split('\\')[:-2])
'\\C:\\mystuff'
>>> "\\".join(pathname.split('\\')[:-1])
'\\C:\\mystuff\\project'
which shows retrieving the directory just above the file, and the directory just above that.

import os
directory = os.path.abspath('\\') # root directory
print(directory) # e.g. 'C:\'
directory = os.path.abspath('.') # current directory
print(directory) # e.g. 'C:\Users\User\Desktop'
parent_directory, directory_name = os.path.split(directory)
print(directory_name) # e.g. 'Desktop'
parent_parent_directory, parent_directory_name = os.path.split(parent_directory)
print(parent_directory_name) # e.g. 'User'
This should also do the trick.

This is what I did to extract the piece of the directory:
for path in file_list:
directories = path.rsplit('\\')
directories.reverse()
line_replace_add_directory = line_replace+directories[2]
Thank you for your help.

You have to put the entire path as a parameter to os.path.split. See The docs. It doesn't work like string split.

Related

Cut down path name in Python

This is my current code:
directory = "C:/Users/test/Desktop/test/sign off img"
choices = glob.glob(os.path.join(directory, "*.jpg"))
print(choices)
This will return me every single path to all .JPG files inside that specific folder.
As an example, here is the output for the current above code:
['C:/Users/test/Desktop/test/sign off img\\SFDG001 0102400OL - signed.jpg', 'C:/Users/test/Desktop/test/sign off img\\SFDG001 0102400OL.jpg']
How can I get the output to only return the ending of the path?
This is my desire outcome:
['SFDG001 0102400OL - signed.jpg', 'SFDG001 0102400OL.jpg']
Same path, but just the end string is returned.
You can use the os.listdir function:
>>> import os
>>> files = os.listdir("C:/Users/test/Desktop/test/sign off img")
>>> filtered_files = [file for file in files if 'signed' in file]
As you can see in thee docs, os.listdir uses the current directory as default argument, i.e., if you don't pass a value. Otherwise, it will use the path you pass to it.
I recommend using pathlib.Path over os most of the time. Try this, for example:
from pathlib import Path
directory = Path("C:/Users/test/Desktop/test/sign off img")
choices = [path.name for path in directory.glob("*.jpg")]

How to delete a file by extension in Python?

I was messing around just trying to make a script that deletes items by ".zip" extension.
import sys
import os
from os import listdir
test=os.listdir("/Users/ben/downloads/")
for item in test:
if item.endswith(".zip"):
os.remove(item)
Whenever I run the script I get:
OSError: [Errno 2] No such file or directory: 'cities1000.zip'
cities1000.zip is obviously a file in my downloads folder.
What did I do wrong here? Is the issue that os.remove requires the full path to the file? If this is the issue, than how can I do that in this current script without completely rewriting it.
You can set the path in to a dir_name variable, then use os.path.join for your os.remove.
import os
dir_name = "/Users/ben/downloads/"
test = os.listdir(dir_name)
for item in test:
if item.endswith(".zip"):
os.remove(os.path.join(dir_name, item))
For this operation you need to append the file name on to the file path so the command knows what folder you are looking into.
You can do this correctly and in a portable way in python using the os.path.join command.
For example:
import os
directory = "/Users/ben/downloads/"
test = os.listdir( directory )
for item in test:
if item.endswith(".zip"):
os.remove( os.path.join( directory, item ) )
Alternate approach that avoids join-ing yourself over and over: Use glob module to join once, then let it give you back the paths directly.
import glob
import os
dir = "/Users/ben/downloads/"
for zippath in glob.iglob(os.path.join(dir, '*.zip')):
os.remove(zippath)
I think you could use Pathlib-- a modern way, like the following:
import pathlib
dir = pathlib.Path("/Users/ben/downloads/")
zip_files = dir.glob(dir / "*.zip")
for zf in zip_files:
zf.unlink()
If you want to delete all zip files recursively, just write so:
import pathlib
dir = pathlib.Path("/Users/ben/downloads/")
zip_files = dir.rglob(dir / "*.zip") # recursively
for zf in zip_files:
zf.unlink()
Just leaving my two cents on this issue: if you want to be chic you can use glob or iglob from the glob package, like so:
import glob
import os
files_in_dir = glob.glob('/Users/ben/downloads/*.zip')
# or if you want to be fancy, you can use iglob, which returns an iterator:
files_in_dir = glob.iglob('/Users/ben/downloads/*.zip')
for _file in files_in_dir:
print(_file) # just to be sure, you know how it is...
os.remove(_file)
origfolder = "/Users/ben/downloads/"
test = os.listdir(origfolder)
for item in test:
if item.endswith(".zip"):
os.remove(os.path.join(origfolder, item))
The dirname is not included in the os.listdir output. You have to attach it to reference the file from the list returned by said function.
Prepend the directory to the filename
os.remove("/Users/ben/downloads/" + item)
EDIT: or change the current working directory using os.chdir.

Unable to read file from directory

I have following Directory structure,
F:\TestData
and TestData contains 20 folders with name node1, node2,..., node20
and each node folder contains file with names log.10.X
I need to access each log file from all node folders, For which I have writen code, but it is saying, File not found - log.*
CODE:
directory = "F:\TestData"
p = subprocess.Popen(["find", "./" + directory, "-name", "log.*"], stdout=subprocess.PIPE)
output, err = p.communicate()
foutput = output.split("\n")
Python, unlike POSIX shells, does not automatically do globbing (interpreting * and the like as wildcards related to files in the relevant directory) in strings. It does, however, provide a glob module for that purpose. You can use this to get a list of matching filenames:
import glob
filenames = glob.glob(r'F:\TestData\node*\log.*')
You can just use python to get a list of files in the directory
import os
directory = "F:\TestData\"
file_list = os.listdir(directory)
log_list = filter(lambda x: x.startswith("log"), file_list)
oh, you have to code to iterate sub directory.
First os.listdir()in the parent directory ,and iterate the sub directory to get the files
Python's glob module may be an option.
import glob
directory = 'F:\TestData'
logcontents = [open(f,'r').read() for f in glob.glob(directory + '\node*\log.*')]
You also use walk, like this:
import os
directory = "F:\TestData"
for i in os.walk(directory):
# i like this:
# ('F:\\TestData', ['node1', 'node2', 'node3'], [])
# ('F:\\TestData\\node1', [], ['log.1.txt'])
# ('F:\\TestData\\node2', [], ['log.2.txt'])
print i
if i[2] != []:
# TODO: use the path to finish other
# If dictory noden have some log file, you should use i[2][n].
# So, if you only need log.n.txt, you only use i[2][n].
print os.path.join(i[0], i[2][0])

Open file in a relative location in Python

Suppose my python code is executed a directory called main and the application needs to access main/2091/data.txt.
how should I use open(location)? what should the parameter location be?
I found that below simple code will work.. does it have any disadvantages?
file = "\2091\sample.txt"
path = os.getcwd()+file
fp = open(path, 'r+');
With this type of thing you need to be careful what your actual working directory is. For example, you may not run the script from the directory the file is in. In this case, you can't just use a relative path by itself.
If you are sure the file you want is in a subdirectory beneath where the script is actually located, you can use __file__ to help you out here. __file__ is the full path to where the script you are running is located.
So you can fiddle with something like this:
import os
script_dir = os.path.dirname(__file__) #<-- absolute dir the script is in
rel_path = "2091/data.txt"
abs_file_path = os.path.join(script_dir, rel_path)
This code works fine:
import os
def read_file(file_name):
file_handle = open(file_name)
print file_handle.read()
file_handle.close()
file_dir = os.path.dirname(os.path.realpath('__file__'))
print file_dir
#For accessing the file in the same folder
file_name = "same.txt"
read_file(file_name)
#For accessing the file in a folder contained in the current folder
file_name = os.path.join(file_dir, 'Folder1.1/same.txt')
read_file(file_name)
#For accessing the file in the parent folder of the current folder
file_name = os.path.join(file_dir, '../same.txt')
read_file(file_name)
#For accessing the file inside a sibling folder.
file_name = os.path.join(file_dir, '../Folder2/same.txt')
file_name = os.path.abspath(os.path.realpath(file_name))
print file_name
read_file(file_name)
I created an account just so I could clarify a discrepancy I think I found in Russ's original response.
For reference, his original answer was:
import os
script_dir = os.path.dirname(__file__)
rel_path = "2091/data.txt"
abs_file_path = os.path.join(script_dir, rel_path)
This is a great answer because it is trying to dynamically creates an absolute system path to the desired file.
Cory Mawhorter noticed that __file__ is a relative path (it is as well on my system) and suggested using os.path.abspath(__file__). os.path.abspath, however, returns the absolute path of your current script (i.e. /path/to/dir/foobar.py)
To use this method (and how I eventually got it working) you have to remove the script name from the end of the path:
import os
script_path = os.path.abspath(__file__) # i.e. /path/to/dir/foobar.py
script_dir = os.path.split(script_path)[0] #i.e. /path/to/dir/
rel_path = "2091/data.txt"
abs_file_path = os.path.join(script_dir, rel_path)
The resulting abs_file_path (in this example) becomes: /path/to/dir/2091/data.txt
It depends on what operating system you're using. If you want a solution that is compatible with both Windows and *nix something like:
from os import path
file_path = path.relpath("2091/data.txt")
with open(file_path) as f:
<do stuff>
should work fine.
The path module is able to format a path for whatever operating system it's running on. Also, python handles relative paths just fine, so long as you have correct permissions.
Edit:
As mentioned by kindall in the comments, python can convert between unix-style and windows-style paths anyway, so even simpler code will work:
with open("2091/data/txt") as f:
<do stuff>
That being said, the path module still has some useful functions.
I spend a lot time to discover why my code could not find my file running Python 3 on the Windows system. So I added . before / and everything worked fine:
import os
script_dir = os.path.dirname(__file__)
file_path = os.path.join(script_dir, './output03.txt')
print(file_path)
fptr = open(file_path, 'w')
Try this:
from pathlib import Path
data_folder = Path("/relative/path")
file_to_open = data_folder / "file.pdf"
f = open(file_to_open)
print(f.read())
Python 3.4 introduced a new standard library for dealing with files and paths called pathlib. It works for me!
Code:
import os
script_path = os.path.abspath(__file__)
path_list = script_path.split(os.sep)
script_directory = path_list[0:len(path_list)-1]
rel_path = "main/2091/data.txt"
path = "/".join(script_directory) + "/" + rel_path
Explanation:
Import library:
import os
Use __file__ to attain the current script's path:
script_path = os.path.abspath(__file__)
Separates the script path into multiple items:
path_list = script_path.split(os.sep)
Remove the last item in the list (the actual script file):
script_directory = path_list[0:len(path_list)-1]
Add the relative file's path:
rel_path = "main/2091/data.txt
Join the list items, and addition the relative path's file:
path = "/".join(script_directory) + "/" + rel_path
Now you are set to do whatever you want with the file, such as, for example:
file = open(path)
import os
def file_path(relative_path):
dir = os.path.dirname(os.path.abspath(__file__))
split_path = relative_path.split("/")
new_path = os.path.join(dir, *split_path)
return new_path
with open(file_path("2091/data.txt"), "w") as f:
f.write("Powerful you have become.")
If the file is in your parent folder, eg. follower.txt, you can simply use open('../follower.txt', 'r').read()
Get the path of the parent folder, then os.join your relative files to the end.
# get parent folder with `os.path`
import os.path
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# now use BASE_DIR to get a file relative to the current script
os.path.join(BASE_DIR, "config.yaml")
The same thing with pathlib:
# get parent folder with `pathlib`'s Path
from pathlib import Path
BASE_DIR = Path(__file__).absolute().parent
# now use BASE_DIR to get a file relative to the current script
BASE_DIR / "config.yaml"
Python just passes the filename you give it to the operating system, which opens it. If your operating system supports relative paths like main/2091/data.txt (hint: it does), then that will work fine.
You may find that the easiest way to answer a question like this is to try it and see what happens.
Not sure if this work everywhere.
I'm using ipython in ubuntu.
If you want to read file in current folder's sub-directory:
/current-folder/sub-directory/data.csv
your script is in current-folder
simply try this:
import pandas as pd
path = './sub-directory/data.csv'
pd.read_csv(path)
When I was a beginner I found these descriptions a bit intimidating. As at first I would try
For Windows
f= open('C:\Users\chidu\Desktop\Skipper New\Special_Note.txt','w+')
print(f)
and this would raise an syntax error. I used get confused alot. Then after some surfing across google. found why the error occurred. Writing this for beginners
It's because for path to be read in Unicode you simple add a \ when starting file path
f= open('C:\\Users\chidu\Desktop\Skipper New\Special_Note.txt','w+')
print(f)
And now it works just add \ before starting the directory.
In Python 3.4 (PEP 428) the pathlib was introduced, allowing you to work with files in an object oriented fashion:
from pathlib import Path
working_directory = Path(os.getcwd())
path = working_directory / "2091" / "sample.txt"
with path.open('r+') as fp:
# do magic
The with keyword will also ensure that your resources get closed properly, even if you get something goes wrong (like an unhandled Exception, sigint or similar)

Directory-tree listing in Python

How do I get a list of all files (and directories) in a given directory in Python?
This is a way to traverse every file and directory in a directory tree:
import os
for dirname, dirnames, filenames in os.walk('.'):
# print path to all subdirectories first.
for subdirname in dirnames:
print(os.path.join(dirname, subdirname))
# print path to all filenames.
for filename in filenames:
print(os.path.join(dirname, filename))
# Advanced usage:
# editing the 'dirnames' list will stop os.walk() from recursing into there.
if '.git' in dirnames:
# don't go into any .git directories.
dirnames.remove('.git')
You can use
os.listdir(path)
For reference and more os functions look here:
Python 2 docs: https://docs.python.org/2/library/os.html#os.listdir
Python 3 docs: https://docs.python.org/3/library/os.html#os.listdir
Here's a helper function I use quite often:
import os
def listdir_fullpath(d):
return [os.path.join(d, f) for f in os.listdir(d)]
import os
for filename in os.listdir("C:\\temp"):
print filename
If you need globbing abilities, there's a module for that as well. For example:
import glob
glob.glob('./[0-9].*')
will return something like:
['./1.gif', './2.txt']
See the documentation here.
For files in current working directory without specifying a path
Python 2.7:
import os
os.listdir('.')
Python 3.x:
import os
os.listdir()
Try this:
import os
for top, dirs, files in os.walk('./'):
for nm in files:
print os.path.join(top, nm)
While os.listdir() is fine for generating a list of file and dir names, frequently you want to do more once you have those names - and in Python3, pathlib makes those other chores simple. Let's take a look and see if you like it as much as I do.
To list dir contents, construct a Path object and grab the iterator:
In [16]: Path('/etc').iterdir()
Out[16]: <generator object Path.iterdir at 0x110853fc0>
If we want just a list of names of things:
In [17]: [x.name for x in Path('/etc').iterdir()]
Out[17]:
['emond.d',
'ntp-restrict.conf',
'periodic',
If you want just the dirs:
In [18]: [x.name for x in Path('/etc').iterdir() if x.is_dir()]
Out[18]:
['emond.d',
'periodic',
'mach_init.d',
If you want the names of all conf files in that tree:
In [20]: [x.name for x in Path('/etc').glob('**/*.conf')]
Out[20]:
['ntp-restrict.conf',
'dnsextd.conf',
'syslog.conf',
If you want a list of conf files in the tree >= 1K:
In [23]: [x.name for x in Path('/etc').glob('**/*.conf') if x.stat().st_size > 1024]
Out[23]:
['dnsextd.conf',
'pf.conf',
'autofs.conf',
Resolving relative paths become easy:
In [32]: Path('../Operational Metrics.md').resolve()
Out[32]: PosixPath('/Users/starver/code/xxxx/Operational Metrics.md')
Navigating with a Path is pretty clear (although unexpected):
In [10]: p = Path('.')
In [11]: core = p / 'web' / 'core'
In [13]: [x for x in core.iterdir() if x.is_file()]
Out[13]:
[PosixPath('web/core/metrics.py'),
PosixPath('web/core/services.py'),
PosixPath('web/core/querysets.py'),
A recursive implementation
import os
def scan_dir(dir):
for name in os.listdir(dir):
path = os.path.join(dir, name)
if os.path.isfile(path):
print path
else:
scan_dir(path)
I wrote a long version, with all the options I might need: http://sam.nipl.net/code/python/find.py
I guess it will fit here too:
#!/usr/bin/env python
import os
import sys
def ls(dir, hidden=False, relative=True):
nodes = []
for nm in os.listdir(dir):
if not hidden and nm.startswith('.'):
continue
if not relative:
nm = os.path.join(dir, nm)
nodes.append(nm)
nodes.sort()
return nodes
def find(root, files=True, dirs=False, hidden=False, relative=True, topdown=True):
root = os.path.join(root, '') # add slash if not there
for parent, ldirs, lfiles in os.walk(root, topdown=topdown):
if relative:
parent = parent[len(root):]
if dirs and parent:
yield os.path.join(parent, '')
if not hidden:
lfiles = [nm for nm in lfiles if not nm.startswith('.')]
ldirs[:] = [nm for nm in ldirs if not nm.startswith('.')] # in place
if files:
lfiles.sort()
for nm in lfiles:
nm = os.path.join(parent, nm)
yield nm
def test(root):
print "* directory listing, with hidden files:"
print ls(root, hidden=True)
print
print "* recursive listing, with dirs, but no hidden files:"
for f in find(root, dirs=True):
print f
print
if __name__ == "__main__":
test(*sys.argv[1:])
Here is another option.
os.scandir(path='.')
It returns an iterator of os.DirEntry objects corresponding to the entries (along with file attribute information) in the directory given by path.
Example:
with os.scandir(path) as it:
for entry in it:
if not entry.name.startswith('.'):
print(entry.name)
Using scandir() instead of listdir() can significantly increase the performance of code that also needs file type or file attribute information, because os.DirEntry objects expose this information if the operating system provides it when scanning a directory. All os.DirEntry methods may perform a system call, but is_dir() and is_file() usually only require a system call for symbolic links; os.DirEntry.stat() always requires a system call on Unix but only requires one for symbolic links on Windows.
Python Docs
The one worked with me is kind of a modified version from Saleh's answer elsewhere on this page.
The code is as follows:
dir = 'given_directory_name'
filenames = [os.path.abspath(os.path.join(dir,i)) for i in os.listdir(dir)]
A nice one liner to list only the files recursively. I used this in my setup.py package_data directive:
import os
[os.path.join(x[0],y) for x in os.walk('<some_directory>') for y in x[2]]
I know it's not the answer to the question, but may come in handy
For Python 2
#!/bin/python2
import os
def scan_dir(path):
print map(os.path.abspath, os.listdir(pwd))
For Python 3
For filter and map, you need wrap them with list()
#!/bin/python3
import os
def scan_dir(path):
print(list(map(os.path.abspath, os.listdir(pwd))))
The recommendation now is that you replace your usage of map and filter with generators expressions or list comprehensions:
#!/bin/python
import os
def scan_dir(path):
print([os.path.abspath(f) for f in os.listdir(path)])
#import modules
import os
_CURRENT_DIR = '.'
def rec_tree_traverse(curr_dir, indent):
"recurcive function to traverse the directory"
#print "[traverse_tree]"
try :
dfList = [os.path.join(curr_dir, f_or_d) for f_or_d in os.listdir(curr_dir)]
except:
print "wrong path name/directory name"
return
for file_or_dir in dfList:
if os.path.isdir(file_or_dir):
#print "dir : ",
print indent, file_or_dir,"\\"
rec_tree_traverse(file_or_dir, indent*2)
if os.path.isfile(file_or_dir):
#print "file : ",
print indent, file_or_dir
#end if for loop
#end of traverse_tree()
def main():
base_dir = _CURRENT_DIR
rec_tree_traverse(base_dir," ")
raw_input("enter any key to exit....")
#end of main()
if __name__ == '__main__':
main()
FYI Add a filter of extension or ext file
import os
path = '.'
for dirname, dirnames, filenames in os.walk(path):
# print path to all filenames with extension py.
for filename in filenames:
fname_path = os.path.join(dirname, filename)
fext = os.path.splitext(fname_path)[1]
if fext == '.py':
print fname_path
else:
continue
If figured I'd throw this in. Simple and dirty way to do wildcard searches.
import re
import os
[a for a in os.listdir(".") if re.search("^.*\.py$",a)]
Below code will list directories and the files within the dir
def print_directory_contents(sPath):
import os
for sChild in os.listdir(sPath):
sChildPath = os.path.join(sPath,sChild)
if os.path.isdir(sChildPath):
print_directory_contents(sChildPath)
else:
print(sChildPath)
Here is a one line Pythonic version:
import os
dir = 'given_directory_name'
filenames = [os.path.join(os.path.dirname(os.path.abspath(__file__)),dir,i) for i in os.listdir(dir)]
This code lists the full path of all files and directories in the given directory name.
I know this is an old question. This is a neat way I came across if you are on a liunx machine.
import subprocess
print(subprocess.check_output(["ls", "/"]).decode("utf8"))
Easiest way:
list_output_files = [os.getcwd()+"\\"+f for f in os.listdir(os.getcwd())]

Categories