Translating MATLAB code to Python

I need to translate chunks of MATLAB code into Python. My code seems to be 'unreachable', though. Any idea why this is happening?
Also: am I doing it right? I'm a real newbie.
MATLAB code:
function Dir = getScriptDir()
    fullPath = mfilename('fullpath');
    [Dir, ~, ~] = fileparts(fullPath);
end
function [list, listSize] = getFileList(Dir)
    DirResult = dir( Dir );
    list = DirResult(~[DirResult.isdir]); % select files
    listSize = size(list);
end
My Python code:
def Dir = getScriptDir():
    return os.path.dirname(os.path.realpath(__file__)
def getFileList(Dir):
    list = os.listdir(Dir)
    listSize = len(list)
    getFileList() = [list, listSize]

Your syntax is incorrect. If I'm reading this correctly, you're trying to get the names of the files in the same directory as the script and print the number of files in that list.
Here's an example of how you might do this (based on the program you gave):
import os
def getFileList(directory = os.path.dirname(os.path.realpath(__file__))):
    list = os.listdir(directory)
    listSize = len(list)
    return [list, listSize]
print(getFileList())
Output example:
[['program.py', 'data', 'syntax.py'], 3]

Your function definitions were incorrect. I have modified the code you provided. You can also consolidate the getScriptDir() functionality into the getFileList() function.
import os
def getFileList():
    dir = os.path.dirname(os.path.realpath(__file__))
    list = os.listdir(dir)
    listSize = len(list)
    fileList = [list, listSize]
    return fileList
print(getFileList())
Returns: (in my environment)
[['test.py', 'test.txt', 'test2.py', 'test2.txt', 'test3.py', 'test4.py', 'testlog.txt', '__pycache__'], 8]
Your script functions, including a modified getScriptDir():
import os
def getScriptDir():
    return os.path.dirname(os.path.realpath(__file__))
def getFileList(dir):
    list = os.listdir(dir)
    listSize = len(list)
    fileList = [list, listSize]
    return fileList
dir = getScriptDir()
print(getFileList(dir))

Remember that you need to return values from a Python function to get their results.
More on how to define your own functions in python: https://docs.python.org/3/tutorial/controlflow.html#defining-functions
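For completeness, a closer line-by-line translation of the two MATLAB functions might look like the sketch below. Note that the MATLAB version filters out directories with ~[DirResult.isdir], which the listdir-based answers above do not, and that a tuple return stands in for MATLAB's multiple return values:
import os

def get_script_dir():
    # __file__ plays the role of mfilename('fullpath')
    return os.path.dirname(os.path.realpath(__file__))

def get_file_list(directory):
    # keep only files, mirroring ~[DirResult.isdir] in the MATLAB original
    files = [f for f in os.listdir(directory)
             if os.path.isfile(os.path.join(directory, f))]
    return files, len(files)

print(get_file_list(get_script_dir()))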

Related

How to make this files function non-recursive?

This is a function that recursively lists all the files in the current folder and its subfolders; I'm not able to write it non-recursively.
I tried using while and for loops in nested form, but I could not make it work.
def recur_files(start_dir):
    files = []
    original_path = os.getcwd()
    os.chdir(start_dir)
    items = os.listdir()
    for item in items:
        if "." in item:
            files.append(os.path.abspath(item))
        else:
            files.extend(recur_files(os.path.abspath(item)))
    os.chdir(original_path)
    return files
Example:
from os import walk
files = []
for _, _, filenames in walk(your_path):
    files.extend(filenames)
print("Files: {}".format(files))
This gets all the files in your path recursively.
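If you need full paths rather than bare names, the dirpath value that walk yields can be joined in; a small variation on the example above (your_path is the same placeholder):
from os import walk
from os.path import join

files = []
for dirpath, _, filenames in walk(your_path):
    # join each name onto the directory it was found in
    files.extend(join(dirpath, name) for name in filenames)
print("Files: {}".format(files))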
If you want to list your files in a depth-first fashion without using the program stack (i.e., without recursion), you can maintain your own stack (just a list in Python) and write a simple DFS algorithm as follows.
import os
def recur_files():
    original_path = os.getcwd()
    # build absolute paths for the initial entries
    stack = [os.path.join(original_path, item) for item in os.listdir()]
    results = []
    while stack:
        elem = stack.pop(0)
        if os.path.isdir(elem):
            results.append(elem)
            for item in os.listdir(elem):
                stack = [os.path.join(elem, item)] + stack
        else:
            results.append(elem)
    return results
You can use os.walk() to get all files in a folder and its subfolders.
But if you want to write your own function, you need a list of directories: the loop should take a dirname from this list and append newly found dirs to it, instead of calling the function again with a new start_dir.
import os
def recur_files(start_dir):
    files = []
    dirs = [start_dir]
    for dirname in dirs:
        for item in os.listdir(dirname):
            fullpath = os.path.join(dirname, item)
            if os.path.isdir(fullpath):  # and fullpath not in ('.', '..'):
                dirs.append(fullpath)
            else:
                files.append(fullpath)
    return files, dirs
recur_files('.')

How to find size of files in D: drive using python [duplicate]

Before I re-invent this particular wheel, has anybody got a nice routine for calculating the size of a directory using Python? It would be very nice if the routine would format the size nicely in Mb/Gb etc.
This walks all sub-directories; summing file sizes:
import os
def get_size(start_path = '.'):
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            # skip if it is symbolic link
            if not os.path.islink(fp):
                total_size += os.path.getsize(fp)
    return total_size
print(get_size(), 'bytes')
And a one-liner for fun using os.listdir (does not include sub-directories):
import os
sum(os.path.getsize(f) for f in os.listdir('.') if os.path.isfile(f))
Reference:
os.path.getsize - Gives the size in bytes
os.walk
os.path.islink
Updated: use os.path.getsize; this is clearer than the os.stat().st_size method.
Thanks to ghostdog74 for pointing this out!
os.stat - st_size gives the size in bytes, and can also be used to get other file-related information.
import os
nbytes = sum(d.stat().st_size for d in os.scandir('.') if d.is_file())
Update 2018
If you use Python 3.4 or earlier, you may consider the more efficient walk method provided by the third-party scandir package. In Python 3.5 and later, this package has been incorporated into the standard library and os.walk has received the corresponding increase in performance.
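For example, on an older interpreter the package can stand in for os.walk; a minimal sketch, assuming the scandir package exposes walk with the same interface as os.walk (verify against the package docs):
# pip install scandir
import os
try:
    from scandir import walk  # fast C implementation for Python <= 3.4
except ImportError:
    from os import walk        # Python 3.5+: os.walk is already scandir-based

total = 0
for dirpath, dirnames, filenames in walk('.'):
    for f in filenames:
        total += os.path.getsize(os.path.join(dirpath, f))
print(total, 'bytes')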
Update 2019
Recently I've been using pathlib more and more; here's a pathlib solution:
from pathlib import Path
root_directory = Path('.')
sum(f.stat().st_size for f in root_directory.glob('**/*') if f.is_file())
Some of the approaches suggested so far implement recursion; others employ a shell or will not produce neatly formatted results. When your code is a one-off for Linux platforms, you can get the usual formatting, recursion included, as a one-liner. Except for the print in the last line, it will work on current versions of both python2 and python3:
du.py
-----
#!/usr/bin/python3
import subprocess
def du(path):
"""disk usage in human readable format (e.g. '2,1GB')"""
return subprocess.check_output(['du','-sh', path]).split()[0].decode('utf-8')
if __name__ == "__main__":
print(du('.'))
It is simple and efficient, and works for files and multilevel directories:
$ chmod 750 du.py
$ ./du.py
2,9M
Here is a recursive function that sums up the size of all subfolders and their respective files; it returns exactly the same bytes as running "du -sb ." on Linux (where "." means "the current folder"):
import os
def getFolderSize(folder):
    total_size = os.path.getsize(folder)
    for item in os.listdir(folder):
        itempath = os.path.join(folder, item)
        if os.path.isfile(itempath):
            total_size += os.path.getsize(itempath)
        elif os.path.isdir(itempath):
            total_size += getFolderSize(itempath)
    return total_size

print("Size: " + str(getFolderSize(".")))
Using pathlib I came up with this one-liner to get the size of a folder:
sum(file.stat().st_size for file in Path(folder).rglob('*'))
And this is what I came up with for a nicely formatted output:
from pathlib import Path
def get_folder_size(folder):
    return ByteSize(sum(file.stat().st_size for file in Path(folder).rglob('*')))

class ByteSize(int):
    _KB = 1024
    _suffixes = 'B', 'KB', 'MB', 'GB', 'PB'

    def __new__(cls, *args, **kwargs):
        return super().__new__(cls, *args, **kwargs)

    def __init__(self, *args, **kwargs):
        self.bytes = self.B = int(self)
        self.kilobytes = self.KB = self / self._KB**1
        self.megabytes = self.MB = self / self._KB**2
        self.gigabytes = self.GB = self / self._KB**3
        self.petabytes = self.PB = self / self._KB**4
        *suffixes, last = self._suffixes
        suffix = next((
            suffix
            for suffix in suffixes
            if 1 < getattr(self, suffix) < self._KB
        ), last)
        self.readable = suffix, getattr(self, suffix)
        super().__init__()

    def __str__(self):
        return self.__format__('.2f')

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, super().__repr__())

    def __format__(self, format_spec):
        suffix, val = self.readable
        return '{val:{fmt}} {suf}'.format(val=val, fmt=format_spec, suf=suffix)

    def __sub__(self, other):
        return self.__class__(super().__sub__(other))

    def __add__(self, other):
        return self.__class__(super().__add__(other))

    def __mul__(self, other):
        return self.__class__(super().__mul__(other))

    def __rsub__(self, other):
        return self.__class__(super().__rsub__(other))

    def __radd__(self, other):
        return self.__class__(super().__radd__(other))

    def __rmul__(self, other):
        return self.__class__(super().__rmul__(other))
Usage:
>>> size = get_folder_size("c:/users/tdavis/downloads")
>>> print(size)
5.81 GB
>>> size.GB
5.810891855508089
>>> size.gigabytes
5.810891855508089
>>> size.PB
0.005674699077644618
>>> size.MB
5950.353260040283
>>> size
ByteSize(6239397620)
I also came across this question, which has some more compact and probably more performant strategies for printing file sizes.
Python 3.5 recursive folder size using os.scandir
import os

def folder_size(path='.'):
    total = 0
    for entry in os.scandir(path):
        if entry.is_file():
            total += entry.stat().st_size
        elif entry.is_dir():
            total += folder_size(entry.path)
    return total
For Python 3.5+:
from pathlib import Path
def get_size(folder: str) -> int:
    return sum(p.stat().st_size for p in Path(folder).rglob('*'))
Usage:
In [6]: get_size('/etc/not-exist-path')
Out[6]: 0
In [7]: get_size('.')
Out[7]: 12038689
In [8]: def filesize(size: int) -> str:
   ...:     for unit in ("B", "K", "M", "G", "T"):
   ...:         if size < 1024:
   ...:             break
   ...:         size /= 1024
   ...:     return f"{size:.1f}{unit}"
   ...:
In [9]: filesize(get_size('.'))
Out[9]: '11.5M'
monknut's answer is good, but it fails on broken symlinks, so you also have to check whether the path really exists:
if os.path.exists(fp):
    total_size += os.stat(fp).st_size
The accepted answer doesn't take into account hard or soft links, and would count those files twice. You'd want to keep track of which inodes you've seen, and not add the size for those files.
import os
def get_size(start_path='.'):
    total_size = 0
    seen = {}
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            try:
                stat = os.stat(fp)
            except OSError:
                continue
            try:
                seen[stat.st_ino]
            except KeyError:
                seen[stat.st_ino] = True
            else:
                continue
            total_size += stat.st_size
    return total_size

print(get_size())
A recursive one-liner:
import os

def getFolderSize(p):
    from functools import partial
    prepend = partial(os.path.join, p)
    return sum([(os.path.getsize(f) if os.path.isfile(f) else getFolderSize(f)) for f in map(prepend, os.listdir(p))])
Chris' answer is good, but it could be made more idiomatic by using a set to check for seen inodes, which also avoids using an exception for control flow:
import os

def directory_size(path):
    total_size = 0
    seen = set()
    for dirpath, dirnames, filenames in os.walk(path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            try:
                stat = os.stat(fp)
            except OSError:
                continue
            if stat.st_ino in seen:
                continue
            seen.add(stat.st_ino)
            total_size += stat.st_size
    return total_size  # size in bytes
A little late to the party, but here's a one-liner, provided that you have glob2 and humanize installed. Note that in Python 3, the default iglob has a recursive mode. How to modify the code for Python 3 is left as a trivial exercise for the reader (see the sketch after the example).
>>> import os
>>> from humanize import naturalsize
>>> from glob2 import iglob
>>> naturalsize(sum(os.path.getsize(x) for x in iglob('/var/**')))
'546.2 MB'
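That Python 3 variant might look like this with the stdlib glob, whose iglob accepts recursive=True as of Python 3.5 (an untested sketch; the isfile check skips directories):
import os
import glob
from humanize import naturalsize

total = sum(os.path.getsize(x)
            for x in glob.iglob('/var/**', recursive=True)
            if os.path.isfile(x))
print(naturalsize(total))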
For the second part of the question:
def human(size):
    B = "B"
    KB = "KB"
    MB = "MB"
    GB = "GB"
    TB = "TB"
    UNITS = [B, KB, MB, GB, TB]
    HUMANFMT = "%f %s"
    HUMANRADIX = 1024.
    for u in UNITS[:-1]:
        if size < HUMANRADIX:
            return HUMANFMT % (size, u)
        size /= HUMANRADIX
    return HUMANFMT % (size, UNITS[-1])
Get directory size
Properties of the solution:
returns both the apparent size (the number of bytes in the files) and the actual disk space the files use.
counts hard linked files only once
counts symlinks the same way du does
does not use recursion
uses st.st_blocks for disk space used, thus works only on Unix-like systems
The code:
import os
def du(path):
    if os.path.islink(path):
        return (os.lstat(path).st_size, 0)
    if os.path.isfile(path):
        st = os.lstat(path)
        return (st.st_size, st.st_blocks * 512)
    apparent_total_bytes = 0
    total_bytes = 0
    have = []
    for dirpath, dirnames, filenames in os.walk(path):
        apparent_total_bytes += os.lstat(dirpath).st_size
        total_bytes += os.lstat(dirpath).st_blocks * 512
        for f in filenames:
            fp = os.path.join(dirpath, f)
            if os.path.islink(fp):
                apparent_total_bytes += os.lstat(fp).st_size
                continue
            st = os.lstat(fp)
            if st.st_ino in have:
                continue  # skip hardlinks which were already counted
            have.append(st.st_ino)
            apparent_total_bytes += st.st_size
            total_bytes += st.st_blocks * 512
        for d in dirnames:
            dp = os.path.join(dirpath, d)
            if os.path.islink(dp):
                apparent_total_bytes += os.lstat(dp).st_size
    return (apparent_total_bytes, total_bytes)
Example usage:
>>> du('/lib')
(236425839, 244363264)
$ du -sb /lib
236425839 /lib
$ du -sB1 /lib
244363264 /lib
Human readable file size
Properties of the solution:
Supports up to Yottabytes
Supports SI Units or IEC Units
Support custom suffixes
The code:
def humanized_size(num, suffix='B', si=False):
    if si:
        units = ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']
        last_unit = 'Y'
        div = 1000.0
    else:
        units = ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']
        last_unit = 'Yi'
        div = 1024.0
    for unit in units:
        if abs(num) < div:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= div
    return "%.1f%s%s" % (num, last_unit, suffix)
Example usage:
>>> humanized_size(236425839)
'225.5MiB'
>>> humanized_size(236425839, si=True)
'236.4MB'
>>> humanized_size(236425839, si=True, suffix='')
'236.4M'
For getting the size of one file, there is os.path.getsize():
>>> import os
>>> os.path.getsize("/path/file")
35L
It's reported in bytes.
You can do something like this:
import commands
size = commands.getoutput('du -sh /path/').split()[0]
In this case I have not tested the result before returning it; if you want, you can check it with commands.getstatusoutput. (Note that the commands module is Python 2 only; in Python 3 use subprocess instead.)
One-liner you say...
Here is a one-liner:
sum([sum(map(lambda fname: os.path.getsize(os.path.join(directory, fname)), files)) for directory, folders, files in os.walk(path)])
Although I would probably split it out, and it performs no checks.
To convert to KB, see Reusable library to get human readable version of file size? and work it in (a sketch follows below).
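One way to work it in, as a rough sketch (dir_size_kb is a hypothetical helper name, not from the linked question):
import os

def dir_size_kb(path):
    # the same walk-based sum as above, divided down to kilobytes
    total = sum(
        sum(os.path.getsize(os.path.join(directory, fname)) for fname in files)
        for directory, folders, files in os.walk(path)
    )
    return total / 1024.0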
The following script prints the directory size of all sub-directories of the specified directory. It also tries to benefit (if possible) from caching the calls of the recursive function. If the argument is omitted, the script will work in the current directory. The output is sorted by directory size, from biggest to smallest, so you can adapt it to your needs.
P.S. I've used recipe 578019 for showing the directory size in a human-friendly format (http://code.activestate.com/recipes/578019/).
from __future__ import print_function
import os
import sys
import operator

def null_decorator(ob):
    return ob

if sys.version_info >= (3,2,0):
    import functools
    my_cache_decorator = functools.lru_cache(maxsize=4096)
else:
    my_cache_decorator = null_decorator

start_dir = os.path.normpath(os.path.abspath(sys.argv[1])) if len(sys.argv) > 1 else '.'

@my_cache_decorator
def get_dir_size(start_path = '.'):
    total_size = 0
    if 'scandir' in dir(os):
        # using fast 'os.scandir' method (new in version 3.5)
        for entry in os.scandir(start_path):
            if entry.is_dir(follow_symlinks = False):
                total_size += get_dir_size(entry.path)
            elif entry.is_file(follow_symlinks = False):
                total_size += entry.stat().st_size
    else:
        # using slow, but compatible 'os.listdir' method
        for entry in os.listdir(start_path):
            full_path = os.path.abspath(os.path.join(start_path, entry))
            if os.path.isdir(full_path):
                total_size += get_dir_size(full_path)
            elif os.path.isfile(full_path):
                total_size += os.path.getsize(full_path)
    return total_size

def get_dir_size_walk(start_path = '.'):
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            total_size += os.path.getsize(fp)
    return total_size

def bytes2human(n, format='%(value).0f%(symbol)s', symbols='customary'):
    """
    (c) http://code.activestate.com/recipes/578019/
    Convert n bytes into a human readable string based on format.
    symbols can be either "customary", "customary_ext", "iec" or "iec_ext",
    see: http://goo.gl/kTQMs

    >>> bytes2human(0)
    '0.0 B'
    >>> bytes2human(0.9)
    '0.0 B'
    >>> bytes2human(1)
    '1.0 B'
    >>> bytes2human(1.9)
    '1.0 B'
    >>> bytes2human(1024)
    '1.0 K'
    >>> bytes2human(1048576)
    '1.0 M'
    >>> bytes2human(1099511627776127398123789121)
    '909.5 Y'
    >>> bytes2human(9856, symbols="customary")
    '9.6 K'
    >>> bytes2human(9856, symbols="customary_ext")
    '9.6 kilo'
    >>> bytes2human(9856, symbols="iec")
    '9.6 Ki'
    >>> bytes2human(9856, symbols="iec_ext")
    '9.6 kibi'
    >>> bytes2human(10000, "%(value).1f %(symbol)s/sec")
    '9.8 K/sec'
    >>> # precision can be adjusted by playing with %f operator
    >>> bytes2human(10000, format="%(value).5f %(symbol)s")
    '9.76562 K'
    """
    SYMBOLS = {
        'customary'     : ('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
        'customary_ext' : ('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa',
                           'zetta', 'iotta'),
        'iec'           : ('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'),
        'iec_ext'       : ('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi',
                           'zebi', 'yobi'),
    }
    n = int(n)
    if n < 0:
        raise ValueError("n < 0")
    symbols = SYMBOLS[symbols]
    prefix = {}
    for i, s in enumerate(symbols[1:]):
        prefix[s] = 1 << (i+1)*10
    for symbol in reversed(symbols[1:]):
        if n >= prefix[symbol]:
            value = float(n) / prefix[symbol]
            return format % locals()
    return format % dict(symbol=symbols[0], value=n)

############################################################
###
### main()
###
############################################################
if __name__ == '__main__':
    dir_tree = {}
    ### version, that uses 'slow' [os.walk method]
    #get_size = get_dir_size_walk
    ### this recursive version can benefit from caching the function calls (functools.lru_cache)
    get_size = get_dir_size

    for root, dirs, files in os.walk(start_dir):
        for d in dirs:
            dir_path = os.path.join(root, d)
            if os.path.isdir(dir_path):
                dir_tree[dir_path] = get_size(dir_path)

    for d, size in sorted(dir_tree.items(), key=operator.itemgetter(1), reverse=True):
        print('%s\t%s' % (bytes2human(size, format='%(value).2f%(symbol)s'), d))

    print('-' * 80)
    if sys.version_info >= (3,2,0):
        print(get_dir_size.cache_info())
Sample output:
37.61M .\subdir_b
2.18M .\subdir_a
2.17M .\subdir_a\subdir_a_2
4.41K .\subdir_a\subdir_a_1
----------------------------------------------------------
CacheInfo(hits=2, misses=4, maxsize=4096, currsize=4)
EDIT: moved null_decorator above, as user2233949 recommended
Use the sh library: its du command does it:
pip install sh
import sh
print( sh.du("-s", ".") )
91154728 .
If you want to pass an asterisk, use glob as described here.
To convert the values to human-readable form, use humanize:
pip install humanize
import humanize
print( humanize.naturalsize( 91157384 ) )
91.2 MB
For what it's worth... the tree command does all of this for free:
tree -h --du /path/to/dir # files and dirs
tree -h -d --du /path/to/dir # dirs only
I love Python, but by far the simplest solution to the problem requires no new code.
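If you need the result inside Python anyway, a minimal sketch is to shell out to it (assuming tree is installed and on PATH):
import subprocess

# -h human-readable sizes, --du report cumulative directory sizes
print(subprocess.check_output(['tree', '-h', '--du', '/path/to/dir']).decode('utf-8'))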
It is handy:
import os
import stat

size = 0
path_ = ""

def calculate(path=os.environ["SYSTEMROOT"]):
    global size, path_
    size = 0
    path_ = path
    for x, y, z in os.walk(path):
        for i in z:
            size += os.path.getsize(x + os.sep + i)

def cevir(x):
    global path_
    print(path_, x, "Byte")
    print(path_, x/1024, "Kilobyte")
    print(path_, x/1048576, "Megabyte")
    print(path_, x/1073741824, "Gigabyte")

calculate(r"C:\Users\Jundullah\Desktop")
cevir(size)
Output:
C:\Users\Jundullah\Desktop 87874712211 Byte
C:\Users\Jundullah\Desktop 85815148.64355469 Kilobyte
C:\Users\Jundullah\Desktop 83803.85609722137 Megabyte
C:\Users\Jundullah\Desktop 81.83970321994275 Gigabyte
Here is a one-liner that does it recursively (the recursive option is available as of Python 3.5):
import os
import glob
print(sum(os.path.getsize(f) for f in glob.glob('**', recursive=True) if os.path.isfile(f))/(1024*1024))
import os

def recursive_dir_size(path):
    size = 0
    for x in os.listdir(path):
        if not os.path.isdir(os.path.join(path, x)):
            size += os.stat(os.path.join(path, x)).st_size
        else:
            size += recursive_dir_size(os.path.join(path, x))
    return size
I wrote this function, which gives me the accurate overall size of a directory. I tried other for-loop solutions with os.walk, but I don't know why the end result was always less than the actual size (on Ubuntu 18). I must have done something wrong, but this one works perfectly fine.
I'm using Python 2.7.13 with the scandir package; here's my one-liner recursive function to get the total size of a folder:
from scandir import scandir

def getTotFldrSize(path):
    return sum([s.stat(follow_symlinks=False).st_size for s in scandir(path) if s.is_file(follow_symlinks=False)]) + \
           sum([getTotFldrSize(s.path) for s in scandir(path) if s.is_dir(follow_symlinks=False)])

>>> print getTotFldrSize('.')
1203245680
https://pypi.python.org/pypi/scandir
When the size of a sub-directory is computed, it should be added to its parent folder's size, and so on up to the root parent.
The following function computes the size of the folder and of all its sub-folders:
import os
def folder_size(path):
    parent = {}  # path to parent path mapper
    folder_size = {}  # storing the size of directories
    folder = os.path.realpath(path)
    for root, _, filenames in os.walk(folder):
        if root == folder:
            parent[root] = -1  # the root folder will not have any parent
            folder_size[root] = 0.0  # initializing the size to 0
        elif root not in parent:
            immediate_parent_path = os.path.dirname(root)  # extract the immediate parent of the subdirectory
            parent[root] = immediate_parent_path  # store the parent of the subdirectory
            folder_size[root] = 0.0  # initialize the size to 0
        total_size = 0
        for filename in filenames:
            filepath = os.path.join(root, filename)
            total_size += os.stat(filepath).st_size  # computing the size of the files under the directory
        folder_size[root] = total_size  # store the updated size
        temp_path = root  # for subdirectories, update the size of each parent up to the root parent
        while parent[temp_path] != -1:
            folder_size[parent[temp_path]] += total_size
            temp_path = parent[temp_path]
    return folder_size[folder]/1000000.0
A solution that works on Python 3.6 using pathlib.
from pathlib import Path
sum([f.stat().st_size for f in Path("path").glob("**/*")])
Python 3.6+ recursive folder/file size using os.scandir. As powerful as the answer by @blakev, but shorter and in EAFP Python style.
import os
def size(path, *, follow_symlinks=False):
    try:
        with os.scandir(path) as it:
            return sum(size(entry, follow_symlinks=follow_symlinks) for entry in it)
    except NotADirectoryError:
        return os.stat(path, follow_symlinks=follow_symlinks).st_size
du does not follow symlinks by default, yet no answer here makes use of follow_symlinks=False.
Here is an implementation which follows the default behavior of du:
import os

def du(path) -> int:
    total = 0
    for entry in os.scandir(path):
        if entry.is_file(follow_symlinks=False):
            total += entry.stat().st_size
        elif entry.is_dir(follow_symlinks=False):
            total += du(entry.path)
    return total
Test:
class Test(unittest.TestCase):
    def test_du(self):
        root = '/tmp/du_test'
        subprocess.run(['rm', '-rf', root])
        test_utils.mkdir(root)
        test_utils.create_file(root, 'A', '1M')
        test_utils.create_file(root, 'B', '1M')
        sub = '/'.join([root, 'sub'])
        test_utils.mkdir(sub)
        test_utils.create_file(sub, 'C', '1M')
        test_utils.create_file(sub, 'D', '1M')
        subprocess.run(['ln', '-s', '/tmp', '/'.join([root, 'link']), ])
        self.assertEqual(4 << 20, util.du(root))
import os
def get_size(path = os.getcwd()):
    print("Calculating Size: ", path)
    total_size = 0
    # if path is a directory
    if os.path.isdir(path):
        print("Path type : Directory/Folder")
        for dirpath, dirnames, filenames in os.walk(path):
            for f in filenames:
                fp = os.path.join(dirpath, f)
                # skip if it is symbolic link
                if not os.path.islink(fp):
                    total_size += os.path.getsize(fp)
    # if path is a file
    elif os.path.isfile(path):
        print("Path type : File")
        total_size = os.path.getsize(path)
    else:
        print("Path Type : Special File (Socket, FIFO, Device File)")
        total_size = 0
    bytesize = total_size
    print(bytesize, 'bytes')
    print(bytesize/(1024), 'kilobytes')
    print(bytesize/(1024*1024), 'megabytes')
    print(bytesize/(1024*1024*1024), 'gigabytes')
    return total_size

x = get_size("/content/examples")
I'm sure this helps! For folders and files as well!
This script tells you which file is the biggest in the CWD and also tells you in which folder the file is.
This script works for me on win8 and python 3.3.3 shell
import os
folder = os.getcwd()
number = 0
string = ""
for root, dirs, files in os.walk(folder):
    for file in files:
        pathname = os.path.join(root, file)
        ## print(pathname)
        ## print(os.path.getsize(pathname)/1024/1024)
        if number < os.path.getsize(pathname):
            number = os.path.getsize(pathname)
            string = pathname
## print()
print(string)
print()
print(number)
print("Number in bytes")
Admittedly, this is kind of hackish and only works on Unix/Linux.
It matches du -sb . because in effect this is a Python bash wrapper that runs the du -sb . command.
import subprocess
def system_command(cmd):
    """Function executes cmd parameter as a bash command."""
    p = subprocess.Popen(cmd,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         shell=True)
    stdout, stderr = p.communicate()
    return stdout, stderr

size = int(system_command('du -sb . ')[0].split()[0])

Python call the recursive function on every element in the list

I am working on this recursive function called traverseDir. Everything was going well until here: I don't know how to iterate over the list and call my traverseDir function on every element in the list. Thanks a lot if you can help!
path = sys.argv[1]

def traverseDir(path):
    allFile = 0
    someFile = 0
    if os.path.isfile(path):  # base case
        print(sys.argv[0])
        allFile += 1
        if path.endswith('.some'):
            someFile += 1
    else:
        files = os.listdir(path)
        return files
        # if len(files[0]) <= 1: these 2 lines are where I can't figure out
        #     return traverseDir(item)
A built-in function called os.walk already does this. However, for the sake of your question, you need to iterate over your files list. You will also need to pass all_files and some_files down through the recursion so they can accumulate as they go, and to return them (Python lets you return multiple values as a tuple). You can then add the recursively returned values to all_files and some_files.
def traverseDir(path, all_files=0, some_files=0):
    # ... your existing code
    files = os.listdir(path)
    for f in files:
        # extend the path
        full_path = os.path.join(path, f)
        # unroll the returned values from the recursion
        rec_all_files, rec_some_files = traverseDir(full_path, all_files, some_files)
        # accumulate the values
        all_files += rec_all_files
        some_files += rec_some_files
    return all_files, some_files
This will call traverseDir() on every file listed. In turn, that recursion will call traverseDir() on every file it lists.
os.listdir
import sys
import os

path = sys.argv[1]

def traverseDir(path):
    allFile = 0
    someFile = 0
    if os.path.isfile(path):
        allFile += 1
        if path.endswith('.some'):
            someFile += 1
    else:
        for file in os.listdir(path):
            agAllFile, agSomeFile = traverseDir(os.path.join(path, file))
            allFile += agAllFile
            someFile += agSomeFile
    return allFile, someFile

print(traverseDir(path))
os.walk
import sys
import os

path = sys.argv[1]

def traverseDir(path):
    files = [file for dirFiles in os.walk(path) for file in dirFiles[2]]
    return len(files), len([file for file in files if file.endswith('.some')])

print(traverseDir(path))

Python keep multiple counters in one recursion function

I am trying to count the number of python files and non-python files in a path recursively.
import os

def main():
    #path = input('Enter an existing path to a file or directory: ')
    path = '/Users/ziyuanhan/PycharmProjects/lab6/'
    print(count_file(path, counter={'py':0, 'non_py':0}))

def count_file(path, counter):
    if os.path.isfile(path):
        if path.endswith('.py'):
            counter['py'] += 1
            return path, counter
        else:
            counter['non_py'] += 1
            return path, counter
    elif os.path.isdir(path):
        for files in os.listdir(path):
            print(files)
            path = os.path.abspath(files)
            print(path)
            count_file(path, counter)
        return path, counter

main()
The problems I have are:
I have trouble keeping multiple counters in one recursion function.
Also, the return I want is just the dictionary, but I can only do it this way because I have to return it along with path.
I use print(files) to check whether the function is working, but it shows a lot more files (the top entries) that I have never seen in my folder. Why is this happening?
Output of print(files):
/Library/Frameworks/Python.framework/Versions/3.5/bin/python3.5
/Users/ziyuanhan/PycharmProjects/lab7/recursive_dir_traversal.py
.DS_Store
/Users/ziyuanhan/PycharmProjects/lab7/.DS_Store
.idea
/Users/ziyuanhan/PycharmProjects/lab7/.idea
lab7.iml
/Users/ziyuanhan/PycharmProjects/lab7/lab7.iml
misc.xml
/Users/ziyuanhan/PycharmProjects/lab7/misc.xml
modules.xml
/Users/ziyuanhan/PycharmProjects/lab7/modules.xml
workspace.xml
/Users/ziyuanhan/PycharmProjects/lab7/workspace.xml
km_mi_table.py
/Users/ziyuanhan/PycharmProjects/lab7/km_mi_table.py
km_to_miles.py
/Users/ziyuanhan/PycharmProjects/lab7/km_to_miles.py
wordfrequency.py
/Users/ziyuanhan/PycharmProjects/lab7/wordfrequency.py
('/Users/ziyuanhan/PycharmProjects/lab7/wordfrequency.py', {'non_py': 0, 'py': 0})
BTW, we have to use a recursive function; it is mandatory, as the professor requested.
You don't need to iterate over the directory recursively yourself; you can use os.walk, which yields the directories and files for you.
Also, you cannot change a local variable / argument of the caller. How about returning total_python and total_non_python and using them in the caller, like below?
def count_file(path):
    total_python, total_non_python = 0, 0
    for parent, directories, files in os.walk(path):
        for filename in files:
            if filename.lower().endswith('.py'):
                total_python += 1
            else:
                total_non_python += 1
    return total_python, total_non_python

def main():
    path = input('Enter a path to a file or directory: ')
    total_python, total_non_python = count_file(path)
    print(path, total_python, total_non_python)
Alternatively, os.scandir is also available since Python 3.5.
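An os.scandir version of the same counter might look like this sketch; unlike os.walk, scandir lists only one level, so the function recurses into subdirectories itself:
import os

def count_file(path):
    total_python, total_non_python = 0, 0
    for entry in os.scandir(path):
        if entry.is_dir():
            sub_py, sub_non_py = count_file(entry.path)
            total_python += sub_py
            total_non_python += sub_non_py
        elif entry.name.lower().endswith('.py'):
            total_python += 1
        else:
            total_non_python += 1
    return total_python, total_non_python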
You can pass a dictionary as an argument to the function and change the values of the items in the dictionary.
First initialize the dictionary:
counters = {'py': 0, 'other': 0}
Then modify it inside the recursive function:
counters['py'] += 1
This will work because dictionaries are mutable.
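Putting those pieces together, a minimal sketch of the dictionary-passing version might be:
import os

counters = {'py': 0, 'other': 0}

def count_file(path, counters):
    # the dict is mutated in place, so nothing needs to be returned
    if os.path.isfile(path):
        if path.endswith('.py'):
            counters['py'] += 1
        else:
            counters['other'] += 1
    elif os.path.isdir(path):
        for entry in os.listdir(path):
            count_file(os.path.join(path, entry), counters)

count_file('.', counters)
print(counters)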
This function takes a pathname and returns (total_python, total_not_python). It calls itself on each entry in directories. This is meant to stay as close to the given code as reasonable.
def count_file(path):
    if os.path.isfile(path):
        if path.endswith('.py'):
            return 1, 0
        else:
            return 0, 1
    elif os.path.isdir(path):
        total_python, total_not_python = 0, 0
        for files in os.listdir(path):
            print(files)
            child = os.path.join(path, files)  # don't overwrite path inside the loop
            subtotal_python, subtotal_not_python = count_file(child)
            total_python += subtotal_python
            total_not_python += subtotal_not_python
        return total_python, total_not_python

delete older folder with similar name using python

I need to iterate over a folder tree. I have to check each subfolder, which looks like this:
moduleA-111-date
moduleA-112-date
moduleA-113-date
moduleB-111-date
moduleB-112-date
etc.
I figured out how to iterate over the folder tree. I can also use stat with mtime to get the date of the folder, which seems easier than parsing the date out of the name.
How do I single out modules with the same prefix (such as "moduleA") and compare their mtimes so I can delete the oldest?
Since you have no code, I assume that you're looking for design help. I'd lead my students to something like:
Make a list of the names.
From each name, find the prefix, such as "moduleA". Put those in a set.
For each prefix in the set:
    Find all names with that prefix; put these in a temporary list.
    Sort this list.
    For each folder in this list *except* the last (newest):
        delete the folder.
A sketch of these steps is below. Does this get you moving?
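A minimal sketch of those steps (assuming, as the names above suggest, that the prefix is everything before the first '-'):
import os
import shutil

def delete_old_versions(root):
    # group full paths by prefix
    groups = {}
    for name in os.listdir(root):
        full = os.path.join(root, name)
        if os.path.isdir(full):
            groups.setdefault(name.split('-')[0], []).append(full)
    # within each group, delete everything except the newest
    for prefix, paths in groups.items():
        paths.sort(key=os.path.getmtime)  # oldest first
        for old in paths[:-1]:
            shutil.rmtree(old)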
I'm posting the code (answer) here. I suppose my question wasn't clear, since I'm getting minus signs, but anyway, the solution wasn't as straightforward as I thought. I'm sure the code could use some fine tuning, but it gets the job done.
#!/usr/bin/python
import os
import sys
import fnmatch
import glob
import re
import shutil

##########################################################################################################
# Remove the directory
def remove(path):
    try:
        shutil.rmtree(path)
        print "Deleted : %s" % path
    except OSError:
        print OSError
        print "Unable to remove folder: %s" % path

##########################################################################################################
# This function will look for the .sh files in a given path and return them as a list.
def searchTreeForSh(path):
    full_path = path + '*.sh'
    listOfFolders = glob.glob(full_path)
    return listOfFolders

##########################################################################################################
# Gets the full paths to files containing .sh and returns a list of folder names (prefixes) to be acted upon.
# listOfScripts is a list of full paths to .sh files
# dirname is the value that holds the root directory where listOfScripts is operating in
def getFolderNames(listOfScripts):
    listOfFolders = []
    folderNames = []
    for foldername in listOfScripts:
        listOfFolders.append(os.path.splitext(foldername)[0])
    for folders in listOfFolders:
        folder = folders.split('/')
        foldersLen = len(folder)
        folderNames.append(folder[foldersLen-1])
    folderNames.sort()
    return folderNames

##########################################################################################################
def minmax(items):
    return max(items)

##########################################################################################################
# This function will check the latest entry in the tuple provided, and will then send "everything" to the
# remove function except that last entry
def sortBeforeDelete(statDir, t):
    count = 0
    tuple(statDir)
    timeNotToDelete = minmax(statDir)
    for ff in t:
        if t[count][1] == timeNotToDelete:
            count += 1
            continue
        else:
            remove(t[count][0])
            count += 1

##########################################################################################################
# A loop to run over the full path, which is broken into items (see os.listdir above); eliminates the .sh
# and the .txt files, leaves only folder names, then matches each one to a name in the "folders" variable
def coolFunction(folderNames, path):
    localPath = os.listdir(path)
    for folder in folderNames:
        t = ()  # a tuple to act as a sort of dict; it will hold the folder name and its equivalent st_mtime
        statDir = []  # a list that will hold the st_mtime for all the folder names in subDirList
        for item in localPath:
            if os.path.isdir(path + item) == True:
                if re.search(folder, item):
                    mtime = os.stat(path + '/' + item)
                    statDir.append(mtime.st_mtime)
                    t = t + ((path + item, mtime.st_mtime),)  # the "," outside the parentheses makes t a tuple of tuples instead of setting the elements one after the other
        if t == (): continue
        sortBeforeDelete(statDir, t)

##########################################################################################################
def main(path):
    dirs = os.listdir(path)
    for component in dirs:
        if os.path.isdir(component) == True:
            newPath = path + '/' + component + '/'
            listOfFolders = searchTreeForSh(newPath)
            folderNames = getFolderNames(listOfFolders)
            coolFunction(folderNames, newPath)

##########################################################################################################
if __name__ == "__main__":
    main(sys.argv[1])
