This is a script searching for files that are bigger than a specified size:
def size_scan(folder, size=100000000):
    """Report every file below folder that is larger than size
    folder: abspath of the directory tree to walk
    size: threshold in bytes; only strictly larger files are printed
    """
    found_any = False
    for current_dir, _subdirs, names in os.walk(folder):
        # directories whose path mentions 'anaconda3' are not inspected
        if 'anaconda3' not in current_dir:
            for name in names:
                candidate = os.path.join(current_dir, name)
                if os.path.getsize(candidate) > size:
                    print(candidate, ':', os.path.getsize(candidate))
                    found_any = True
    if not found_any:
        print('There is nothing, Cleric')
I get the following error message while scanning root folder in Linux:
Traceback (most recent call last):
File "<ipython-input-123-d2865b8a190c>", line 1, in <module>
runfile('/home/ozramsay/Code/sizescan.py', wdir='/home/ozramsay/Code')
File "/home/ozramsay/anaconda3/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "/home/ozramsay/anaconda3/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/home/ozramsay/Code/sizescan.py", line 32, in <module>
size_scan('/')
File "/home/ozramsay/Code/sizescan.py", line 25, in size_scan
if os.path.getsize(file_path) > size:
File "/home/ozramsay/anaconda3/lib/python3.6/genericpath.py", line 50, in getsize
return os.stat(filename).st_size
FileNotFoundError: [Errno 2] No such file or directory: '/run/udev/link.dvdrw'
I guessed it is because Python interpreter can not scan itself, so I tried to skip 'anaconda3' folder from the search (marked by #skip anaconda folder in the code above). However, the error message remained the same.
Can anyone please explain?
(Please let me know if such kind of questions is not allowed here and should be edited. Thank you)
The file Python is trying to get the size of with os.stat(filename).st_size is a broken link. A broken link is a link whose target has been removed; it is much like an internet link that gives a 404. To fix this in your script, check that the path is a file (preferred), or use try/except (not preferred). To check that the path is a file and not a broken link, use os.path.isfile(file_path). Your code should look like this:
def size_scan(folder, size=100000000):
    """Scan folder for regular files bigger than specified size
    folder: abspath of the directory tree to walk
    size: size in bytes; only strictly larger files are printed

    Prints one "path : size" line per match, or a fallback message when
    nothing matched. Broken links and entries that vanish or cannot be
    read while scanning are skipped instead of aborting the scan.
    """
    flag = False
    for current_dir, subfolders, files in os.walk(folder):
        # Prune in place so os.walk never descends into an 'anaconda3' tree.
        subfolders[:] = [name for name in subfolders if 'anaconda3' not in name]
        # Still needed when the starting folder itself lies inside anaconda3.
        if 'anaconda3' in current_dir:
            continue
        for file in files:
            file_path = os.path.join(current_dir, file)
            # isfile() follows links, so a broken link is rejected here.
            if not os.path.isfile(file_path):
                continue
            try:
                file_size = os.path.getsize(file_path)
            except OSError:
                # The entry disappeared or became unreadable after the check
                # (common under /run and /proc); ignore it and carry on.
                continue
            if file_size > size:
                print(file_path, ':', file_size)
                flag = True
    if not flag:
        print('There is nothing, Cleric')
So before it gets the size, it checks if the file is really there, following all links to make sure it exists. Related SO post.
Related
I have a set of files in a directory and I'm making use of python-magic library to filter out files that are of type "text/plain" and remove all the non 'text/plain' files. Below is the code I'm using
import os
import magic
def ftype(path):
    """Delete every file below path whose MIME type is not text/plain.

    path: directory tree to clean up

    Each removed file's name is printed. Files are located relative to the
    directory currently being walked, so files inside subdirectories are
    inspected and removed at their real location.
    """
    mime = magic.Magic(mime=True)
    for root, _dirs, fnames in os.walk(path):
        for fname in fnames:
            # Join with the walked directory, not the top-level path, and let
            # os.path pick the separator instead of hard-coding a backslash.
            full_path = os.path.join(root, fname)
            if not mime.from_file(full_path).endswith('plain'):
                os.remove(full_path)
                print(fname)
ftype('filepath')
I'm able to run the script successfully on a small set of files. However when I ran the script on a directory that had about 40000 files I get the below error.
Traceback (most recent call last):
File "C:\Users\dmg\AppData\Local\Programs\Python\Python37\lib\site-packages\magic\magic.py", line 91, in from_file
return self._handle509Bug(e)
File "C:\Users\dmg\AppData\Local\Programs\Python\Python37\lib\site-packages\magic\magic.py", line 100, in _handle509Bug
raise e
File "C:\Users\dmg\AppData\Local\Programs\Python\Python37\lib\site-packages\magic\magic.py", line 89, in from_file
return maybe_decode(magic_file(self.cookie, filename))
File "C:\Users\dmg\AppData\Local\Programs\Python\Python37\lib\site-packages\magic\magic.py", line 255, in magic_file
return _magic_file(cookie, coerce_filename(filename))
File "C:\Users\dmg\AppData\Local\Programs\Python\Python37\lib\site-packages\magic\magic.py", line 196, in errorcheck_null
raise MagicException(err)
magic.magic.MagicException: b"line I64u: regex error 14 for `^[[:space:]]*class[[:space:]]+[[:digit:][:alpha:]:_]+[[:space:]]*\\{(.*[\n]*)*\\}(;)?$', (failed to get memory)"
I'm not sure what is the issue. Can someone help me with this or if there are any alternative approaches to do the above stated operation.
Update : Issue still exists after trying out some methods stated in the below comments.
I am trying to use the following code to unzip all the zip folders in my root folder; this code was found on this thread:
Unzip zip files in folders and subfolders with python
rootPath = u"//rootdir/myfolder" # CHOOSE ROOT FOLDER HERE
pattern = '*.zip'
# Visit every directory below rootPath and unpack each file matching pattern.
for root, dirs, files in os.walk(rootPath):
    for filename in fnmatch.filter(files, pattern):
        print(os.path.join(root, filename))
        # Extract into a folder next to the archive, named after the archive
        # with its last extension removed.
        # NOTE(review): the ZipFile object is never explicitly closed.
        zipfile.ZipFile(os.path.join(root, filename)).extractall(os.path.join(root, os.path.splitext(filename)[0]))
but I keep getting this error that says FileNotFoundError saying the xlsx file does not exist:
Traceback (most recent call last):
File "//rootdir/myfolder/Python code/unzip_helper.py", line 29, in <module>
zipfile.ZipFile(os.path.join(root, filename)).extractall(os.path.join(root, os.path.splitext(filename)[0]))
File "//rootdir/myfolder/Python\Python36-32\lib\zipfile.py", line 1491, in extractall
self.extract(zipinfo, path, pwd)
File "//myaccount/Local\Programs\Python\Python36-32\lib\zipfile.py", line 1479, in extract
return self._extract_member(member, path, pwd)
File "//myaccount/Local\Programs\Python\Python36-32\lib\zipfile.py", line 1542, in _extract_member
open(targetpath, "wb") as target:
FileNotFoundError: [Errno 2] No such file or directory: '\\rootdir\myfolder\._SGS Naked 3 01 WS Kappa Coated and a very long very long file name could this be a problem i dont think so.xlsx'
My question is, why would it want to unzip this excel file anyways?!
And how can I get rid of the error?
I've also tried using r instead of u for rootPath:
rootPath = r"//rootdir/myfolder"
and I get the same error.
Any help is truly appreciated!
Some file and directory names may contain extra dots (unlike on Windows, this is common on Unix), and as a consequence the last line:
zipfile.ZipFile(os.path.join(root, filename)).extractall(os.path.join(root, os.path.splitext(filename)[0]))
this line fails. To see how that happens:
>>> filename = "my.arch.zip"
>>> root = "/my/path/to/mydir/"
>>> os.path.join(root, os.path.splitext(filename)[0])
'/my/path/to/mydir/my.arch'
With or without extra dots, problems will still take place in your code:
>>> filename = "arch.zip"
>>> root = "/my/path.to/mydir/"
>>> os.path.join(root, os.path.splitext(filename)[0])
'/my/path.to/mydir/arch'
If no '/my/path.to/mydir/arch' can be found, FileNotFoundError will be raised. I suggest that you be explicit in your paths; otherwise you have to ensure that those directories exist.
ZipFile.extractall(path=None, members=None, pwd=None)
Extract all members from the archive to the current working directory. path specifies a different directory to extract to...
Unless path is an existent directory, FileNotFoundError will be raised.
In conjunction with my last question, I'm now printing the filenames with their sizes next to them as a sort of list. Basically, I am reading filenames from one file (they are added by the user), joining each filename to the path of the working directory, and printing its size one by one. However, I'm having an issue with the following block:
print("\n--- Stats ---\n")
with open('userdata/addedfiles', 'r') as read_files:
    file_lines = list(read_files)
# one tracked file per line; report how many there are, then each size
print(len(file_lines), "files\n")
for file_name in file_lines:
    # the tracked files are expected to live in the current working directory
    # NOTE: file_name is used exactly as read from the listing
    cwd = os.getcwd()
    fpath = os.path.join(cwd, file_name)
    fsize = os.path.getsize(fpath)
    print(file_name.strip(), "-- size:", fsize)
which is returning this error:
tolbiac wpm-public → ./main.py --filestatus
--- Stats ---
1 files
Traceback (most recent call last):
File "./main.py", line 332, in <module>
main()
File "./main.py", line 323, in main
parseargs()
File "./main.py", line 317, in parseargs
tracking()
File "./main.py", line 204, in tracking
fsize = os.path.getsize(fpath)
File "/usr/lib/python3.4/genericpath.py", line 50, in getsize
return os.stat(filename).st_size
FileNotFoundError: [Errno 2] No such file or directory: '/home/tolbiac/code/wpm-public/file.txt\n'
tolbiac wpm-public →
So it looks like something is adding a \n to the end of file_name, I'm not sure if thats something used in the getsize module, I tried this with os.stat, but it did the same thing.
Any suggestions? Thanks.
When you're reading in a file, you need to be aware of how the data is separated. In this case, the file holds one filename per line, each terminated by a \n newline character, and readlines() keeps that character. You need to strip it before you use the name.
for file_name in file_lines:
    # Drop the trailing newline (and any surrounding whitespace) before the
    # name is used to build a path.
    file_name = file_name.strip()
    # rest of for loop
I have a compressed .zip file that i want to convert to an uncompressed .tar file.
I made this function to do it:
def decompress(filepath):
    """Repack the ZIP at filepath as an uncompressed TAR and return the TAR path.

    The archive is unpacked into
    <start_dir>/Downloads/<devname>/<zip name without its last 4 characters>,
    where devname is the text between the first two underscores of filepath.
    The top-level entries of that folder are added to "<folder>.tar", then
    the folder and the original ZIP are deleted.
    """
    devname = re.search(r"_(.+?)_.+?\.zip", filepath).group(1)
    with zipfile.ZipFile(filepath, 'r') as archive:
        stem = str(os.path.basename(filepath))[:-4]
        path = os.path.join(start_dir, "Downloads", devname, stem)
        archive.extractall(path)
    tar_path = path + ".tar"
    with tarfile.open(tar_path, 'w') as tar:
        for entry in os.listdir(path):
            tar.add(os.path.join(path, entry), arcname=entry)
    # pause before and after removing the extracted tree
    time.sleep(2)
    shutil.rmtree(path, ignore_errors=False, onerror=onError)
    time.sleep(0.5)
    os.remove(filepath)
    return tar_path
I am getting this error when running it:
Traceback (most recent call last):
File "File.py", line 195, in <module>
main()
File "File.py", line 184, in main
dld = download()
File "File.py", line 132, in download
filename = decompress(os.path.join(start_dir, "Downloads", devname, filename
))
File "File.py", line 103, in decompress
shutil.rmtree(path, ignore_errors=False, onerror=onError)
File "C:\Program Files (x86)\python27\lib\shutil.py", line 247, in rmtree
rmtree(fullname, ignore_errors, onerror)
File "C:\Program Files (x86)\python27\lib\shutil.py", line 247, in rmtree
rmtree(fullname, ignore_errors, onerror)
File "C:\Program Files (x86)\python27\lib\shutil.py", line 256, in rmtree
onerror(os.rmdir, path, sys.exc_info())
File "C:\Program Files (x86)\python27\lib\shutil.py", line 254, in rmtree
os.rmdir(path)
WindowsError: [Error 145] The directory is not empty: 'C:\\Users\\Vaibhav\\Deskt
op\\Folder\\Downloads\\toro\\_toro_nightly_test\\system\\app'
Here is my onError that I got from https://stackoverflow.com/a/2656405/2518263:
def onError(func, path, exc_info):
    """
    Error callback for ``shutil.rmtree``.
    When ``path`` is not writable, user write permission is set on it and
    ``func(path)`` is called again.
    When ``path`` is already writable, the exception currently being handled
    is re-raised unchanged.
    Usage : ``shutil.rmtree(path, onerror=onerror)``
    """
    if os.access(path, os.W_OK):
        # Not a permission problem: let the active exception propagate.
        raise
    os.chmod(path, stat.S_IWUSR)
    func(path)
I only get the error on random occasions. The decompress function works 75% of the time.
I don't know why I'm getting this error. Can someone suggest a better way to do this or a way to solve this error.
I just replaced:
shutil.rmtree(path, ignore_errors=False, onerror=onError)
with:
# Walk bottom-up so every directory is already empty when it is removed.
for current, subdirs, filenames in os.walk(path, topdown=False):
    for entry in filenames:
        target = os.path.join(current, entry)
        # set user write permission before deleting the file
        os.chmod(target, stat.S_IWUSR)
        os.remove(target)
    for entry in subdirs:
        os.rmdir(os.path.join(current, entry))
time.sleep(0.5)
os.rmdir(path)
Now it works like a charm.
EDIT:
Never mind! I still get the error but just less often, now its about 20% of the time!! Please help!
EDIT 2:
OK, so I don't get the error if i extract to a temporary file so I am using this code:
import tempfile
def decompress(filepath):
    """Repack the ZIP at filepath as "<filepath minus last 4 chars>.tar".

    The archive is unpacked into a fresh temporary directory created inside
    the current working directory; the top-level entries of that directory
    are added to the TAR, then the directory and the original ZIP are
    deleted. Returns the path of the TAR file.
    """
    with zipfile.ZipFile(filepath) as archive:
        tmp = tempfile.mkdtemp(dir=os.getcwd())
        archive.extractall(tmp)
    tar_path = filepath[:-4] + ".tar"
    with tarfile.open(tar_path, 'w') as tar:
        for entry in os.listdir(tmp):
            tar.add(os.path.join(tmp, entry), arcname=entry)
    # pause before removing the extracted tree
    time.sleep(1)
    shutil.rmtree(tmp)
    os.remove(filepath)
    return tar_path
It works now! (hopefully the error won't occur again)
EDIT 3:
I got the error again!!!!!!!!!! this is really getting on my nerves. Please help someone.
It looks like some process in your OS manages to create another file in the directory while you are in the process of deleting it.
I suggest avoiding temporary files altogether and feeding the decompressed members of the original ZIP directly into the new TAR file.
This requires mapping ZipInfo fields to TarInfo ones, but it should be straightforward.
Here's my take on it:
def zip2tar(zipname, tarname):
    """Copy every member of a ZIP archive into a new uncompressed TAR archive.

    Members are streamed straight from the ZIP into the TAR; nothing is
    extracted to disk.

    zipname: path of the ZIP archive to read
    tarname: path of the TAR archive to create (overwritten if it exists)

    Both archives are closed even when copying fails part-way.
    """
    import time  # local import: only this function needs it

    with zipfile.ZipFile(zipname, 'r') as zipf:
        with tarfile.open(tarname, 'w') as tarf:
            for zipinfo in zipf.infolist():
                # ZIP marks a directory by a trailing slash in its name.
                # Bit 0 of internal_attr only says "looks like text", so it
                # must not be used to tell files from directories.
                is_dir = zipinfo.filename.endswith('/')
                tarinfo = tarfile.TarInfo(
                    zipinfo.filename.rstrip('/') if is_dir else zipinfo.filename
                )
                # ZIP timestamps are local wall-clock time; isdst=-1 lets
                # mktime decide whether DST applied at that moment.
                tarinfo.mtime = int(
                    time.mktime(tuple(zipinfo.date_time) + (0, 0, -1))
                )
                # The upper 16 bits of external_attr carry the Unix mode when
                # the archiver recorded one; otherwise they are zero.
                stored_mode = (zipinfo.external_attr >> 16) & 0o7777
                if is_dir:
                    tarinfo.type = tarfile.DIRTYPE
                    tarinfo.mode = stored_mode or 0o777
                    tarf.addfile(tarinfo)
                else:
                    tarinfo.type = tarfile.REGTYPE
                    tarinfo.mode = stored_mode or 0o666
                    tarinfo.size = zipinfo.file_size
                    with zipf.open(zipinfo) as infile:
                        tarf.addfile(tarinfo, infile)
I'm having trouble figuring out how to move all .log and .txt files in a certain folder and its subdirectories to a new folder. I understand how to move one file with shutil, but I tried, unsuccessfully, to use a loop to move all of them. Can someone help me with this? Thanks ....
import os, os.path
import re
def print_tgzLogs (arg, dir, files):
    """os.path.walk visitor for one directory.

    arg: unused
    dir: directory being visited
    files: names of the entries inside dir

    Creates C:\\Extracted\\Log_Text_Files when it is missing, then renames
    every entry whose path ends in .txt or .log to that path and prints the
    entry's original path.
    """
    for entry in files:
        path = os.path.normcase(os.path.join(dir, entry))
        defaultFolder = "Log_Text_Files"
        if not (defaultFolder.endswith(':') or os.path.exists('c:\\Extracted\\Log_Text_Files')):
            os.mkdir('C:\\Extracted\\Log_Text_Files')
        if any(re.search(pattern, path) for pattern in (r".*\.txt$", r".*\.log$")):
            os.rename(path, 'C:\\Extracted\\Log_Text_Files')
            print(path)
os.path.walk('C:\\Extracted\\storage', print_tgzLogs, 0)
Below is the trace back error:
Traceback (most recent call last):
File "C:\SQA_log\scan.py", line 20, in <module>
os.path.walk('C:\\Extracted\\storage', print_tgzLogs, 0)
File "C:\Python27\lib\ntpath.py", line 263, in walk
walk(name, func, arg)
File "C:\Python27\lib\ntpath.py", line 263, in walk
walk(name, func, arg)
File "C:\Python27\lib\ntpath.py", line 263, in walk
walk(name, func, arg)
File "C:\Python27\lib\ntpath.py", line 259, in walk
func(arg, top, names)
File "C:\SQA_log\scan.py", line 16, in print_tgzLogs
os.rename(path, 'C:\\Extracted\\Log_Text_Files')
WindowsError: [Error 183] Cannot create a file when that file already exists
According to the traceback, the log-files are already existing. The Python docs to the os.rename say:
On Windows, if dst already exists, OSError will be raised [...].
Now you can either:
delete the files manually or
delete the files automatically using os.remove(path)
If you want the files to be automatically deleted, the code would look like this (notice that I replaced your regular expression with the python endswith as suggested by utdemir):
import os, os.path
def print_tgzLogs (arg, dir, files, dest_dir='C:\\Extracted\\Log_Text_Files'):
    """Move every .txt/.log file of one directory into dest_dir.

    Written as an os.path.walk visitor; the extra keyword has a default, so
    the walker can keep calling it with three arguments.

    arg: unused
    dir: directory being visited
    files: names of the entries inside dir
    dest_dir: folder that receives the files (created when missing)

    A file already present in dest_dir under the same name is replaced.
    Directories are never moved, whatever their name ends with. The path of
    each moved file is printed.
    """
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    for name in files:
        path = os.path.normcase(os.path.join(dir, name))
        if not path.endswith((".txt", ".log")) or not os.path.isfile(path):
            continue
        target = os.path.join(dest_dir, name)
        # Never delete a file in order to "replace" it with itself.
        if os.path.normcase(os.path.abspath(target)) == os.path.abspath(path):
            continue
        # os.rename refuses to overwrite on Windows, so clear the slot first.
        if os.path.exists(target):
            os.remove(target)
        os.rename(path, target)
        print(path)
os.path.walk('C:\\Extracted\\storage', print_tgzLogs, 0)
It looks like you are trying to use
os.rename(path, 'C:\\Extracted\\Log_Text_Files')
to move the file path into the directory C:\Extracted\Log_Text_Files, but rename doesn't work like this: it's going to try to make a new file named C:\Extracted\Log_Text_Files. You probably want something more like this:
# Keep the file's own name and only change the folder it lives in.
os.rename(path, os.path.join('C:\\Extracted\\Log_Text_Files', os.path.basename(path)))