I have a set of *.tar.xz archives. Each of them may contain APK or JAR files, that indeed are zip archives. I'm trying to search for some pattern inside content of that zip archives. I use next code to accomplish it:
#! /usr/bin/env python3
import os
import glob
import tarfile
import shutil
import zipfile
def check(filename):
if 'my_awesome_pattern' in open(file).read():
print('matches')
def process_zip(f):
z = zipfile.ZipFile(f, 'r') # <- here problem occurs
z.extractall('tmp')
z.close()
def process_jar(file):
print('JAR')
process_zip(file)
def process_apk(file):
print('APK')
process_zip(file)
def process_xml(file):
print('XML')
check(file)
def process_tar(filename):
print(filename)
tar = tarfile.open(filename)
for entry in tar.getnames():
print(">>> " + entry)
if entry.endswith('xml'):
tar.extract(entry)
process_xml(entry)
os.remove(entry)
elif entry.endswith('jar'):
tar.extract(entry)
process_jar(entry)
os.remove(entry)
elif entry.endswith('apk'):
tar.extract(entry)
process_apk(entry)
os.remove(entry)
tar.close()
for file in glob.glob("*.tar.xz"):
process_tar(file)
But runtime stops with:
setupwizardtablet-all.tar.xz
>>> setupwizardtablet-all
>>> setupwizardtablet-all/nodpi
>>> setupwizardtablet-all/nodpi/priv-app
>>> setupwizardtablet-all/nodpi/priv-app/SetupWizard
>>> setupwizardtablet-all/nodpi/priv-app/SetupWizard/SetupWizard.apk
APK
Traceback (most recent call last):
File "./scan.py", line 56, in <module>
process_tar(file)
File "./scan.py", line 49, in process_tar
process_apk(entry)
File "./scan.py", line 27, in process_apk
process_zip(file)
File "./scan.py", line 16, in process_zip
z = zipfile.ZipFile(f, 'r') # <- here problem occurs
File "/usr/lib/python3.4/zipfile.py", line 937, in __init__
self._RealGetContents()
File "/usr/lib/python3.4/zipfile.py", line 1034, in _RealGetContents
x._decodeExtra()
File "/usr/lib/python3.4/zipfile.py", line 415, in _decodeExtra
tp, ln = unpack('<HH', extra[:4])
struct.error: unpack requires a bytes object of length 4
And I've stuck with this error. Python is not my cup of tea, so I'm looking for help.
Thanks in advance!
Related
I am trying to make a map in python using shapefiles I have downloaded from bbike.org. Here is my code:
import geopandas as gpd
import os
import sys
import matplotlib.pyplot as plt
bos_files_list = ['buildings.shx', 'landuse.shx', 'natural.shx', 'places.shx', 'points.shx', 'railways.shx', 'roads.shx']
cur_path = os.path.dirname(__file__)
def maps_of_bos(files):
for x in range(len(files)):
os.chdir(f'location/of/file')
f = open(f'{files[x]}', 'r')
gpd.read_file(f)
z = maps_of_bos(bos_files_list)
z.plot()
plt.show()
However, my error output is as follows:
Traceback (most recent call last):
File "test.py", line 16, in <module>
z = maps_of_bos(bos_files_list)
File "test.py", line 13, in maps_of_bos
gpd.read_file(f)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/geopandas/io/f
ile.py", line 76, in read_file
with reader(path_or_bytes, **kwargs) as features:
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/contextlib.py", line 113, in
__enter__
return next(self.gen)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/fiona/__init__
.py", line 206, in fp_reader
dataset = memfile.open()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/fiona/io.py",
line 63, in open
return Collection(vsi_path, 'w', crs=crs, driver=driver,
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/fiona/collecti
on.py", line 126, in __init__
raise DriverError("no driver")
fiona.errors.DriverError: no driver
I am relatively new to python, and don't really understand my error. can someone please help me?
According to the docs read_file should take the path to the file not an object.
gpd.read_file(f'{files[x]}')
you dont need
f = open(f'{files[x]}', 'r')
I want make a converter based on python 3.8
I'm using imageoi API 2.6.1
Here's some of my codes what i think i did it wrong
from tkinter import *
from tkinter import filedialog
import imageio
import os
root = Tk()
ftypes = [('All Files', "*.*"), ('Webm', "*.webm")]
ttl = "Select Files(s)"
dir1 = 'D:/My Pictures/9gag'
root.fileName = filedialog.askopenfilenames(filetypes=ftypes, initialdir=dir1, title=ttl)
lst = list(root.fileName)
def path_leaf(path):
return path.strip('/').strip('\\').split('/')[-1].split('\\')[-1]
print([path_leaf(path) for path in lst])
lst2 = [path_leaf(path) for path in lst]
print(lst)
def gifMaker(inputPath, targetFormat):
outputPath = os.path.splitext(inputPath)[0] + targetFormat
print(f'converting {inputPath} \n to {outputPath}')
reader = imageio.get_reader(inputPath)
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer(outputPath, fps=fps)
for frames in reader:
writer.append_data(frames)
print(f'Frame {frames}')
print('Done!')
writer.close()
for ad in lst2:
gifMaker(ad, '.gif')
And the error are shown like this
Traceback (most recent call last):
File "D:/My Pictures/GIF/GIF.py", line 41, in <module>
gifMaker(ad, '.gif')
File "D:/My Pictures/GIF/GIF.py", line 28, in gifMaker
reader = imageio.get_reader(inputPath)
File "C:\Python\Anaconda3\lib\site-packages\imageio\core\functions.py", line 173, in get_reader
request = Request(uri, "r" + mode, **kwargs)
File "C:\Python\Anaconda3\lib\site-packages\imageio\core\request.py", line 126, in __init__
self._parse_uri(uri)
File "C:\Python\Anaconda3\lib\site-packages\imageio\core\request.py", line 278, in _parse_uri
raise FileNotFoundError("No such file: '%s'" % fn)
FileNotFoundError: No such file: 'D:\My Pictures\GIF\a6VOVL2_460sv.mp4'
So, what am i missing or fault? I don't understand why the error is showing "file is not found". Can someone explain to me in detail, how these lines of error occurred?
There are several possibilities
Maybe you misstyped the path/filename.
Maybe the space in the path is causing trouble.
Below is my code -
import tarfile
import os
import sys
import re
script, bak = sys.argv
bakfile = str(bak)
currentwd = os.path.dirname(os.path.realpath(__file__))
file_to_work = tarfile.open(name=currentwd+"/"+bakfile, mode="r")
file_to_work.extractall()
currentwd = os.path.dirname(os.path.realpath(__file__))
with open(currentwd+"/onedb.xml", "r") as file:
f = file.read()
words = re.findall(r'{ssha}_\w*?=', f)
re.sub(words,r'string_to_replace',f)
I used tarfile module and extracted a gzfile, from the extracted files, picked onedb.xml. Used regex to find the strings and that was successful.
Now when I try to replace searched strings using re.sub, I get the below error.
Traceback (most recent call last):
File "preset.py", line 16, in <module>
re.sub(words,r'string_to_replace',f)
File "/usr/lib/python2.7/re.py", line 151, in sub
return _compile(pattern, flags).sub(repl, string, count)
File "/usr/lib/python2.7/re.py", line 232, in _compile
p = _cache.get(cachekey)
TypeError: unhashable type: 'list'
Use all in one Expression:
re.sub(r'{ssha}_\w*?=', r'string_to_replace', f)
I'm using python 2.7
Here is my code to parse files in a folder
import linecache
import glob
path = r"G:\test\folder1"
Key = '''testresult="NOK"'''
Files = glob.glob(path+'\*.xml')
for FileName in Files:
Loop_Count = 1
while Loop_Count!= 50:
Line_Read = linecache.getline(FileName, Loop_Count)
if (Key in Line_Read):
a = FileName.split('\\')
b = len(a)-1
print a[b]
break
elif(Loop_Count == 49):
pass
Loop_Count = Loop_Count+1
print "Completed"
if folder1 has many files, i'm getting memory error
Traceback (most recent call last):
File "C:\Users\whoKnows\Desktop\test_Check111.py", line 10, in <module> Line_Read = linecache.getline(FileName, Loop_Count)
File "C:\Python27\lib\linecache.py", line 14, in getline
lines = getlines(filename, module_globals)
File "C:\Python27\lib\linecache.py", line 40, in getlines
return updatecache(filename, module_globals)
File "C:\Python27\lib\linecache.py", line 128, in updatecache
lines = fp.readlines()
MemoryError
I think its because i'm opening all the files for reading and i'm not closing them. Can anyone please tell me how to close the files While using glob.
MemoryError means you have run out of memory. You are probably loading all the files into the memory at once. Try deleting lines not needed anymore with linecache.clearcache().
I am trying to create a Python script using the PyPDF Module. What the script does it take the 'Root' folder, merges all the PDFs in it and outputs the merged PDF in an 'Output' folder and renames it to 'Root.pdf' (the folder which containes the split PDFs). What it does then is do the same with the sub-directories, giving the final output a name equal to the sub-directories.
I'm stuck when coming to process the sub-directories, giving me an error code related to some hex values. (it seems that it is getting a null value which is not in hex)
Here is the error code generated:
Traceback (most recent call last):
File "C:\Documents and Settings\student3\Desktop\Test\pdfMergerV1.py", line 76, in <module>
files_recursively(path)
File "C:\Documents and Settings\student3\Desktop\Test\pdfMergerV1.py", line 74, in files_recursively
os.path.walk(path, process_file, ())
File "C:\Python27\lib\ntpath.py", line 263, in walk
walk(name, func, arg)
File "C:\Python27\lib\ntpath.py", line 259, in walk
func(arg, top, names)
File "C:\Documents and Settings\student3\Desktop\Test\pdfMergerV1.py", line 38, in process_file
pdf = PdfFileReader(file( filename, "rb"))
File "C:\Python27\lib\site-packages\pyPdf\pdf.py", line 374, in __init__
self.read(stream)
File "C:\Python27\lib\site-packages\pyPdf\pdf.py", line 775, in read
newTrailer = readObject(stream, self)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 67, in readObject
return DictionaryObject.readFromStream(stream, pdf)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 531, in readFromStream
value = readObject(stream, pdf)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 58, in readObject
return ArrayObject.readFromStream(stream, pdf)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 153, in readFromStream
arr.append(readObject(stream, pdf))
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 69, in readObject
return readHexStringFromStream(stream)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 276, in readHexStringFromStream
txt += chr(int(x, base=16))
ValueError: invalid literal for int() with base 16: '\x00\x00'
This is the source code for the script:
#----------------------------------------------------------------------------------------------
# Name: pdfMerger
# Purpose: Automatic merging of all PDF files in a directory and its sub-directories and
# rename them according to the folder itself. Requires the pyPDF Module
#
# Current: Processes all the PDF files in the current directory
# To-Do: Process the sub-directories.
#
# Version: 1.0
# Author: Brian Livori
#
# Created: 03/08/2011
# Copyright: (c) Brian Livori 2011
# Licence: Open-Source
#---------------------------------------------------------------------------------------------
#!/usr/bin/env <strong class="highlight">python</strong>
import os
import glob
import sys
import fnmatch
from pyPdf import PdfFileReader, PdfFileWriter
output = PdfFileWriter()
path = str(os.getcwd())
x = 0
def process_file(_, path, filelist):
for filename in filelist:
if filename.endswith('.pdf'):
filename = os.path.join(path, filename)
print "Merging " + filename
pdf = PdfFileReader(file( filename, "rb"))
x = pdf.getNumPages()
i = 0
while (i != x):
output.addPage(pdf.getPage(i))
print "Merging page: " + str(i+1) + "/" + str(x)
i += 1
output_dir = "\Output\\"
ext = ".pdf"
dir = os.path.basename(path)
outputpath = str(os.getcwd()) + output_dir
final_output = outputpath
if os.path.exists(final_output) != True:
os.mkdir(final_output)
outputStream = file(final_output + dir + ext, "wb")
os.path.join(outputStream)
output.write(outputStream)
outputStream.close()
else:
outputStream = file(final_output + dir + ext, "wb")
os.path.join(outputStream)
output.write(outputStream)
outputStream.close()
def files_recursively(topdir):
os.path.walk(path, process_file, ())
files_recursively(path)
It looks like the PDF files you are reading are not valid PDF files, or they are more exotic than PyPDF is prepared for. Are you sure you have good PDF files to read?
Also, there are a few odd things in your code, but this one might really matter:
output_dir = "\Output\\"
You have a \O escape sequence there which isn't what you want.