I am trying to create a Python script using the PyPDF Module. What the script does it take the 'Root' folder, merges all the PDFs in it and outputs the merged PDF in an 'Output' folder and renames it to 'Root.pdf' (the folder which containes the split PDFs). What it does then is do the same with the sub-directories, giving the final output a name equal to the sub-directories.
I'm stuck when coming to process the sub-directories, giving me an error code related to some hex values. (it seems that it is getting a null value which is not in hex)
Here is the error code generated:
Traceback (most recent call last):
File "C:\Documents and Settings\student3\Desktop\Test\pdfMergerV1.py", line 76, in <module>
files_recursively(path)
File "C:\Documents and Settings\student3\Desktop\Test\pdfMergerV1.py", line 74, in files_recursively
os.path.walk(path, process_file, ())
File "C:\Python27\lib\ntpath.py", line 263, in walk
walk(name, func, arg)
File "C:\Python27\lib\ntpath.py", line 259, in walk
func(arg, top, names)
File "C:\Documents and Settings\student3\Desktop\Test\pdfMergerV1.py", line 38, in process_file
pdf = PdfFileReader(file( filename, "rb"))
File "C:\Python27\lib\site-packages\pyPdf\pdf.py", line 374, in __init__
self.read(stream)
File "C:\Python27\lib\site-packages\pyPdf\pdf.py", line 775, in read
newTrailer = readObject(stream, self)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 67, in readObject
return DictionaryObject.readFromStream(stream, pdf)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 531, in readFromStream
value = readObject(stream, pdf)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 58, in readObject
return ArrayObject.readFromStream(stream, pdf)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 153, in readFromStream
arr.append(readObject(stream, pdf))
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 69, in readObject
return readHexStringFromStream(stream)
File "C:\Python27\lib\site-packages\pyPdf\generic.py", line 276, in readHexStringFromStream
txt += chr(int(x, base=16))
ValueError: invalid literal for int() with base 16: '\x00\x00'
This is the source code for the script:
#----------------------------------------------------------------------------------------------
# Name: pdfMerger
# Purpose: Automatic merging of all PDF files in a directory and its sub-directories and
# rename them according to the folder itself. Requires the pyPDF Module
#
# Current: Processes all the PDF files in the current directory
# To-Do: Process the sub-directories.
#
# Version: 1.0
# Author: Brian Livori
#
# Created: 03/08/2011
# Copyright: (c) Brian Livori 2011
# Licence: Open-Source
#---------------------------------------------------------------------------------------------
#!/usr/bin/env <strong class="highlight">python</strong>
import os
import glob
import sys
import fnmatch
from pyPdf import PdfFileReader, PdfFileWriter
output = PdfFileWriter()
path = str(os.getcwd())
x = 0
def process_file(_, path, filelist):
for filename in filelist:
if filename.endswith('.pdf'):
filename = os.path.join(path, filename)
print "Merging " + filename
pdf = PdfFileReader(file( filename, "rb"))
x = pdf.getNumPages()
i = 0
while (i != x):
output.addPage(pdf.getPage(i))
print "Merging page: " + str(i+1) + "/" + str(x)
i += 1
output_dir = "\Output\\"
ext = ".pdf"
dir = os.path.basename(path)
outputpath = str(os.getcwd()) + output_dir
final_output = outputpath
if os.path.exists(final_output) != True:
os.mkdir(final_output)
outputStream = file(final_output + dir + ext, "wb")
os.path.join(outputStream)
output.write(outputStream)
outputStream.close()
else:
outputStream = file(final_output + dir + ext, "wb")
os.path.join(outputStream)
output.write(outputStream)
outputStream.close()
def files_recursively(topdir):
os.path.walk(path, process_file, ())
files_recursively(path)
It looks like the PDF files you are reading are not valid PDF files, or they are more exotic than PyPDF is prepared for. Are you sure you have good PDF files to read?
Also, there are a few odd things in your code, but this one might really matter:
output_dir = "\Output\\"
You have a \O escape sequence there which isn't what you want.
Related
I want make a converter based on python 3.8
I'm using imageoi API 2.6.1
Here's some of my codes what i think i did it wrong
from tkinter import *
from tkinter import filedialog
import imageio
import os
root = Tk()
ftypes = [('All Files', "*.*"), ('Webm', "*.webm")]
ttl = "Select Files(s)"
dir1 = 'D:/My Pictures/9gag'
root.fileName = filedialog.askopenfilenames(filetypes=ftypes, initialdir=dir1, title=ttl)
lst = list(root.fileName)
def path_leaf(path):
return path.strip('/').strip('\\').split('/')[-1].split('\\')[-1]
print([path_leaf(path) for path in lst])
lst2 = [path_leaf(path) for path in lst]
print(lst)
def gifMaker(inputPath, targetFormat):
outputPath = os.path.splitext(inputPath)[0] + targetFormat
print(f'converting {inputPath} \n to {outputPath}')
reader = imageio.get_reader(inputPath)
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer(outputPath, fps=fps)
for frames in reader:
writer.append_data(frames)
print(f'Frame {frames}')
print('Done!')
writer.close()
for ad in lst2:
gifMaker(ad, '.gif')
And the error are shown like this
Traceback (most recent call last):
File "D:/My Pictures/GIF/GIF.py", line 41, in <module>
gifMaker(ad, '.gif')
File "D:/My Pictures/GIF/GIF.py", line 28, in gifMaker
reader = imageio.get_reader(inputPath)
File "C:\Python\Anaconda3\lib\site-packages\imageio\core\functions.py", line 173, in get_reader
request = Request(uri, "r" + mode, **kwargs)
File "C:\Python\Anaconda3\lib\site-packages\imageio\core\request.py", line 126, in __init__
self._parse_uri(uri)
File "C:\Python\Anaconda3\lib\site-packages\imageio\core\request.py", line 278, in _parse_uri
raise FileNotFoundError("No such file: '%s'" % fn)
FileNotFoundError: No such file: 'D:\My Pictures\GIF\a6VOVL2_460sv.mp4'
So, what am i missing or fault? I don't understand why the error is showing "file is not found". Can someone explain to me in detail, how these lines of error occurred?
There are several possibilities
Maybe you misstyped the path/filename.
Maybe the space in the path is causing trouble.
I have a set of *.tar.xz archives. Each of them may contain APK or JAR files, that indeed are zip archives. I'm trying to search for some pattern inside content of that zip archives. I use next code to accomplish it:
#! /usr/bin/env python3
import os
import glob
import tarfile
import shutil
import zipfile
def check(filename):
if 'my_awesome_pattern' in open(file).read():
print('matches')
def process_zip(f):
z = zipfile.ZipFile(f, 'r') # <- here problem occurs
z.extractall('tmp')
z.close()
def process_jar(file):
print('JAR')
process_zip(file)
def process_apk(file):
print('APK')
process_zip(file)
def process_xml(file):
print('XML')
check(file)
def process_tar(filename):
print(filename)
tar = tarfile.open(filename)
for entry in tar.getnames():
print(">>> " + entry)
if entry.endswith('xml'):
tar.extract(entry)
process_xml(entry)
os.remove(entry)
elif entry.endswith('jar'):
tar.extract(entry)
process_jar(entry)
os.remove(entry)
elif entry.endswith('apk'):
tar.extract(entry)
process_apk(entry)
os.remove(entry)
tar.close()
for file in glob.glob("*.tar.xz"):
process_tar(file)
But runtime stops with:
setupwizardtablet-all.tar.xz
>>> setupwizardtablet-all
>>> setupwizardtablet-all/nodpi
>>> setupwizardtablet-all/nodpi/priv-app
>>> setupwizardtablet-all/nodpi/priv-app/SetupWizard
>>> setupwizardtablet-all/nodpi/priv-app/SetupWizard/SetupWizard.apk
APK
Traceback (most recent call last):
File "./scan.py", line 56, in <module>
process_tar(file)
File "./scan.py", line 49, in process_tar
process_apk(entry)
File "./scan.py", line 27, in process_apk
process_zip(file)
File "./scan.py", line 16, in process_zip
z = zipfile.ZipFile(f, 'r') # <- here problem occurs
File "/usr/lib/python3.4/zipfile.py", line 937, in __init__
self._RealGetContents()
File "/usr/lib/python3.4/zipfile.py", line 1034, in _RealGetContents
x._decodeExtra()
File "/usr/lib/python3.4/zipfile.py", line 415, in _decodeExtra
tp, ln = unpack('<HH', extra[:4])
struct.error: unpack requires a bytes object of length 4
And I've stuck with this error. Python is not my cup of tea, so I'm looking for help.
Thanks in advance!
def morse_audio( item ):
from pyglet import media
import pyglet
import time
import glob
import os
import wave
from contextlib import closing
files = []
audios = []
for file in glob.glob('C:\\Users\\MQ\'s Virual World\\Downloads\\Morse\\*.wav'):
ass = str(os.path.join('C:\\Users\MQ\'s Virual World\\Downloads\\Morse', file))
print (ass)
files.append(ass)
#audio = media.load(files[1])
#audio.play()
#print (len(files))
one = list(item)
str_list = [x.strip(' ') for x in one]
str_list = [x.strip('/') for x in str_list]
for s in str_list[0]:
if s != "-" and s != ".":
list(item)
for letter in item:
for i in range(0, 51):
if letter == " ":
time.sleep(1.5)
audios.append("noise3.wav")
break
if letter != letterlst[i] and letter != letterlst[i].lower():
continue
else:
print (files[i])
audio = media.load(files[i])
audio.play()
audios.append(files[i])
audios.append("noise2.wav")
time.sleep(1)
else:
lst = item.split()
print (' '.join(lst))
for code in lst:
for i in range(0, 51):
if code == "/":
time.sleep(1.5)
audios.append("noise3.wav")
break
if code != morse[i]:
continue
else:
print (files[i])
audio = media.load(files[i])
audio.play()
audios.append(files[i])
audios.append("noise2.wav")
time.sleep(1)
break
outfile = "sounds.wav"
data= []
for file in audios:
w = wave.open(file, 'rb')
lol = w.getparams()
print (lol)
data.append( [w.getparams(), w.readframes(w.getnframes())] )
w.close()
with closing(wave.open(outfile, 'wb')) as output:
# find sample rate from first file
with closing(wave.open(audios[0])) as w:
output.setparams(w.getparams())
# write each file to output
for audioo in audios:
with closing(wave.open(audioo)) as w:
output.writeframes(w.readframes(w.getnframes()))()))
So this code previously worked but I wanted to use different file types other then .wav files but because that worked so poorly I went back to .wav. These are different .wav files but the ones that worked before get the same error message. Which is:
Traceback (most recent call last):
File "C:\Users\MQ's Virual World\AppData\Local\Programs\Python\Python35-32\morsecode.py", line 187, in <module>
morse_audio("0123456789ÁÄ#&':,$=!-().+?;/_")
File "C:\Users\MQ's Virual World\AppData\Local\Programs\Python\Python35-32\morsecode.py", line 96, in morse_audio
audio.play()
File "C:\Users\MQ's Virual World\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pyglet\media\__init__.py", line 473, in play
player.play()
File "C:\Users\MQ's Virual World\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pyglet\media\__init__.py", line 1012, in play
self._set_playing(True)
File "C:\Users\MQ's Virual World\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pyglet\media\__init__.py", line 993, in _set_playing
self._create_audio_player()
File "C:\Users\MQ's Virual World\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pyglet\media\__init__.py", line 1083, in _create_audio_player
self._audio_player = audio_driver.create_audio_player(group, self)
File "C:\Users\MQ's Virual World\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pyglet\media\drivers\directsound\__init__.py", line 502, in create_audio_player
return DirectSoundAudioPlayer(source_group, player)
File "C:\Users\MQ's Virual World\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pyglet\media\drivers\directsound\__init__.py", line 184, in __init__
None)
File "C:\Users\MQ's Virual World\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pyglet\com.py", line 125, in <lambda>
self.method.get_field()(self.i, self.name)(obj, *args)
File "_ctypes/callproc.c", line 920, in GetResult
OSError: [WinError -2147024809] The parameter is incorrect
I've tried .wav files that used to work. It works when I use a .ogg file. Also works with mp3s. Seems only .wav files are giving it issues. Very suddenly and randomly.
Hi I am unable to upload a file to S3 using boto. It fails with the following error message. Can someone help me, i am new to python and boto.
from boto.s3 import connect_to_region
from boto.s3.connection import Location
from boto.s3.key import Key
import boto
import gzip
import os
AWS_KEY = ''
AWS_SECRET_KEY = ''
BUCKET_NAME = 'mybucketname'
conn = connect_to_region(Location.USWest2,aws_access_key_id = AWS_KEY,
aws_secret_access_key = AWS_SECRET_KEY,
is_secure=False,debug = 2
)
bucket = conn.lookup(BUCKET_NAME)
bucket2 = conn.lookup('unzipped-data')
rs = bucket.list()
rs2 = bucket2.list()
compressed_files = []
all_files = []
files_to_download = []
downloaded_files = []
path = "~/tmp/"
# Check if the file has already been decompressed
def filecheck():
for filename in bucket.list():
all_files.append(filename.name)
for n in rs2:
compressed_files.append(n.name)
for file_name in all_files:
if file_name.strip('.gz') in compressed_files:
pass;
elif '.gz' in file_name and 'indeed' in file_name:
files_to_download.append(file_name)
# Download necessary files
def download_files():
for name in rs:
if name.name in files_to_download:
file_name = name.name.split('/')
print('Downloading: '+ name.name).strip('\n')
file_name = name.name.split('/')
name.get_contents_to_filename(path+file_name[-1])
print(' - Completed')
# Decompressing the file
print('Decompressing: '+ name.name).strip('\n')
inF = gzip.open(path+file_name[-1], 'rb')
outF = open(path+file_name[-1].strip('.gz'), 'wb')
for line in inF:
outF.write(line)
inF.close()
outF.close()
print(' - Completed')
# Uploading file
print('Uploading: '+name.name).strip('\n')
full_key_name = name.name.strip('.gz')
k = Key(bucket2)
k.key = full_key_name
k.set_contents_from_filename(path+file_name[-1].strip('.gz'))
print('Completed')
# Clean Up
d_list = os.listdir(path)
for d in d_list:
os.remove(path+d)
# Function Calls
filecheck()
download_files()
Error message :
Traceback (most recent call last):
File "C:\Users\Siddartha.Reddy\workspace\boto-test\com\salesify\sid\decompress_s3.py", line 86, in <module>
download_files()
File "C:\Users\Siddartha.Reddy\workspace\boto-test\com\salesify\sid\decompress_s3.py", line 75, in download_files
k.set_contents_from_filename(path+file_name[-1].strip('.gz'))
File "C:\Python27\lib\site-packages\boto\s3\key.py", line 1362, in set_contents_from_filename
encrypt_key=encrypt_key)
File "C:\Python27\lib\site-packages\boto\s3\key.py", line 1293, in set_contents_from_file
chunked_transfer=chunked_transfer, size=size)
File "C:\Python27\lib\site-packages\boto\s3\key.py", line 750, in send_file
chunked_transfer=chunked_transfer, size=size)
File "C:\Python27\lib\site-packages\boto\s3\key.py", line 951, in _send_file_internal
query_args=query_args
File "C:\Python27\lib\site-packages\boto\s3\connection.py", line 664, in make_request
retry_handler=retry_handler
File "C:\Python27\lib\site-packages\boto\connection.py", line 1070, in make_request
retry_handler=retry_handler)
File "C:\Python27\lib\site-packages\boto\connection.py", line 1029, in _mexe
raise ex
socket.error: [Errno 10053] An established connection was aborted by the software in your host machine
I have no problem downloading the files, but the upload fails for some weird reason.
If the problem is the size of files (> 5GB), you should use multipart upload:
http://docs.aws.amazon.com/AmazonS3/latest/dev/mpuoverview.html
search for multipart_upload in the docs:
http://boto.readthedocs.org/en/latest/ref/s3.html#module-boto.s3.multipart
Also, see this question for a related issue:
How can I copy files bigger than 5 GB in Amazon S3?
The process is a little non-intuitive. You need to:
run initiate_multipart_upload(), storing the returned object
split the file into chunks (either on disk, or read from memory using CStringIO)
feed the parts sequentially into upload_part_from_file()
run complete_upload() on the stored object
I downloaded Brive which downloads your Google Docs using the Drive API. I'm running into issues with the filename not saving if it has slashes and crashes the application. How can I modify the model.py file to rewrite / as _?
model.py
brive.py
I think I just need to rewrite the "file_name" or "path" on line 74.
backend.py:
def save(self, user, document):
self._mkdir(user.login)
prefix = self._root_dir + user.login + os.sep
for file_name, content in document.contents.items():
path = prefix + file_name
Log.debug(u'Writing {}\'s {} to {}'.format(
user.login, document.title, path
))
f = open(path, 'w')
f.write(content)
f.close()
This is the error:
[ 2013-01-17 T 06:17:08 Z ] Saving coral.lopez's doc "Lunchbox Monster High 4/7/12" (id: 1GyiuKFZeargO8KfzKS5H9V3PVbgTJufw2PwLaILzRVw)
[ 2013-01-17 T 06:17:08 Z ] Unexpected shutdown, deleting /home/davidneudorfer/google_docs_backup/2013-01-17T061021Z/ folder
### Unexpected error when saving coral.lopez's documents (doc id: 1GyiuKFZeargO8KfzKS5H9V3PVbgTJufw2PwLaILzRVw) ###
Traceback (most recent call last):
File "brive.py", line 114, in <module>
main()
File "brive.py", line 92, in main
user.save_documents(backend)
File "/home/davidneudorfer/Brive/model.py", line 79, in save_documents
self._save_single_document(backend, document)
File "/home/davidneudorfer/Brive/model.py", line 105, in _save_single_document
backend.save(self, document)
File "/home/davidneudorfer/Brive/backend.py", line 78, in save
f = open(path, 'w')
IOError: [Errno 2] No such file or directory: u'/home/davidneudorfer/google_docs_backup/2013-01-17T061021Z/coral.lopez/Lunchbox Monster High 4/7/12_1GyiuKFZeargO8KfzKS5H9V3PVbgTJufw2PwLaILzRVw.odt'
You could use the_str.replace('/', '_') to turn the path with '/'s in it into one with '_' in it.