Python : Add watermark/background in all pages PDF - python

I just want to add/merge a background into all pages of a PDF, but getPage(i) on the input file gives me an error. Only getPage(0) runs without error, but it creates duplicate copies of the first page throughout the document while keeping the original number of pages.
Here is my code:
from typing import BinaryIO
import os
import PyPDF2
from PyPDF2 import PdfFileReader, PdfFileWriter, PdfFileMerger
from tkinter.filedialog import askopenfilename
from fpdf import FPDF
# Ask the user for the PDF to stamp, merge the first page of the template PDF
# onto every page of it, and write the result next to the input file with an
# "_FP.pdf" suffix.
input_file = askopenfilename()
pdf = PdfFileReader(input_file)
# NOTE(review): this literal is not a raw string, so sequences such as "\a"
# are treated as backslash escapes — presumably r'...' was intended; confirm.
watermark = PyPDF2.PdfFileReader(open('F:\abc\abc\PDF
Templates\Report First - Potrait.pdf', 'rb'))
output = PdfFileWriter()
num_numbers = pdf.numPages  # not used again in the visible code
for i in range(pdf.getNumPages()):
watermarks = watermark.getPage(0)  # the same template page on every iteration
page = pdf.getPage(i)
page.mergePage(watermarks)  # the reported KeyError: '/Resources' is raised here
output.addPage(page)
with open(input_file.rsplit(".", 1)[0] + '_FP.pdf', "wb") as merged_file:
output.write(merged_file)
I am getting this error:
Traceback (most recent call last): File "C:\Users\Gaurav\Desktop\PFD
python\abc\PFD python\test2.py", line 23, in
page.mergePage(watermarks) File "C:\Users\Gaurav\AppData\Local\Programs\Python\Python310\lib\site-packages\PyPDF2\pdf.py",
line 2417, in mergePage
self._mergePage(page2) File "C:\Users\Gaurav\AppData\Local\Programs\Python\Python310\lib\site-packages\PyPDF2\pdf.py",
line 2426, in _mergePage
originalResources = self[PG.RESOURCES].getObject() File "C:\Users\Gaurav\AppData\Local\Programs\Python\Python310\lib\site-packages\PyPDF2\generic.py",
line 539, in getitem
return dict.getitem(self, key).getObject() KeyError: '/Resources'

Related

FileNotFound error / reading PDF Files with PyPDF2 and os.listdir()

I have the following script to merge a couple of PDFs together:
import PyPDF2
import sys
import os
# Merge every entry of the directory given as the first command-line argument
# into merged.pdf.
inputs = sys.argv[1]
# os.listdir() yields bare entry names, not paths joined with `inputs`; the
# 'card.pdf' in the reported FileNotFoundError is such a bare name.
list = os.listdir(inputs)
merger = PyPDF2.PdfFileMerger()
for pdf in list:
merger.append(pdf)
merger.write('merged.pdf')
print('All done')
The folder with the files is in a different directory than the running script, thus I inserted the full path.
Upon running like so from the terminal, python3 pdf-merger.py /Users/user/Documents/pdf_list, I get the following error:
Traceback (most recent call last):
File "pdf-merger.py", line 11, in <module>
merger.append(pdf)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/PyPDF2/merger.py", line 203, in append
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/PyPDF2/merger.py", line 114, in merge
fileobj = file(fileobj, 'rb')
FileNotFoundError: [Errno 2] No such file or directory: 'card.pdf'
I also tried with an alternative method:
import PyPDF2
import sys
import os
# Merge every entry of the directory given as the first command-line argument
# into merged.pdf, this time using full paths.
inputs = sys.argv[1]
# Each entry name is joined with the directory so it resolves from any cwd.
list = [os.path.join(inputs,a) for a in os.listdir(inputs)]
merger = PyPDF2.PdfFileMerger()
# Every directory entry is appended, whatever its extension; nothing here
# filters out non-PDF files.
for pdf in list:
merger.append(pdf)
merger.write('merged.pdf')
print('All done')
This time I get a PyPDF2.utils.PdfReadError: Could not read malformed PDF file, no matter what file it is.
Any ideas?
Found the problem. There was a hidden .DS_Store file in the directory which corrupted the script.
Ignoring it with if pdf.endswith('.pdf') resolved the issue!

cannot read simple txt file in python

I am a beginner. I have a simple txt file which I need to read (using numpy). I have the program in the same directory as the .txt file.
I have checked the cwd and it's the right one. Also, I've written a text file in order to see if python wants to open that one - that file opens just fine.
import os
import numpy as np
np.loadtxt("test2.txt")
The code above gives me the error.
The code below works just fine.
import os
import numpy as np
# Round-trip a small integer array through a text file and print what
# numpy reads back.
x = np.array((1, 2, 3))
np.savetxt(fname="test.txt", X=x)
y = np.loadtxt(fname="test.txt")
print(y)
The error I get is:
Traceback (most recent call last):
File "D:\detest\admi.py", line 5, in <module>
np.loadtxt("test2.txt")
File "C:\Users\Mircea\AppData\Roaming\Python\Python37\site-packages\numpy\lib\npyio.py", line 962, in loadtxt
fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
File "C:\Users\Mircea\AppData\Roaming\Python\Python37\site-packages\numpy\lib\_datasource.py", line 266, in open
return ds.open(path, mode, encoding=encoding, newline=newline)
File "C:\Users\Mircea\AppData\Roaming\Python\Python37\site-packages\numpy\lib\_datasource.py", line 624, in open
raise IOError("%s not found." % path)
OSError: test2.txt not found.
Can you read the file with Python's built-in file functions instead?
# Read a whole text file with the built-in file API.
path = ''  # location of your file
# The with-block closes the file automatically, even if read() raises.
with open(path, 'r') as openfile:  # open file
    contents = openfile.read()  # all content of the file, kept for later use

Python: Numbering Pages in a PDF using PyPDF2 and io

So I am trying to retroactively add page numbering to a PDF file. I don't understand how this works.
I copied the code together from here and here.
I keep running into a problem I can't seem to fix on my own, probably because I don't understand what is happening even after reading the PyPDF2 documentation.
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
# Draw a single stamp page in memory with reportlab.
stamp_buffer = io.BytesIO()
stamp_canvas = canvas.Canvas(stamp_buffer, pagesize=A4)
stamp_canvas.drawString(10, 100, "Page" + str(15))  # just a random test number
stamp_canvas.save()
stamp_buffer.seek(0)  # rewind so the reader starts at the beginning

# Load the stamp back as a PDF page.
stamp_reader = PdfFileReader(stamp_buffer)
stamp_page = stamp_reader.getPage(0)

# Overlay the same stamp page onto every page of in.pdf.
source = PdfFileReader('in.pdf')
writer = PdfFileWriter()
for index in range(source.getNumPages()):
    current = source.getPage(index)
    current.mergePage(stamp_page)
    writer.addPage(current)

with open('out.pdf', 'wb') as out_file:
    writer.write(out_file)
This works fine. However, I would like to give every page a different number. So I changed the for loop to this:
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
# Stamp each page of in.pdf with its own page index and write out.pdf.
# NOTE(review): `packet` is created once here and then handed to a new Canvas
# on every loop iteration, so all stamp PDFs are written into the same buffer.
# This is presumably what leads to the reported "Could not find xref table"
# error; a fresh io.BytesIO() per iteration looks like the fix — confirm.
packet = io.BytesIO()
pdf = PdfFileReader('in.pdf')
pdf_writer = PdfFileWriter()
for page in range(pdf.getNumPages()):
can = canvas.Canvas(packet, pagesize=A4)
can.drawString(10, 200, "Page " + str(page) )  # `page` starts at 0
can.save()
packet.seek(0)
watermark = PdfFileReader(packet)  # the traceback points at this call
watermark_page = watermark.getPage(0)
pdf_page = pdf.getPage(page)
pdf_page.mergePage(watermark_page)
pdf_writer.addPage(pdf_page)
with open('out.pdf', 'wb') as fh:
pdf_writer.write(fh)
This does not work.
I get:
Traceback (most recent call last):
File "<ipython-input-44-c6a76740be9f>", line 1, in <module>
runfile('//DIR/pdftest.py', wdir='//DIR')
File "C:\Program Files (x86)\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Program Files (x86)\Anaconda\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "//DIR/pdftest.py", line 55, in <module>
watermark = PdfFileReader(packet)
File "C:\Program Files (x86)\Anaconda\lib\site-packages\PyPDF2\pdf.py", line 1084, in __init__
self.read(stream)
File "C:\Program Files (x86)\Anaconda\lib\site-packages\PyPDF2\pdf.py", line 1901, in read
raise utils.PdfReadError("Could not find xref table at specified location")
PdfReadError: Could not find xref table at specified location
A bit of help understanding as well as fixing this would be greatly appreciated.
Thank you!

python docx can not find docx file

I actually have the file in my folder.
My code is:
#-*-coding:utf-8-*-
import re
import time
import datetime
import sys
import os
import csv
import docx
from docx import Document
from docx import *
# Build the document path relative to the directory of the running script
# (taken from sys.argv[0]) rather than the current working directory.
CURRENT_DIR = os.path.dirname(os.path.abspath(sys.argv[0]))
docxFilePath = os.path.join(CURRENT_DIR,'111.docx')
# The reported PackageNotFoundError is raised by this call.
doc=Document(docxFilePath)
When I run it, it returns this error:
Traceback (most recent call last): File
"C:\Users\Windows\Desktop\test\fp\makereport.py", line 20, in <module>
doc=Document(docxFilePath) File "C:\Python27\lib\site-packages\docx\api.py", line 25, in Document
document_part = Package.open(docx).main_document_part File "C:\Python27\lib\site-packages\docx\opc\package.py", line 116, in open
pkg_reader = PackageReader.from_file(pkg_file) File "C:\Python27\lib\site-packages\docx\opc\pkgreader.py", line 32, in
from_file
phys_reader = PhysPkgReader(pkg_file) File "C:\Python27\lib\site-packages\docx\opc\phys_pkg.py", line 31, in
__new__
"Package not found at '%s'" % pkg_file docx.opc.exceptions.PackageNotFoundError: Package not found at
'C:\Users\Windows\Desktop\test\fp\111.docx'
please help
It seems that for file formats such as docx, xlsx, and pdf, the file should be in the current working directory. So you can do:
import os
os.chdir('C://Users/Windows/Desktop/test/fp')
Then see if it works.
I encountered the same problem, and Scanny answered it correctly: the file was found, but it was not a real .docx file.
Don't create the file in another application and merely rename it to .docx; create a real .docx file instead.
You can use the code below to create one.
# Create a new, empty document with python-docx and save it, so that the file
# on disk is a genuine .docx package rather than a renamed file.
doc = docx.Document()
# Placeholder path; replace it with the real destination file name.
doc.save("/path/to/file/where/it/needs/to/save/.docx")

How to generate PDF file from an HTML file using Reportlab and Pisa in Python?

I have the following code set up to generate a PDF document using Reportlab and Pisa in Python.
import cStringIO
import ho.pisa as pisa
def html_to_pdf(data, filename, open=False):
    """Render the HTML string *data* to a PDF file written at *filename*.

    Args:
        data: HTML markup; it is wrapped in a cStringIO buffer for pisa.
        filename: Path of the PDF to create; opened in binary write mode.
        open: Accepted for callers that pass it; not used by this function.
            Because it shadows the builtin ``open`` inside this function,
            the output is opened with ``file`` instead.
    """
    # The context manager closes the output even if pisa raises; previously
    # the handle was never closed explicitly.
    with file(filename, "wb") as pdf_file:
        pisa.CreatePDF(cStringIO.StringIO(data), pdf_file)
My HTML file contains standard HTML content.
Its fully qualified path, along with the .html extension, is assigned to the output_file variable.
Call it like this:
# Read the HTML source (output_file holds the path of the .html file) and
# convert it to a PDF at dest_pdf_file.
with open(output_file, "r") as my_file:
contents = my_file.read()
html_to_pdf(contents, dest_pdf_file, open=True)
Get this error:
No handlers could be found for logger "sx.pisa3"
Traceback (most recent call last):
File "/home/devuser/myapp/app.py", line 8, in <module>
from utils.fileutils import FileUtil
File "/home/devuser/myapp/utils/fileutils.py", line 5, in <module>
import ho.pisa as pisa
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/ho/pisa/__init__.py", line 26, in <module>
from sx.pisa3.pisa import *
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/__init__.py", line 41, in <module>
from pisa import *
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/pisa.py", line 32, in <module>
from pisa_document import *
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/pisa_document.py", line 22, in <module>
from pisa_context import pisaContext
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/pisa_context.py", line 21, in <module>
from pisa_util import *
File "/usr/local/lib/python2.7/dist-packages/pisa-3.0.33-py2.7.egg/sx/pisa3/pisa_util.py", line 55, in <module>
raise ImportError("Reportlab Version 2.1+ is needed!")
ImportError: Reportlab Version 2.1+ is needed!
This is a "partial list" of what pip freeze yields.
Pillow==2.3.0
PyPDF2==1.25.1
html5lib==0.999
oneconf==0.3.7.14.04.1
pdfkit==0.5.0
pisa==3.0.33
reportlab==3.0
Seems like a broken installation issue...
Does anyone know how to fix this or any alternative methods (approaches and / or different libraries) used to generate HTML files into PDFs?
Got it working... Uninstalled and reinstalled pisa and it worked! :)
sudo easy_install pisa
My code:
import cStringIO
import ho.pisa as pisa
class FileUtil:
    """File helpers; currently HTML-to-PDF conversion through pisa."""

    # The method takes no ``self``, so it has to be a real static method;
    # a commented-out decorator would leave it an ordinary instance method.
    @staticmethod
    def html_to_pdf(html, output_file):
        """Render the HTML text *html* to a PDF file at *output_file*.

        Args:
            html: HTML markup; encoded to ISO-8859-1 before being handed
                to pisa.
            output_file: Path of the PDF to create; opened in binary
                write mode.
        """
        # The context manager closes the file even if encoding or pisa
        # raises, which a trailing close() call would not.
        with file(output_file, "wb") as pdf_file:
            pisa.CreatePDF(
                cStringIO.StringIO(html.encode("ISO-8859-1")), pdf_file)

Categories