from PyPDF2 import PdfWriter, PdfReader
# Crop two regions out of the first page of the source PDF and save each
# region as its own single-page PDF (result1.pdf and result2.pdf).
reader = PdfReader('my-aadhar-card.pdf')

# First region. The file is written before the second crop box is applied.
# NOTE(review): both crops start from reader.pages[0]; if that returns the
# same object each time, the write order below matters -- confirm.
writer = PdfWriter()
page = reader.pages[0]
page.cropbox.upper_right = (290, 264)
page.cropbox.lower_left = (32, 102)
writer.add_page(page)
with open('result1.pdf', 'wb') as fp:
    writer.write(fp)

# Second region. The corners are given as lower-left / upper-right (like the
# first region) so that the rectangle is not inverted: the original assigned
# x=560 to the left edge and x=302 to the right edge.
writer2 = PdfWriter()
page2 = reader.pages[0]
page2.cropbox.upper_right = (560, 264)
page2.cropbox.lower_left = (302, 102)
# Add the page that was just cropped (the original passed ``page`` here).
writer2.add_page(page2)
with open('result2.pdf', 'wb') as fp2:
    writer2.write(fp2)
Help: I am cropping two regions from an Aadhaar card PDF downloaded from UIDAI. I need to combine result1.pdf and result2.pdf side by side on a single page, like the image below. When I merge them, I get two pages, one below the other.
Thanks in advance.
I don't have enough reputation right now to answer a question I found: how to use Python to split PDF pages in half and recombine them for further processing.
#!/usr/bin/env python
'''
Chops each page in half, e.g. if a source were
created in booklet form, you could extract individual
pages, and re-combines it
'''
from PyPDF2 import PdfFileWriter,PdfFileReader,PdfFileMerger
def _crop_pages(src_path, dst_path, lower_left, upper_right):
    """Write a copy of *src_path* to *dst_path* with every page cropped.

    lower_left and upper_right are (x, y) tuples assigned to each page's
    crop box.
    """
    with open(src_path, "rb") as in_f:
        reader = PdfFileReader(in_f)
        writer = PdfFileWriter()
        for i in range(reader.getNumPages()):
            page = reader.getPage(i)
            page.cropBox.lowerLeft = lower_left
            page.cropBox.upperRight = upper_right
            writer.addPage(page)
        # Write while the source file is still open, as the original did.
        with open(dst_path, "wb") as out_f:
            writer.write(out_f)


#split left
_crop_pages("docu.pdf", "left.pdf", (60, 50), (305, 700))

#split right
_crop_pages("docu.pdf", "right.pdf", (300, 50), (540, 700))

#combine splitted files
# Both halves are opened with ``with`` so the handles are closed afterwards
# (the original left them open).
with open("left.pdf", "rb") as left_f, open("right.pdf", "rb") as right_f:
    input1 = PdfFileReader(left_f)
    input2 = PdfFileReader(right_f)
    output = PdfFileWriter()
    numPages = input1.getNumPages()
    # Interleave the pages: left half of page i, then right half of page i.
    for i in range(numPages):
        output.addPage(input1.getPage(i))
        output.addPage(input2.getPage(i))
    with open("out.pdf", "wb") as out_f:
        output.write(out_f)
Note: The cropping parameters are specific to your PDF, so please check them before running the program.
Further: You can now use this document to extract text easily, without the columns being merged into each other and garbling the extraction.
I have the following code that crops part of pdf file then save the output as PDF
from PyPDF2 import PdfFileWriter, PdfFileReader
# Crop the first page of Sample.pdf and save it as Output.pdf.
with open("Sample.pdf", "rb") as in_f:
    input1 = PdfFileReader(in_f)
    output = PdfFileWriter()

    numPages = input1.getNumPages()
    print("Document Has %s Pages." % numPages)

    # Only the first page is processed.
    page = input1.getPage(0)

    # Show the page size (upper-right corner of the media box).
    media_box = page.mediaBox
    print(media_box.getUpperRight_x(), media_box.getUpperRight_y())

    # Trim box first, then the crop box, in the same order as before.
    page.trimBox.lowerLeft = (280, 280)
    page.trimBox.upperRight = (220, 200)
    page.cropBox.lowerLeft = (100, 720)
    page.cropBox.upperRight = (220, 800)
    output.addPage(page)

    # Written while the input file is still open.
    with open("Output.pdf", "wb") as out_f:
        output.write(out_f)
How can I save as an image not as PDF?
I found this code but the output is not at high quality. How can I improve the quality of the image output?
import fitz
# Render the first page of Output.pdf to an image file.
pdffile = "Output.pdf"
# writePNG is used below, so the file gets a .png name; the original
# "Output.jpg" name did not match the method used to write it.
output = "Output.png"

doc = fitz.open(pdffile)
page = doc.loadPage(0)
# No matrix is passed, so no zoom factor is applied to the rendering.
pix = page.getPixmap()
pix.writePNG(output)
# Release the document once the image has been written.
doc.close()
Hi there, you could use the pdf2image library to achieve this.
You could add the following code at the end:
from pdf2image import convert_from_path
# Convert Output.pdf with pdf2image and save each resulting image as
# Output0.jpg, Output1.jpg, ...
images = convert_from_path('Output.pdf')
for index, image in enumerate(images):
    image.save('Output' + str(index) + '.jpg', 'JPEG')
Then, if you wish, you could use the os library to delete the PDF you created, using the following code, so that you don't have to delete it yourself.
import os
# Delete the intermediate "Output.pdf" file from the current directory.
os.remove("Output.pdf")
This solves the problem but I welcome any advanced ideas and improvements
import fitz
# Render the first page of Output.pdf to an image, scaled up for more detail.
pdffile = "Output.pdf"
# writePNG is used below, so the file gets a .png name; the original
# "Output.jpg" name did not match the method used to write it.
output = "Output.png"

zoom = 2  # zoom factor, applied to both axes
mat = fitz.Matrix(zoom, zoom)

doc = fitz.open(pdffile)
page = doc.loadPage(0)
pix = page.getPixmap(matrix=mat)
pix.writePNG(output)
# Release the document once the image has been written.
doc.close()
How to change the font in pyPDF 2 module.
I tried print(help(canvas.Canvas))
I tried the initialFontName = None and initialFontSize = None
but my text didn't change. Also, I'm doing this using raspberry-pi with raspbian operating system.
Here's my code
# Draw the overlay text with ReportLab into ``packet`` (defined outside this
# excerpt; presumably an in-memory buffer such as BytesIO -- confirm).
can = canvas.Canvas(packet, pagesize=letter, initialFontName = 'Helvetica', initialFontSize = 5)
# NOTE(review): ``now`` is not used anywhere in the visible code.
now = datetime.datetime.now()
can.drawString(250, 500, self.get_Firstn1 + ' ' + self.get_Middlen1 +' ' + self.get_Lastn1)
can.drawString(300, 550, "Hello World")
can.drawString(350, 600,"Hello")
can.save()

# Rewind the buffer so the generated overlay can be read back from the start.
packet.seek(0)
new_pdf = PdfFileReader(packet)

# Both files are handled with ``with`` so they are closed even if merging or
# writing fails (the original never closed Sample.pdf).
with open("Sample.pdf", "rb") as existing_stream:
    existing_pdf = PdfFileReader(existing_stream)
    existing_pdf.decrypt('')
    output = PdfFileWriter()
    # add the "watermark" (which is the new pdf) on the existing page
    page = existing_pdf.getPage(0)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(page)
    #write "output" to a real file, while the source file is still open
    with open("destination.pdf", "wb") as outputStream:
        output.write(outputStream)

#open pdf file with the platform's opener ("open" on macOS, else "xdg-open")
opener = "open" if sys.platform == "darwin" else "xdg-open"
subprocess.call([opener, "destination.pdf"])
I tried changing the font to Arial, Times New Roman, Cambria, etc., but it only accepts Helvetica, and neither the text size nor the font changed.
How can I place an image over an existing PDF file at a specific coordinate location? The PDF represents a drawing sheet with one page. The image will be scaled. I'm checking ReportLab but can't find the answer. Thanks.
It's been 5 years; I think these answers need some TLC. Here is a complete solution.
The following is tested with Python 2.7
Install dependencies
pip install reportlab
pip install pypdf2
Do the magic
from reportlab.pdfgen import canvas
from PyPDF2 import PdfFileWriter, PdfFileReader
# Create the watermark from an image
c = canvas.Canvas('watermark.pdf')
# Draw the image at x, y. I positioned the x,y to be where i like here
c.drawImage('test.png', 15, 720)
# Add some custom text for good measure
c.drawString(15, 720, "Hello World")
c.save()

# Open the watermark just created and the document to stamp. ``with`` makes
# sure both handles are closed afterwards (the original left them open).
with open("watermark.pdf", "rb") as watermark_stream, \
        open("test2.pdf", "rb") as input_stream:
    watermark = PdfFileReader(watermark_stream)
    input_file = PdfFileReader(input_stream)
    output_file = PdfFileWriter()

    # Number of pages in input document
    page_count = input_file.getNumPages()
    # The same watermark page is stamped onto every input page.
    watermark_page = watermark.getPage(0)

    # Go through all the input file pages to add a watermark to them
    for page_number in range(page_count):
        # print(...) with one argument works on Python 2 and 3; the page
        # number is shown 1-based so the last line reads "N of N".
        print("Watermarking page {} of {}".format(page_number + 1, page_count))
        # merge the watermark with the page
        input_page = input_file.getPage(page_number)
        input_page.mergePage(watermark_page)
        # add page from input file to output document
        output_file.addPage(input_page)

    # finally, write "output" to document-output.pdf while the inputs are
    # still open
    with open("document-output.pdf", "wb") as outputStream:
        output_file.write(outputStream)
References:
pypdf project page:
https://pypi.org/project/pypdf/
Reportlab docs:
http://www.reportlab.com/apis/reportlab/2.4/pdfgen.html
Reportlab complete user guide:
https://www.reportlab.com/docs/reportlab-userguide.pdf
https://pypi.org/project/pypdf/:
from pypdf import PdfWriter, PdfReader
# Stamp the first page of watermark.pdf onto the first page of document1.pdf
# and save that single page as document-output.pdf.
reader = PdfReader("document1.pdf")
watermark = PdfReader("watermark.pdf")

stamp = watermark.pages[0]
page = reader.pages[0]
page.merge_page(stamp)

writer = PdfWriter()
writer.add_page(page)

# finally, write the results to disk
with open("document-output.pdf", "wb") as fp:
    writer.write(fp)
I think it's like watermark, see the documentation for more information
I combined ReportLab and pypdf to insert an image directly without having to generate the PDF up front:
from pyPdf import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from StringIO import StringIO
# Using ReportLab to build a one-page PDF holding the image, in memory
imgTemp = StringIO()
imgDoc = canvas.Canvas(imgTemp)

# Draw image on Canvas and save PDF in buffer
imgPath = "path/to/img.png"
imgDoc.drawImage(imgPath, 399, 760, 160, 160)    ## at (399,760) with size 160x160
imgDoc.save()

# Use PyPDF to merge the image-PDF into the template
overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)

# ``open`` replaces the Python 2-only ``file`` builtin, and ``with`` closes
# both files (the original never closed either handle).
with open("document.pdf", "rb") as template_stream:
    page = PdfFileReader(template_stream).getPage(0)
    page.mergePage(overlay)

    #Save the result
    output = PdfFileWriter()
    output.addPage(page)
    # A PDF is binary data, so the output is opened in "wb" rather than "w".
    with open("output.pdf", "wb") as output_stream:
        output.write(output_stream)
Thx to the previous answers. My way with python3.4
# -*- coding: utf-8 -*-
from io import BytesIO
from PyPDF2 import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
def gen_pdf(path='slades/{0}.jpg', slide_count=66, output_path="output.pdf",
            offset=(-25, -45)):
    """Build a PDF with one A4 page per slide image.

    The defaults reproduce the original hard-coded behaviour: 66 slides named
    1.jpg, 2.jpg, 3.jpg, ... are read and written to "output.pdf".

    Args:
        path: Format string for the image files; ``{0}`` receives the
            1-based slide number.
        slide_count: Number of slides to include.
        output_path: File the combined PDF is written to.
        offset: (x, y) start position passed to ``drawImage`` for every
            slide.
    """
    pdf = PdfFileWriter()
    x, y = offset

    for num in range(1, slide_count + 1):  # for each slide
        # Using ReportLab Canvas to insert image into PDF
        imgTemp = BytesIO()
        imgDoc = canvas.Canvas(imgTemp, pagesize=A4)
        # Draw image on Canvas and save PDF in buffer
        imgDoc.drawImage(path.format(num), x, y)
        imgDoc.save()
        # Read the one-page PDF back from memory and append its page
        pdf.addPage(PdfFileReader(BytesIO(imgTemp.getvalue())).getPage(0))

    # ``with`` closes the output file (the original never closed it).
    with open(output_path, "wb") as out_f:
        pdf.write(out_f)


if __name__ == '__main__':
    gen_pdf()
This is quite easy to do with PyMuPDF without merging two PDFs:
import fitz
# Input PDF, output PDF and the picture to place onto the page
src_pdf_filename = 'source.pdf'
dst_pdf_filename = 'destination.pdf'
img_filename = 'barcode.jpg'

document = fitz.open(src_pdf_filename)

# Rectangle the image is placed into; adjust position and size as needed.
# Rect reference: http://pymupdf.readthedocs.io/en/latest/rect/
img_rect = fitz.Rect(100, 100, 120, 120)

# Only the first page receives the image; index another page to change that
page = document[0]
page.insertImage(img_rect, filename=img_filename)

# The result goes to a new file. For additional parameters, especially for
# overwriting the existing PDF instead, see
# http://pymupdf.readthedocs.io/en/latest/document/#Document.save and
# http://pymupdf.readthedocs.io/en/latest/document/#Document.saveIncr
document.save(dst_pdf_filename)
document.close()
This is what worked for me
from PyPDF2 import PdfFileWriter, PdfFileReader
def watermarks(temp, watermar, new_file):
    """Stamp the first page of one PDF onto every page of another.

    Args:
        temp: Path of the PDF whose pages are to be watermarked.
        watermar: Path of the PDF whose first page is used as the watermark.
        new_file: Path the watermarked PDF is written to.
    """
    # ``with`` closes both inputs (the original never closed them).
    with open(temp, 'rb') as template_stream, \
            open(watermar, 'rb') as watermark_stream:
        template = PdfFileReader(template_stream)
        wpdf = PdfFileReader(watermark_stream)
        watermark = wpdf.getPage(0)

        # The original used ``output`` without ever creating it.
        output = PdfFileWriter()

        # ``range`` instead of the Python 2-only ``xrange``.
        for i in range(template.getNumPages()):
            page = template.getPage(i)
            page.mergePage(watermark)
            output.addPage(page)

        # Written while the inputs are still open.
        with open(new_file, 'wb') as f:
            output.write(f)