How can I place an image over an existing PDF file at a specific coordinate location? The PDF represents a drawing sheet with one page. The image will be scaled. I'm checking ReportLab but can't find the answer. Thanks.
It's been 5 years, and I think these answers need some TLC. Here is a complete solution.
The following is tested with Python 2.7
Install dependencies
pip install reportlab
pip install pypdf2
Do the magic
from reportlab.pdfgen import canvas
from PyPDF2 import PdfFileWriter, PdfFileReader
# Create the watermark PDF from an image
c = canvas.Canvas('watermark.pdf')
# Draw the image at x, y. I positioned the x,y to be where i like here
c.drawImage('test.png', 15, 720)
# Add some custom text for good measure
c.drawString(15, 720, "Hello World")
c.save()

# Both input streams stay open until the output has been written, because the
# reader objects are still used while the merged pages are serialised.
with open("watermark.pdf", "rb") as watermark_stream, \
        open("test2.pdf", "rb") as input_stream:
    # Get the watermark file you just created
    watermark = PdfFileReader(watermark_stream)
    # Get our files ready
    output_file = PdfFileWriter()
    input_file = PdfFileReader(input_stream)
    # Number of pages in input document
    page_count = input_file.getNumPages()
    # Go through all the input file pages to add a watermark to them
    for page_number in range(page_count):
        # Report a 1-based page number so the last message reads "N of N"
        print("Watermarking page {} of {}".format(page_number + 1, page_count))
        # merge the watermark with the page
        input_page = input_file.getPage(page_number)
        input_page.mergePage(watermark.getPage(0))
        # add page from input file to output document
        output_file.addPage(input_page)
    # finally, write "output" to document-output.pdf
    with open("document-output.pdf", "wb") as outputStream:
        output_file.write(outputStream)
References:
pypdf project page:
https://pypi.org/project/pypdf/
Reportlab docs:
http://www.reportlab.com/apis/reportlab/2.4/pdfgen.html
Reportlab complete user guide:
https://www.reportlab.com/docs/reportlab-userguide.pdf
https://pypi.org/project/pypdf/:
from pypdf import PdfWriter, PdfReader
# Open the document to be stamped and the single-page watermark PDF.
writer = PdfWriter()
reader = PdfReader("document1.pdf")
watermark = PdfReader("watermark.pdf")

# Overlay the watermark's first page onto the document's first page and
# queue the merged page for output.
page = reader.pages[0]
page.merge_page(watermark.pages[0])
writer.add_page(page)

# Finally, write the results to disk.
with open("document-output.pdf", "wb") as fp:
    writer.write(fp)
I think this works the same way as adding a watermark; see the documentation for more information.
I combined ReportLab and pypdf to insert an image directly without having to generate the PDF up front:
from pyPdf import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from StringIO import StringIO
# Using ReportLab to render the image into an in-memory, single-page PDF
imgTemp = StringIO()
imgDoc = canvas.Canvas(imgTemp)
# Draw image on Canvas and save PDF in buffer
imgPath = "path/to/img.png"
imgDoc.drawImage(imgPath, 399, 760, 160, 160)  # at (399,760) with size 160x160
imgDoc.save()

# Use PyPDF to merge the image-PDF into the template. The template stream is
# kept open until the result has been written.
with open("document.pdf", "rb") as template_stream:
    page = PdfFileReader(template_stream).getPage(0)
    overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
    page.mergePage(overlay)
    # Save the result
    output = PdfFileWriter()
    output.addPage(page)
    # PDF data is binary, so the output must be opened with "wb", not "w"
    with open("output.pdf", "wb") as output_stream:
        output.write(output_stream)
Thanks to the previous answers. Here is my way, with Python 3.4:
# -*- coding: utf-8 -*-
from io import BytesIO
from PyPDF2 import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
def gen_pdf():
    """Build ``output.pdf`` with one A4 page per slide image.

    Slides are read from ``slades/1.jpg`` through ``slades/66.jpg``. Each image
    is drawn onto its own in-memory ReportLab canvas and the resulting page is
    appended to a single PyPDF2 writer.
    """
    # there are 66 slides (1.jpg, 2.jpg, 3.jpg...)
    path = 'slades/{0}.jpg'
    pdf = PdfFileWriter()
    for num in range(1, 67):  # for each slide
        # Using ReportLab Canvas to insert image into PDF
        imgTemp = BytesIO()
        imgDoc = canvas.Canvas(imgTemp, pagesize=A4)
        # Draw image on Canvas and save PDF in buffer
        # x, y - start position; in my case -25, -45 needed
        imgDoc.drawImage(path.format(num), -25, -45)
        imgDoc.save()
        # Re-read the rendered single-page PDF and append its page
        pdf.addPage(PdfFileReader(BytesIO(imgTemp.getvalue())).getPage(0))
    # Close the output file deterministically instead of leaking the handle
    with open("output.pdf", "wb") as output_stream:
        pdf.write(output_stream)
if __name__ == '__main__':
gen_pdf()
This is quite easy to do with PyMuPDF without merging two PDFs:
import fitz
# Insert an image into the first page of an existing PDF and save the result
# under a new file name.
src_pdf_filename = 'source.pdf'
dst_pdf_filename = 'destination.pdf'
img_filename = 'barcode.jpg'
# http://pymupdf.readthedocs.io/en/latest/rect/
# Set position and size according to your needs
# NOTE(review): the four numbers are presumably (x0, y0, x1, y1) corners of the
# target rectangle — confirm against the Rect documentation linked above.
img_rect = fitz.Rect(100, 100, 120, 120)
document = fitz.open(src_pdf_filename)
# We'll put image on first page only but you could put it elsewhere
page = document[0]
page.insertImage(img_rect, filename=img_filename)
# See http://pymupdf.readthedocs.io/en/latest/document/#Document.save and
# http://pymupdf.readthedocs.io/en/latest/document/#Document.saveIncr for
# additional parameters, especially if you want to overwrite existing PDF
# instead of writing new PDF
document.save(dst_pdf_filename)
document.close()
This is what worked for me
from PyPDF2 import PdfFileWriter, PdfFileReader
def watermarks(temp, watermar, new_file):
    """Stamp every page of a PDF with the first page of a watermark PDF.

    Args:
        temp: Path of the PDF whose pages receive the watermark.
        watermar: Path of the watermark PDF; only its first page is used.
        new_file: Path the watermarked document is written to.
    """
    # Both inputs stay open until the output has been written, because the
    # reader objects are still used while the merged pages are serialised.
    with open(temp, 'rb') as template_stream, \
            open(watermar, 'rb') as watermark_stream:
        template = PdfFileReader(template_stream)
        wpdf = PdfFileReader(watermark_stream)
        watermark = wpdf.getPage(0)
        # The writer was never created in the original, so addPage() raised
        # NameError on the first page.
        output = PdfFileWriter()
        for i in range(template.getNumPages()):
            page = template.getPage(i)
            page.mergePage(watermark)
            output.addPage(page)
        with open(new_file, 'wb') as f:
            output.write(f)
Related
I'm trying to add a slightly transparent watermark to a bunch of PDFs. The problem is that my watermark only shows up on pages without a solid background (i.e., if a page has a solid color or a solid white background, the watermark doesn't show). My watermark is a PDF file. This is my code:
from pathlib import Path
from typing import Union, Literal, List
import pypdf
from pypdf import PdfWriter, PdfReader, PageObject
def watermark(
    content_pdf: Path,
    stamp_pdf: Path,
    pdf_result: Path,
    page_indices: Union[Literal["ALL"], List[int]] = "ALL",
):
    """Place the first page of ``stamp_pdf`` underneath selected content pages.

    For each selected page a blank page is created from the stamp document,
    the stamp's first page is merged into it, and the content page is merged
    on top. The content page's mediabox is then applied to the new page.

    Args:
        content_pdf: PDF whose pages are watermarked.
        stamp_pdf: PDF whose first page is the watermark.
        pdf_result: File the resulting PDF is written to.
        page_indices: Indices of the content pages to emit, or ``"ALL"`` for
            every page of ``content_pdf``.
    """
    source = PdfReader(content_pdf)
    if page_indices == "ALL":
        page_indices = list(range(0, len(source.pages)))

    out = PdfWriter()
    stamp_source = PdfReader(stamp_pdf)
    for page_index in page_indices:
        # Start from the watermark so that it ends up below the content.
        merged = PageObject.create_blank_page(stamp_source)
        merged.merge_page(stamp_source.pages[0])

        original = source.pages[page_index]
        original_box = original.mediabox
        merged.merge_page(original)
        merged.mediabox = original_box
        out.add_page(merged)

    with open(pdf_result, "wb") as result_stream:
        out.write(result_stream)
# Example invocation. The path/to/... arguments are placeholders, not real
# expressions; substitute actual paths before running.
# NOTE(review): watermark() opens pdf_result with open(..., "wb"), so it has to
# name an output file rather than a directory.
watermark(content_pdf=path/to/pdf.pdf,
stamp_pdf=path/to/watermark.pdf,
pdf_result=path/to/outout/dir )
This is the pdf I get as an output. Essentially, the top pdf (where the watermark is in the background) is the result I want, but when there's a solid color in the original pdf the watermark doesn't show.
I have also tried adding the watermark as a stamp (see code below), but naturally the watermark is added on top of the pdf, blocking some of the text.
from pathlib import Path
from typing import Union, Literal, List
from pypdf import PdfWriter, PdfReader
def stamp(
    content_pdf: Path,
    stamp_pdf: Path,
    pdf_result: Path,
    page_indices: Union[Literal["ALL"], List[int]] = "ALL",
):
    """Merge the first page of ``stamp_pdf`` on top of selected content pages.

    Args:
        content_pdf: PDF whose pages are stamped.
        stamp_pdf: PDF whose first page is the stamp.
        pdf_result: File the resulting PDF is written to.
        page_indices: Indices of the content pages to emit, or ``"ALL"`` for
            every page of ``content_pdf``.
    """
    stamp_page = PdfReader(stamp_pdf).pages[0]
    out = PdfWriter()
    source = PdfReader(content_pdf)
    if page_indices == "ALL":
        page_indices = list(range(0, len(source.pages)))

    for page_index in page_indices:
        target = source.pages[page_index]
        # Remember the page's box so it can be re-applied after the merge.
        original_box = target.mediabox
        target.merge_page(stamp_page)
        target.mediabox = original_box
        out.add_page(target)

    with open(pdf_result, "wb") as result_stream:
        out.write(result_stream)
# Example invocation of the stamp() variant defined in this snippet (the
# original called watermark() here). The path/to/... arguments are
# placeholders; substitute actual paths before running.
stamp(content_pdf=path/to/pdf.pdf,
      stamp_pdf=path/to/watermark.pdf,
      pdf_result=path/to/outout/dir )
So my question is, is there a way to modify one of these code snippets so that I get the output I want?
I am currently merging two pages into a single page in PyPDF3 but I need to draw a line in the middle of the two pages. Is this possible? Below is the sample code for reference. Thanks in advance!
from PyPDF3 import PdfFileWriter, PdfFileReader
from PyPDF3.pdf import PageObject
pdf_file = "Plan.pdf"
outputFile = "Merged_Plan.pdf"

# The source stream stays open until the merged document has been written,
# and both files are closed deterministically afterwards.
with open(pdf_file, "rb") as source_stream:
    inputPDF = PdfFileReader(source_stream, strict=False)
    outputPDF = PdfFileWriter()
    # NOTE(review): pages are consumed in pairs (x, x + 1); with an odd page
    # count the last iteration requests a page past the end — confirm that
    # inputs always have an even number of pages.
    for x in range(0, inputPDF.numPages, 2):
        page1 = inputPDF.getPage(x).rotateClockwise(90)
        page2 = inputPDF.getPage(x + 1).rotateClockwise(90)
        # The combined sheet is as wide as the wider page and as tall as both
        # pages stacked.
        total_width = max(page1.mediaBox.upperRight[0], page2.mediaBox.upperRight[0])
        total_height = page1.mediaBox.upperRight[1] + page2.mediaBox.upperRight[1]
        new_page = PageObject.createBlankPage(None, total_width, total_height)
        # page1 is shifted up by its own height; page2 sits at the origin.
        new_page.mergeTranslatedPage(page1, 0, page1.mediaBox.upperRight[1])
        new_page.mergeTranslatedPage(page2, 0, 0)
        outputPDF.addPage(new_page.rotateCounterClockwise(90))
    with open(outputFile, "wb") as target_stream:
        outputPDF.write(target_stream)
You can use the Line annotation: https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html#line
It was recently added to pypdf. You might need to update your installed version.
I recommend switching away from PyPDF2 / PyPDF3 / PyPDF4 to pypdf.
I have the following code that crops part of a PDF file and then saves the output as a PDF:
from PyPDF2 import PdfFileWriter, PdfFileReader
# Crop the first page of Sample.pdf and write it to Output.pdf. The input
# stream is kept open until the output has been written.
with open("Sample.pdf", "rb") as in_f:
    input1 = PdfFileReader(in_f)
    output = PdfFileWriter()
    numPages = input1.getNumPages()
    print("Document Has %s Pages." % numPages)
    # Only the first page is processed.
    for i in range(1):
        page = input1.getPage(i)
        media_box = page.mediaBox
        print(media_box.getUpperRight_x(), media_box.getUpperRight_y())
        # Trim box corners.
        page.trimBox.lowerLeft = (280, 280)
        page.trimBox.upperRight = (220, 200)
        # Crop box corners.
        page.cropBox.lowerLeft = (100, 720)
        page.cropBox.upperRight = (220, 800)
        output.addPage(page)
    with open("Output.pdf", "wb") as out_f:
        output.write(out_f)
How can I save as an image not as PDF?
I found this code but the output is not at high quality. How can I improve the quality of the image output?
import fitz
# Render the first page of Output.pdf to an image file.
pdffile = "Output.pdf"
doc = fitz.open(pdffile)
page = doc.loadPage(0)
pix = page.getPixmap()
# writePNG produces PNG data, so the file gets a matching .png extension
# (the original saved PNG content under a .jpg name).
output = "Output.png"
pix.writePNG(output)
Hi there, you could use the pdf2image library to achieve this.
You could add the following code at the end:
from pdf2image import convert_from_path
# Convert every page of Output.pdf to an image and save each one as
# Output<index>.jpg, numbering from 0.
images = convert_from_path('Output.pdf')
for i, image in enumerate(images):
    image.save('Output' + str(i) + '.jpg', 'JPEG')
Then, if you wish, you could use the os module to delete the PDF you created, using the following code, so that you don't have to delete it yourself.
import os
os.remove("Output.pdf")
This solves the problem but I welcome any advanced ideas and improvements
import fitz
# Render the first page of Output.pdf at twice the default scale.
pdffile = "Output.pdf"
doc = fitz.open(pdffile)
zoom = 2  # zoom factor, applied to both axes
mat = fitz.Matrix(zoom, zoom)
page = doc.loadPage(0)
pix = page.getPixmap(matrix=mat)
# writePNG produces PNG data, so the file gets a matching .png extension
# (the original saved PNG content under a .jpg name).
output = "Output.png"
pix.writePNG(output)
I'm using Python/Django.
PyPDF2 to read my current pdf.
I want to read a pdf that I have saved and get the orientation of a single page within the pdf.
I'm expecting to be able to determine if the page is either landscape or portrait.
# Fragment: decide whether the page being edited is landscape or portrait from
# its mediaBox, then create a ReportLab canvas.
# NOTE(review): settings, nameOfFinalPdf, pageToEdit, numberOfPageToEdit and
# packet are defined outside this fragment.
tempoutpdffilelocation = settings.TEMPLATES_ROOT + nameOfFinalPdf
pageOrientation = pageToEdit.mediaBox
# NOTE(review): this opens temppdffilelocation, not the tempoutpdffilelocation
# assigned above — confirm which path is intended.
pdfOrientation = PdfFileReader(file(temppdffilelocation, "rb"))
# tempPdfOrientationPage = pdfOrientation.getPage(numberOfPageToEdit).mediaBox
print("existing pdf width: ")
# print(existing_pdf.getPage(numberOfPageToEdit).getWidth)
# print("get page size with rotation")
# print(tempPdfOrientationPage.getPageSizeWithRotation)
# existing_pdf is assigned here but not read again within this fragment.
existing_pdf = pdfOrientation.getPage(numberOfPageToEdit).mediaBox
# print(pageOrientation)
# Compare the box width (upper-right x minus upper-left x) with its height
# (upper-right y minus lower-right y). The page's /Rotate entry is not read.
if pageOrientation.getUpperRight_x() - pageOrientation.getUpperLeft_x() > pageOrientation.getUpperRight_y() - pageOrientation.getLowerRight_y():
print('Landscape')
print(pageOrientation)
# print(pdfOrientation.getWidth())
else:
print('Portrait')
print(pageOrientation)
# print(pdfOrientation.getWidth())
# create a new PDF with Reportlab
# The page size is fixed to letter regardless of the orientation found above.
can = canvas.Canvas(packet, pagesize=letter)
The last line sets pagesize=letter, which is what I want to determine based on my current PDF.
And here's my imports:
from PyPDF2 import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter, landscape
import urllib
I've tried pyPdf .mediaBox but that always returns the same value of the expected file size, not the actual size. And pyPdf is outdated.
As you can see I've also tried getWidth and withRotation.
I would think there'd be an easy way for PyPDF2's PdfFileReader to determine the orientation of a selected object.
Any help is appreciated. Thanks.
I used simply "/Rotate" attribute of the page:
OrientationDegrees = pdf.getPage(numberOfPageToEdit).get('/Rotate')
it can be 0, 90, 180, 270 or None
The rotate attribute will override the mediaBox settings. To account for this, check the page rotation before making final judgement. Note the text too can be rotated.
from PyPDF2 import PdfFileReader
# Report the orientation of the first page, taking both the mediaBox shape and
# the page's /Rotate entry into account.
pdf_path = 'yourPDFname.pdf'
pdf_reader = PdfFileReader(pdf_path)
deg = pdf_reader.getPage(0).get('/Rotate')
page = pdf_reader.getPage(0).mediaBox

box_width = page.getUpperRight_x() - page.getUpperLeft_x()
box_height = page.getUpperRight_y() - page.getLowerRight_y()
is_wide = box_width > box_height
# A rotation of 0, 180 or a missing /Rotate entry keeps the box's orientation;
# any other value swaps it.
keeps_orientation = deg in (0, 180, None)

if is_wide == keeps_orientation:
    print('Landscape')
else:
    print('Portrait')
You can detect it by using this code snippet:
from PyPDF2 import PdfFileReader
# Report whether the first page's mediaBox is wider than it is tall.
# The page's /Rotate entry is not taken into account here.
# PDF data is binary, so the file is opened with 'rb' and closed afterwards.
with open('example.pdf', 'rb') as pdf_stream:
    pdf = PdfFileReader(pdf_stream)
    page = pdf.getPage(0).mediaBox
    # Parentheses keep the comparison valid across the line break.
    if (page.getUpperRight_x() - page.getUpperLeft_x()
            > page.getUpperRight_y() - page.getLowerRight_y()):
        print('Landscape')
    else:
        print('Portrait')
This one works, fully tested:
import PyPDF2
from PyPDF2 import PdfFileReader
# Report whether the first page's mediaBox is wider than it is tall.
# The page's /Rotate entry is not taken into account here.
# The input file is closed once the check is done.
with open('YourPDFname.pdf', 'rb') as pdf_stream:
    pdf = PdfFileReader(pdf_stream)
    page = pdf.getPage(0).mediaBox
    # Parentheses keep the comparison valid across the line break.
    if (page.getUpperRight_x() - page.getUpperLeft_x()
            > page.getUpperRight_y() - page.getLowerRight_y()):
        print('Landscape')
    else:
        print('Portrait')
I'm writing a program which takes all the pictures in a given folder and aggregates them into a PDF. The problem I have is that when the images are drawn, they are bigger in size and are oddly rotated to the left. I've searched everywhere and haven't found anything, even in the ReportLab documentation.
Here's the code:
import os
from PIL import Image
from PyPDF2 import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from reportlab.lib.units import cm
from StringIO import StringIO
def main():
    """Stamp each image found by ``image_search`` onto a copy of a template page.

    For every image a one-page ``watermark<index>.pdf`` is rendered with
    ReportLab, merged onto the first page of ``sample.pdf``, and the merged
    page is appended to ``output.pdf``.
    """
    images = image_search()
    output = PdfFileWriter()
    # Input streams stay open until the output has been written, because the
    # merged pages still belong to their readers. They are closed in `finally`.
    open_streams = []
    try:
        for index, image in enumerate(images):
            # Read the pixel size to preserve the aspect ratio when drawing
            with Image.open(image) as image_file:
                width, height = image_file.size
            im_width = 1 * cm
            # Using ReportLab to insert image into PDF
            watermark_str = "watermark" + str(index) + '.pdf'
            imgDoc = canvas.Canvas(watermark_str)
            # define the aspect ratio first
            aspect = height / float(width)
            # Draw at the origin, im_width wide, height scaled by the aspect ratio
            imgDoc.drawImage(image, 0, 0, width=im_width, height=(im_width * aspect))
            imgDoc.showPage()
            imgDoc.save()
            # Get the watermark file just created
            watermark_stream = open(watermark_str, "rb")
            open_streams.append(watermark_stream)
            watermark = PdfFileReader(watermark_stream)
            # Re-open the template for every image so each gets a fresh page
            pdf1File = open('sample.pdf', 'rb')
            open_streams.append(pdf1File)
            page = PdfFileReader(pdf1File).getPage(0)
            page.mergePage(watermark.getPage(0))
            output.addPage(page)
        # Save the result once, after all pages have been collected
        with open("output.pdf", "wb") as output_stream:
            output.write(output_stream)
    finally:
        for stream in open_streams:
            stream.close()
#The function which searches the current directory for image files.
def image_search():
    """Return the names of the image files in the current directory.

    A directory entry counts as an image when its name ends with one of
    ``.jpg``, ``.png``, ``.PNG``, ``.jpeg`` or ``.JPG`` (case-sensitive).
    Names are returned in the order ``os.listdir`` yields them.
    """
    # Built once; str.endswith accepts a tuple and checks every suffix in a
    # single call.
    image_ext = ('.jpg', '.png', '.PNG', '.jpeg', '.JPG')
    return [doc for doc in os.listdir(os.curdir) if doc.endswith(image_ext)]
main()
I also tried scaling and specifying the aspect ratio using the im_width variable, which gave the same output.
After a little bit of confusion about your goal, I figured out that the goal is to make a PDF overview of the images in the current folder. To do so we actually don't need PyPDF2, as ReportLab offers everything we need for this.
See the code below with the comments as guidelines:
def main():
    """Write ``overview.pdf`` with one A4 page per image from ``image_search``.

    Each image is scaled to the full page width, keeping its aspect ratio, and
    drawn so that its top edge touches the top of the page.
    """
    output_file_loc = "overview.pdf"
    imgDoc = canvas.Canvas(output_file_loc)
    imgDoc.setPageSize(A4)  # This is actually the default page size
    document_width, document_height = A4

    images = image_search()
    for image in images:
        # Open the image only to read its dimensions, then release the file
        with Image.open(image) as image_file:
            image_width, image_height = image_file.size
        image_aspect = image_height / float(image_width)

        # Determine the dimensions of the image in the overview
        print_width = document_width
        print_height = document_width * image_aspect

        # Draw the image on the current page
        # Note: As reportlab uses bottom left as (0,0) we need to determine the
        # start position by subtracting the dimensions of the image from those
        # of the document
        imgDoc.drawImage(
            image,
            document_width - print_width,
            document_height - print_height,
            width=print_width,
            height=print_height,
        )

        # Inform Reportlab that we want a new page
        imgDoc.showPage()

    # Save the document
    imgDoc.save()