How to change font in pdf file using pyPDF2 in python - python

How do I change the font when using the PyPDF2 module?
I tried print(help(canvas.Canvas)).
I tried the initialFontName = None and initialFontSize = None parameters,
but my text didn't change. Also, I'm doing this on a Raspberry Pi running the Raspbian operating system.
Here's my code
# Draw the overlay text onto an in-memory PDF page.
# NOTE(review): fonts other than the built-in ones presumably have to be
# registered with reportlab before they can be selected here — confirm.
can = canvas.Canvas(packet, pagesize=letter, initialFontName='Helvetica', initialFontSize=5)
now = datetime.datetime.now()
can.drawString(250, 500, self.get_Firstn1 + ' ' + self.get_Middlen1 + ' ' + self.get_Lastn1)
can.drawString(300, 550, "Hello World")
can.drawString(350, 600, "Hello")
can.save()

# Rewind the buffer so it can be read back as a PDF.
packet.seek(0)
new_pdf = PdfFileReader(packet)

# Keep the source file open only as long as it is needed; the output is
# written inside the same block so the reader can still access its data.
with open("Sample.pdf", "rb") as source_file:
    existing_pdf = PdfFileReader(source_file)
    # Try to decrypt with an empty password.
    existing_pdf.decrypt('')
    output = PdfFileWriter()
    # add the "watermark" (which is the new pdf) on the existing page
    page = existing_pdf.getPage(0)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(page)
    # write "output" to a real file; the handle is closed even on error
    with open("destination.pdf", "wb") as outputStream:
        output.write(outputStream)

# open the pdf file with the platform's default viewer
opener = "open" if sys.platform == "darwin" else "xdg-open"
subprocess.call([opener, "destination.pdf"])
I tried other fonts such as Arial, Times New Roman, and Cambria, but only Helvetica is accepted, and even then neither the font nor the text size changed.

Related

How to underlay or get transparency to watermark using FPDF2 on python?

I found this code, but it overlays the watermark on my document. I would actually like to underlay it, or make my watermark partially transparent:
# Start building the watermark with FPDF: a single page with large text.
pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=18)
# NOTE(review): presumably rotates by 45 degrees around the point (150, 140) — confirm against the FPDF API.
pdf.rotate(45 ,150,140)
# NOTE(review): presumably an RGB triple for the text colour — confirm.
pdf.set_text_color(255,235,205)
def create_watermark(input_pdf, output, watermark):
    """Merge the first page of ``watermark`` into every page of ``input_pdf``.

    The resulting document is written to the path ``output``.
    """
    stamp = PdfFileReader(watermark).getPage(0)
    source = PdfFileReader(input_pdf)
    writer = PdfFileWriter()

    # Watermark all the pages
    for index in range(source.getNumPages()):
        current = source.getPage(index)
        current.mergePage(stamp)
        writer.addPage(current)

    with open(output, 'wb') as out:
        writer.write(out)
# Example run: stamp the watermark PDF onto the input and write merged.pdf.
input_pdf = "Apostila de investimentos.pdf"
output = 'merged.pdf'
watermark = 'RODRIGO ROMÃO.pdf'
create_watermark(input_pdf, output, watermark)

Splitting PDF pages mid-way and re-combining it?

I don't have enough reputation right now to answer a question I found: how to use Python to split PDF pages in half and recombine them for further processing.
#!/usr/bin/env python
'''
Chops each page in half, e.g. if a source were
created in booklet form, you could extract individual
pages, and re-combines it
'''
from PyPDF2 import PdfFileWriter,PdfFileReader,PdfFileMerger
# split left: crop every page of docu.pdf to its left column -> left.pdf
with open("docu.pdf", "rb") as source:
    reader = PdfFileReader(source)
    writer = PdfFileWriter()
    for index in range(reader.getNumPages()):
        left_page = reader.getPage(index)
        # Crop rectangle for the left column (values tuned to this document).
        left_page.cropBox.lowerLeft = (60, 50)
        left_page.cropBox.upperRight = (305, 700)
        writer.addPage(left_page)
    # Written while the source file is still open.
    with open("left.pdf", "wb") as target:
        writer.write(target)
# split right: crop every page of docu.pdf to its right column -> right.pdf
with open("docu.pdf", "rb") as source:
    reader = PdfFileReader(source)
    writer = PdfFileWriter()
    for index in range(reader.getNumPages()):
        right_page = reader.getPage(index)
        # Crop rectangle for the right column (values tuned to this document).
        right_page.cropBox.lowerLeft = (300, 50)
        right_page.cropBox.upperRight = (540, 700)
        writer.addPage(right_page)
    # Written while the source file is still open.
    with open("right.pdf", "wb") as target:
        writer.write(target)
# combine the split files: for each original page emit its left half and
# then its right half, so the two columns become consecutive pages.
# Both inputs are opened with context managers so their handles are released.
with open("left.pdf", "rb") as left_f, open("right.pdf", "rb") as right_f:
    input1 = PdfFileReader(left_f)
    input2 = PdfFileReader(right_f)
    output = PdfFileWriter()
    numPages = input1.getNumPages()
    for i in range(numPages):
        output.addPage(input1.getPage(i))
        output.addPage(input2.getPage(i))
    # Write while both inputs are still open.
    with open("out.pdf", "wb") as out_f:
        output.write(out_f)
Note: The cropping parameters are specific to your PDF, so please check them before running the program.
Further: You can now use this document to extract text easily, without the columns being merged into each other and garbling the extraction.

Export to image not pdf in python PyPDF2 package

I have the following code that crops part of pdf file then save the output as PDF
from PyPDF2 import PdfFileWriter, PdfFileReader
# Crop the first page of Sample.pdf and save the result as Output.pdf.
with open("Sample.pdf", "rb") as source:
    reader = PdfFileReader(source)
    writer = PdfFileWriter()
    print("Document Has %s Pages." % reader.getNumPages())

    # Only page 0 is processed.
    first_page = reader.getPage(0)
    media_box = first_page.mediaBox
    print(media_box.getUpperRight_x(), media_box.getUpperRight_y())

    # NOTE(review): the trim box's lower-left (280, 280) lies beyond its
    # upper-right (220, 200) — confirm these values are intended.
    first_page.trimBox.lowerLeft = (280, 280)
    first_page.trimBox.upperRight = (220, 200)
    first_page.cropBox.lowerLeft = (100, 720)
    first_page.cropBox.upperRight = (220, 800)
    writer.addPage(first_page)

    # Written while the source file is still open.
    with open("Output.pdf", "wb") as target:
        writer.write(target)
How can I save the result as an image instead of a PDF?
I found this code, but the output is not high quality. How can I improve the quality of the image output?
import fitz
# Render the first page of Output.pdf to an image at the default resolution.
pdffile = "Output.pdf"
doc = fitz.open(pdffile)
page = doc.loadPage(0)
pix = page.getPixmap()
# writePNG produces PNG data, so give the file a matching .png extension
# instead of the misleading .jpg.
output = "Output.png"
pix.writePNG(output)
You can use the pdf2image library to achieve this.
Add the following code at the end:
from pdf2image import convert_from_path
# Convert the PDF's pages to images and save each one as
# Output<index>.jpg, numbering from 0.
images = convert_from_path('Output.pdf')
for index, image in enumerate(images):
    image.save('Output' + str(index) + '.jpg', 'JPEG')
Then, if you wish, you can use the os library to delete the PDF you created with the following code, so that you don't have to delete it yourself.
import os
# Delete the intermediate PDF file.
os.remove("Output.pdf")
This solves the problem but I welcome any advanced ideas and improvements
import fitz
# Render the first page of Output.pdf at a higher resolution.
pdffile = "Output.pdf"
doc = fitz.open(pdffile)
zoom = 2  # zoom factor applied to both axes
mat = fitz.Matrix(zoom, zoom)
page = doc.loadPage(0)
pix = page.getPixmap(matrix=mat)
# writePNG produces PNG data, so give the file a matching .png extension
# instead of the misleading .jpg.
output = "Output.png"
pix.writePNG(output)

How to add a relative file path inside a pdf using pypdf

Context
I have a pdf with links.
I want to replace all the external links with local files in the same folder.
Is there a way to do that in pypdf or Python?
e.g.
outputStream = open("destination.pdf", "wb")

# Names of the PDF dictionary entries inspected below.
key = '/Annots'
uri = '/URI'
ank = '/A'

import os
dir_path = os.path.dirname(os.path.realpath(__file__))
cwd = os.getcwd()

# Visit every annotation on every page; wherever the annotation's action
# dictionary carries a URI, replace that URI with a relative file: link.
for page_index in range(existing_pdf.getNumPages()):
    page = existing_pdf.getPage(page_index)
    page_object = page.getObject()
    if key in page_object:
        ann = page_object[key]
        for annotation_ref in ann:
            u = annotation_ref.getObject()
            if uri not in u[ank]:
                continue
            # Leftover inspection values; not used afterwards.
            test = u[ank][uri]
            test1 = u[ank].keys()
            u[TextStringObject(ank)][TextStringObject(uri)] = TextStringObject(f"file:./foo1.pdf")
    output.addPage(page)

# finally, write "output" to a real file
output.write(outputStream)
outputStream.close()
The above does not work, i.e. foo1.pdf is not linked properly.
If I use "file:///{CWD}/foo1.pdf" instead, it works.
Is there a way to use only a relative path?
After reading through the pdf structure and documentation I was able to write the following and it works as expected.
# Turn every URI link annotation into a Launch action that opens a local
# file through a relative path.
for x in range(existing_pdf.getNumPages()):
    page = existing_pdf.getPage(x)
    page_object = page.getObject()
    if key in page_object:
        ann = page_object[key]
        for a in ann:
            u = a.getObject()
            # Annotations without an action dictionary are skipped rather
            # than raising KeyError.
            if ank in u and uri in u[ank]:
                action = u[ank]
                # Drop the URI target and replace it with a Launch action.
                del action[NameObject(uri)]
                action[NameObject('/F')] = TextStringObject("./sheets/sheet1.pdf")
                action[NameObject('/S')] = NameObject("/Launch")
                # Pass a real boolean: any non-empty string (even "false")
                # would be truthy.
                action[NameObject('/NewWindow')] = BooleanObject(True)
    output.addPage(page)
# finally, write "output" to a real file
output.write(outputStream)
outputStream.close()

How to split/crop a pdf along the middle using pyPdf

I have a PDF that looks like this, and I'd like to crop all the text out, almost right down the middle of the page. I found this script that does something similar:
def splitHorizontal():
    """Crop every page of ``in.pdf`` to fixed boxes and write ``out.pdf``.

    Prints the page count and, for each page, the upper-right corner of its
    media box. The trim/crop coordinates are hard-coded and tuned to one
    specific document.
    """
    from pyPdf import PdfFileWriter, PdfFileReader

    # Context managers make sure both files are closed, even on error.
    with open("in.pdf", "rb") as inputStream:
        input1 = PdfFileReader(inputStream)
        output = PdfFileWriter()
        numPages = input1.getNumPages()
        # Parenthesised single-argument print gives the same output on
        # Python 2 and Python 3.
        print("document has %s pages." % numPages)
        for i in range(numPages):
            page = input1.getPage(i)
            print("%s %s" % (page.mediaBox.getUpperRight_x(),
                             page.mediaBox.getUpperRight_y()))
            page.trimBox.lowerLeft = (25, 25)
            page.trimBox.upperRight = (225, 225)
            page.cropBox.lowerLeft = (50, 50)
            page.cropBox.upperRight = (200, 200)
            output.addPage(page)
        # Written while the input file is still open.
        with open("out.pdf", "wb") as outputStream:
            output.write(outputStream)
However these crop dimensions are tuned to that specific example.
Can anyone show me how to find the correct crop dimensions?
I originally got the script from here --> Cropping pages of a .pdf file.
I read more into what the author had said, finally realizing that he had said:
The resulting document has a trim box that is 200x200 points and starts at 25,25 points inside the media box. The crop box is 25 points inside the trim box.
meaning
page.cropBox.upperRight = (200, 200)
must control the ultimate margins. I therefore adjusted the statement to
page.cropBox.upperLeft = (290, 792)
This mirrors the cropping onto the other side and makes sure the crop keeps the full vertical extent of the page.
Chops each page in half, e.g. if a source were
created in booklet form, and then re-combines it
for further processing eg. text extraction
Importing required libraries
from PyPDF2 import PdfFileWriter,PdfFileReader,PdfFileMerger
Splitting Left Part
# Left half: crop each page of docu.pdf to the left column and save left.pdf.
with open("docu.pdf", "rb") as src_file:
    src_reader = PdfFileReader(src_file)
    left_writer = PdfFileWriter()
    page_total = src_reader.getNumPages()
    page_no = 0
    while page_no < page_total:
        cropped = src_reader.getPage(page_no)
        # Crop rectangle for the left column (values tuned to this document).
        cropped.cropBox.lowerLeft = (60, 50)
        cropped.cropBox.upperRight = (305, 700)
        left_writer.addPage(cropped)
        page_no += 1
    # Written while the source file is still open.
    with open("left.pdf", "wb") as dst_file:
        left_writer.write(dst_file)
Splitting right part :
# Right half: crop each page of docu.pdf to the right column and save right.pdf.
with open("docu.pdf", "rb") as src_file:
    src_reader = PdfFileReader(src_file)
    right_writer = PdfFileWriter()
    page_total = src_reader.getNumPages()
    page_no = 0
    while page_no < page_total:
        cropped = src_reader.getPage(page_no)
        # Crop rectangle for the right column (values tuned to this document).
        cropped.cropBox.lowerLeft = (300, 50)
        cropped.cropBox.upperRight = (540, 700)
        right_writer.addPage(cropped)
        page_no += 1
    # Written while the source file is still open.
    with open("right.pdf", "wb") as dst_file:
        right_writer.write(dst_file)
Combining left with right (two columns to two pages)
# Interleave the two halves: left half of page i, then right half of page i.
# Both inputs are opened with context managers so their handles are released.
with open("left.pdf", "rb") as left_f, open("right.pdf", "rb") as right_f:
    input1 = PdfFileReader(left_f)
    input2 = PdfFileReader(right_f)
    output = PdfFileWriter()
    numPages = input1.getNumPages()
    for i in range(numPages):
        output.addPage(input1.getPage(i))
        output.addPage(input2.getPage(i))
    # Write while both inputs are still open.
    with open("out.pdf", "wb") as out_f:
        output.write(out_f)
I faced the same challenge and wrote this (requires PyPDF2):
https://gist.github.com/kintaro1981/eb6cfc6f40a7fb39744f5ae630d58fd5
# -*- coding: utf-8 -*-
"""
This script splits each PDF page in half vertically and merges the halves into a single PDF, in order.
It asks how many pages you want to skip, to avoid splitting a book cover or a specific number of initial pages.
Usage:
python cutpdfpages.py <filename.pdf> <newfilename.pdf>
"""
import sys
import copy
from PyPDF2 import PdfWriter, PdfReader
# Command-line arguments: source PDF path and destination PDF path.
file = str(sys.argv[1])
newfile = str(sys.argv[2])

with open(file, "rb") as pdf1:
    pdf = PdfReader(pdf1)
    output = PdfWriter()
    # Use len(pdf.pages) to match the PdfReader/PdfWriter API used below,
    # instead of the legacy getNumPages() call.
    numpages = len(pdf.pages)
    page2skip = int(input('Insert how many pages do you want to skip: '))
    # Pages before page2skip are not added to the output at all.
    for i in range(page2skip, numpages):
        page = pdf.pages[i]
        # NOTE(review): copy.copy is shallow; the copy is taken before either
        # media box is touched — verify the two halves never share one box.
        pagebis = copy.copy(page)
        # First output page: media box moved in to the horizontal midpoint
        # from the left.
        page.mediabox.upper_left = (page.mediabox.right / 2, page.mediabox.top,)
        output.add_page(page)
        # Second output page: media box moved in to the horizontal midpoint
        # from the right.
        pagebis.mediabox.upper_right = (pagebis.mediabox.right / 2, pagebis.mediabox.top,)
        output.add_page(pagebis)
    # Written while the source file is still open.
    with open(newfile, "wb") as newpdf:
        output.write(newpdf)

Categories