How to Merge two pages from a pdf file as one page - python

I have a PDF with a total of 6 pages of images. I want to merge pages 1 and 2 into a single PDF, and likewise for pages 3 to 6.
So far I have split all 6 pages of the PDF into individual PDFs.
import os
from PyPDF2 import PdfFileReader, PdfFileWriter
def pdf_splitter(path):
    """Split the PDF at *path* into one single-page PDF per page.

    Each page is written to the current working directory as
    ``<basename>_page_<n>.pdf``, where ``<basename>`` is the input file name
    without its extension and ``<n>`` is the 1-based page number. The name of
    every file created is printed.
    """
    fname = os.path.splitext(os.path.basename(path))[0]
    pdf = PdfFileReader(path)
    for page in range(pdf.getNumPages()):
        # A fresh writer per page so each output file holds exactly one page.
        pdf_writer = PdfFileWriter()
        pdf_writer.addPage(pdf.getPage(page))
        output_filename = '{}_page_{}.pdf'.format(fname, page + 1)
        with open(output_filename, 'wb') as out:
            pdf_writer.write(out)
        print('Created: {}'.format(output_filename))
if __name__ == '__main__':
    # Raw string: in a normal literal the '\f' in '...\fw9.pdf' is a form-feed
    # character, which silently corrupts the Windows path.
    path = r'D:\Tasks\Samples\fw9.pdf'
    pdf_splitter(path)
I want to know how to merge pages 1 and 2 of fw9 into a single PDF file that contains only one page: one half of that page should show page 1 of fw9 and the other half page 2. I have to do this for all 6 pages: pages 1-2 become one single-page PDF, pages 3-4 become another single-page PDF with both pages on the same page, and so on. Kindly help if anyone has an idea of how to do this.

The PyPDF2 library also has a PdfFileMerger object, which should do exactly what you want.
As in the example here, you can just create a PdfFileMerger, read two pages and put them into one single file.
I changed your script slightly so that it also creates files with pages 0-1, 2-3, 4-5, etc. (page 0 is of course the first page, since Python numbering starts from 0).
import os
from PyPDF2 import PdfFileReader, PdfFileWriter, PdfFileMerger
def pdf_splitter(path):
    """Split the PDF at *path* into single pages and join them two at a time.

    For every page a file ``<basename>_page_<n>.pdf`` (1-based ``<n>``) is
    written to the current working directory. After every second page the two
    most recent single-page files are appended into
    ``<basename>_pages_<i>_<j>.pdf`` (0-based page indices), which therefore
    contains two pages. A trailing unpaired page is only written as a
    single-page file.
    """
    fname = os.path.splitext(os.path.basename(path))[0]
    pdf = PdfFileReader(path)
    input_paths = []
    for page in range(pdf.getNumPages()):
        pdf_writer = PdfFileWriter()
        pdf_writer.addPage(pdf.getPage(page))
        output_filename = '{}_page_{}.pdf'.format(fname, page + 1)
        input_paths.append(output_filename)
        with open(output_filename, 'wb') as out:
            pdf_writer.write(out)
        print('Created: {}'.format(output_filename))
        # every 2 pages!
        # Change the two if you need every other number of pages!
        if page % 2 == 1:
            pdf_merger = PdfFileMerger()
            try:
                # Distinct loop name so the `path` parameter is not shadowed.
                for single_page_path in input_paths:
                    pdf_merger.append(single_page_path)
                # we call it pages_N-1_N, so first would be pages_0_1!
                output_path = '{}_pages_{}_{}.pdf'.format(fname, page - 1, page)
                with open(output_path, 'wb') as fileobj:
                    pdf_merger.write(fileobj)
            finally:
                # Release the file handles the merger opened for its inputs.
                pdf_merger.close()
            input_paths = []
if __name__ == '__main__':
    # Raw string: in a normal literal the '\f' in '...\fw9.pdf' is a form-feed
    # character, which silently corrupts the Windows path.
    path = r'D:\Tasks\Samples\fw9.pdf'
    pdf_splitter(path)
Is this what you wanted?
This will first create a single-page PDF for each page and then combine them two at a time. Creating the single-page PDFs could also be skipped, but I was not sure whether you wanted them or not.

from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2 import PageObject
# Lay every page of document.pdf out left-to-right on one wide page and save
# the result as out.pdf.
# The input stays open until the output has been written, because the reader
# resolves page objects from the underlying stream on demand.
with open('document.pdf', 'rb') as pdf1File:
    pdf1Reader = PdfFileReader(pdf1File)

    # Make a list of all pages
    pages = [pdf1Reader.getPage(pageNum) for pageNum in range(pdf1Reader.numPages)]

    # Output page size: six first-page widths wide (one slot per page of a
    # six-page input) and 100 units taller than the first page.
    width = pages[0].mediaBox.getWidth() * 6
    height = pages[0].mediaBox.getHeight() + 100

    # Create blank page to merge all pages in one page
    merged_page = PageObject.createBlankPage(None, width, height)

    # Place each page unscaled at the running x offset, raised by 10 units.
    x = 0
    for page in pages:
        merged_page.mergeScaledTranslatedPage(page, 1, x, 10)
        # Box dimensions are converted to float before being accumulated.
        x = float(x) + float(page.mediaBox.getWidth())

    # Create final file with one page
    writer = PdfFileWriter()
    writer.addPage(merged_page)
    with open('out.pdf', 'wb') as f:
        writer.write(f)
I wanted to merge 6 pages onto one page, so I used 6 as the multiplier for the page width.

This is how to merge every two pages onto a single page (note that the code below places the two pages side by side):
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2 import PageObject
# Merge the pages of 1.pdf two at a time: each output page holds one pair of
# input pages side by side, and the result is saved as out.pdf.
# The input stays open until the output has been written, because the reader
# resolves page objects from the underlying stream on demand.
with open('1.pdf', 'rb') as pdf1File:
    pdf1Reader = PdfFileReader(pdf1File)

    # Make a list of all pages
    pages = [pdf1Reader.getPage(pageNum) for pageNum in range(pdf1Reader.numPages)]

    writer = PdfFileWriter()

    # Walk the pages in pairs: (0, 1), (2, 3), ...
    for first in range(0, len(pages), 2):
        left_page = pages[first]
        left_width = float(left_page.mediaBox.getWidth())

        # Each sheet is two left-page widths wide and one page high.
        merged_page = PageObject.createBlankPage(
            None, left_width * 2, left_page.mediaBox.getHeight()
        )
        merged_page.mergePage(left_page)

        # With an odd page count the last sheet has no right-hand page.
        if first + 1 < len(pages):
            # The right-hand page starts where the left-hand page ends.
            merged_page.mergeScaledTranslatedPage(pages[first + 1], 1, left_width, 0)

        writer.addPage(merged_page)

    # Create final file with one output page per pair of input pages
    with open('out.pdf', 'wb') as f:
        writer.write(f)

Related

How to replace a word in pdf with Python

I want to replace a word in a PDF, but when I try to do that I always get the same PDF back. Here is my code block. Currently I am using PyPDF2, but if there is a better suggestion I can switch. What is missing from my code?
# Fragment from inside a method (it references `self.cipher`). The original
# indentation was lost, so the nesting of the statements below is not shown.
with open(file_path, 'rb') as file:
pdf_reader = PdfFileReader(file)
# Encrypt the word in the PDF content
encrypted_word = self.cipher.encrypt(word_to_encrypt_bytes)
encrypted_word_b64 = base64.b64encode(encrypted_word)
# Write the encrypted PDF content to a new PDF file
pdf_writer = PdfFileWriter()
for i in range(pdf_reader.getNumPages()):
page = pdf_reader.getPage(i)
# Extract the page text and do the replacement on a UTF-8 encoded copy of it.
page_content = page.extractText()
page_content_b = page_content.encode('utf-8')
page_content_b = page_content_b.replace(word_to_encrypt.encode(), encrypted_word_b64)
page_content = page_content_b.decode('utf-8')
# The `page` object itself is what gets added; `page_content` is not used
# after this point, so the replaced text never reaches the output file.
pdf_writer.addPage(page)
# Output goes next to the input name, with '_encryptedm' inserted before the extension.
output_path = os.path.join(file_dir, file_name_without_ext + '_encryptedm' + ext)
with open(output_path, 'wb') as output_file:
pdf_writer.write(output_file)
I want to replace a word in my PDF.
It looks like you are only replacing the word in the extracted text, but not actually updating the PDF page content. To do this, you can use the setContentStreams method of the page object to replace the content stream with the updated content.
Here's an updated code block that should work:
from PyPDF2 import PdfFileReader, PdfFileWriter
import base64
# Fragment from inside a method (it references `self.cipher`). The original
# indentation was lost, so the nesting of the statements below is not shown.
with open(file_path, 'rb') as file:
pdf_reader = PdfFileReader(file)
# Encrypt the word in the PDF content
encrypted_word = self.cipher.encrypt(word_to_encrypt_bytes)
encrypted_word_b64 = base64.b64encode(encrypted_word)
# Write the encrypted PDF content to a new PDF file
pdf_writer = PdfFileWriter()
for i in range(pdf_reader.getNumPages()):
page = pdf_reader.getPage(i)
# Extract the page text and do the replacement on a UTF-8 encoded copy of it.
page_content = page.extractText()
page_content_b = page_content.encode('utf-8')
updated_content_b = page_content_b.replace(word_to_encrypt.encode(), encrypted_word_b64)
page_content = updated_content_b.decode('utf-8')
# NOTE(review): this treats the result of page.getContents() as bytes and
# splits it on b"q\n" - confirm the return type in the installed PyPDF2.
# `page_content_streams` is assigned here but not used in the lines below.
page_content_streams = [b"q\n"] + page.getContents().split(b"q\n")[1:]
# `updated_content_b` comes from the extracted text above, not from the
# page's content-stream bytes.
updated_content_streams = [b"q\n"] + updated_content_b.split(b"q\n")[1:]
# NOTE(review): confirm that the page object provides `setContentStreams`
# in the installed PyPDF2 version before relying on this call.
page.setContentStreams(updated_content_streams)
pdf_writer.addPage(page)
# Output goes next to the input name, with '_encryptedm' inserted before the extension.
output_path = os.path.join(file_dir, file_name_without_ext + '_encryptedm' + ext)
with open(output_path, 'wb') as output_file:
pdf_writer.write(output_file)
In this updated code, we first extract the page content as text, replace the word, and then convert it back to bytes. We then get the existing content streams of the page using the getContents method, split them on the q operator (which marks the beginning of a new graphics state), and prepend a q operator to the updated content streams (since the first graphics state is not included in the extracted content). Finally, we set the updated content streams using the setContentStreams method of the page object, and add the updated page to the PDF writer.

Extract specific pages from a single pdf file and save as separate individual files

I'm very new to Python. I just started a week ago and am trying to learn some cool stuff around PDF, but really don't know how to go about this.
I have the attached PDF file, and I would like to extract all the pages between the keywords "PAGE START" and "PAGE END" and save them as single files in one folder. For example, create a folder called "test" and save pages 3, 4 and 5 under the filename first.pdf, and pages 10, 11 and 12 under the filename second.pdf. The pages to extract are all between PAGE START and PAGE END, not including the PAGE START and PAGE END pages themselves.
My attempt:
from PyPDF2 import PdfFileReader, PdfFileWriter
import re
# Attempt at extracting page ranges from test.pdf. The original indentation
# was lost, so the nesting of the statements below is not shown.
reader = PdfFileReader("test.pdf")
StartString = "PAGE START"
# `EndString` is defined here but not referenced in the lines below.
EndString = "PAGE END"
# Iterates over every page index except the last one.
for page in range(reader.getNumPages() - 1):
writer = PdfFileWriter()
PageObj = reader.getPage(page)
Text = PageObj.extractText()
# Look for the start marker in the extracted text of this page.
ResSearch = re.search(StartString, Text)
if ResSearch is not None:
# NOTE(review): `information` is not defined anywhere in this snippet; it is
# indexed as information[page][0..2] (used below as a name, a start and an end).
start = information[page][1]
end = information[page][2]
# Adds pages start .. end-1 to the writer.
while start < end:
writer.addPage(reader.getPage(start))
start += 1
output_filename = "{}_{}_page_{}.pdf".format(
information[page][0], information[page][1], information[page][2]
)
with open(output_filename, "wb") as out:
writer.write(out)

Merge two pages into pdf into one page pdf

I have a two-page PDF file and need to merge its two pages into a single page in a new output PDF. In other words, the new output PDF should have just one page.
The following code does that, but the problem is that the output shows the two pages side by side. I need the pages stacked vertically, one after the other, on the same page.
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.pdf import PageObject
# Put the first two pages of Sample.pdf side by side on one new page and save
# it as MergedPDF.pdf.
pdf_filenames = ['Sample.pdf']

# Both files are handled by `with` so their handles are closed (and the output
# flushed) deterministically; the input stays open until the output is written.
with open(pdf_filenames[0], 'rb') as source:
    inputpdf = PdfFileReader(source, strict=False)
    page1 = inputpdf.getPage(0)
    page2 = inputpdf.getPage(1)

    # New page: as wide as both pages together, as tall as the taller one.
    total_width = page1.mediaBox.upperRight[0] + page2.mediaBox.upperRight[0]
    total_height = max(page1.mediaBox.upperRight[1], page2.mediaBox.upperRight[1])
    new_page = PageObject.createBlankPage(None, total_width, total_height)

    # Page 1 stays at the origin; page 2 is shifted right by page 1's right edge.
    new_page.mergePage(page1)
    new_page.mergeTranslatedPage(page2, page1.mediaBox.lowerRight[0], 0)

    output = PdfFileWriter()
    output.addPage(new_page)
    with open('MergedPDF.pdf', 'wb') as merged_file:
        output.write(merged_file)
I managed to solve it using the following code, but I welcome any other ideas.
One more question: is it possible to control the size of the page before merging?
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import PageObject
# Stack the first two pages of Sample.pdf on one new page (page 1 above
# page 2) and save it as MergedPDF.pdf.
# The input is opened with `with` so its handle is closed; it stays open until
# the output has been written.
with open('Sample.pdf', 'rb') as source:
    reader = PdfFileReader(source)
    page_1 = reader.getPage(0)
    page_2 = reader.getPage(1)

    # New page: page 1's width, twice page 1's height.
    page_height = page_1.mediaBox.getHeight()
    translated_page = PageObject.createBlankPage(
        None, page_1.mediaBox.getWidth(), page_height * 2
    )

    # Page 1 is shifted up by one page height; page 2 stays at the origin.
    translated_page.mergeScaledTranslatedPage(page_1, 1, 0, page_height)
    translated_page.mergePage(page_2)

    writer = PdfFileWriter()
    writer.addPage(translated_page)
    with open('MergedPDF.pdf', 'wb') as f:
        writer.write(f)

Merge two pages in pdf to single pdf one page

I have the following code that merges a pdf file with two pages to a single pdf with one page only
Page two goes below page one in portrait form and everything till now is OK.
Is it possible to add padding white spaces around each page before merging ?
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import PageObject
def merge_two_pages_into_single(pdffile, outfile, padding=0):
    """Stack the first two pages of *pdffile* on one page and write *outfile*.

    Page 1 is placed above page 2. The output page is sized from page 1:
    its width plus ``2 * padding``, and twice its height plus ``3 * padding``
    (margin at the top, between the pages, and at the bottom).

    Args:
        pdffile: Path of the input PDF; it must have at least two pages.
        outfile: Path of the single-page PDF to create.
        padding: Blank margin, in PDF units, left around each page.
            The default of 0 gives the original edge-to-edge layout.
    """
    # The input stays open until the output has been written, because the
    # reader resolves page objects from the underlying stream on demand.
    with open(pdffile, 'rb') as source:
        reader = PdfFileReader(source)
        page_1 = reader.getPage(0)
        page_2 = reader.getPage(1)

        # Convert to float so the box sizes can be mixed with a float padding.
        page_width = float(page_1.mediaBox.getWidth())
        page_height = float(page_1.mediaBox.getHeight())

        translated_page = PageObject.createBlankPage(
            None,
            page_width + 2 * padding,
            2 * page_height + 3 * padding,
        )
        # Upper slot: above the lower page and the margin between the two.
        translated_page.mergeScaledTranslatedPage(
            page_1, 1, padding, page_height + 2 * padding
        )
        # Lower slot: offset from the bottom-left corner by the margin.
        translated_page.mergeScaledTranslatedPage(page_2, 1, padding, padding)

        writer = PdfFileWriter()
        writer.addPage(translated_page)
        with open(outfile, 'wb') as f:
            writer.write(f)
# Example run: merge the first two pages of 231072600062.pdf into MergedPDF.pdf.
merge_two_pages_into_single('231072600062.pdf', 'MergedPDF.pdf')

Pdf Imposition using Python

I am trying to impose the first and second pages of the PDF onto a single page, with the first page above the second.
The issue is that the pages are not being trimmed or merged. The last page is imposed on the second-to-last page, and that is all.
from PyPDF2 import PdfFileReader, PdfFileWriter
# Attempt at imposing consecutive pages of 81plots.pdf onto one page. The
# original indentation was lost, so which statements belong to the loop body
# is not shown.
output = PdfFileWriter()
file_name = '81plots.pdf'
file = PdfFileReader(open(file_name, 'rb'))
# This initial value is overwritten by the loop variable on the next line.
i = 1
for i in range(file.getNumPages()):
# On the first iteration i is 0, so the index requested here is -1.
page = file.getPage(i-1)
# NOTE(review): these assign capitalised attributes (LowerLeft, ...) on
# trimBox; confirm they match the attribute names PyPDF2 actually reads.
page.trimBox.LowerLeft = (0, 395.28422)
page.trimBox.LowerRight = (1459.75542, 395.28422)
page.trimBox.UpperLeft = (0, 790.56844)
page.trimBox.UpperRight = (1459.75542, 790.56844)
page_step = file.getPage(i)
page_step.trimBox.LowerLeft = (0,0)
page_step.trimBox.LowerRight = (1459.75542, 0)
page_step.trimBox.UpperLeft = (0, 395.28422)
page_step.trimBox.UpperRight = (1459.75542, 395.28422)
# Merge the second page onto the first and add the result to the output.
page.mergePage(page_step)
output.addPage(page)
outfile = 'testfile.pdf'
# Rebinds the name `file`, which referred to the reader above.
with open(outfile, 'wb') as file:
output.write(file)
The trim box is not really applicable to what you are trying to do.
I suggest that you start from a blank page and use PageObject class's mergeScaledTranslatedPage method to place the content of both pages on to the new page.

Categories