I want to convert image to pdf in python.
this is my code:
import docx
from docx.shared import Inches, Mm
import os
from PIL import Image
from PIL import Image, ImageDraw, ImageFont
from docx2pdf import convert
from wand.image import Image as Im
image_dir = os.listdir(os.getcwd()+'\\Images')
print(len(image_dir))
doc = docx.Document()
section = doc.sections[0]
section.page_height = Mm(1000)
section.page_width = Mm(580)
section.left_margin = Mm(25.4)
section.right_margin = Mm(25.4)
section.top_margin = Mm(25.4)
section.bottom_margin = Mm(25.4)
section.header_distance = Mm(12.7)
section.footer_distance = Mm(12.7)
p = doc.add_paragraph()
x = 0
for i in range(0, len(image_dir)):
size = (130, 160)
temp_img = Image.open(os.getcwd()+'\\Images\\'+image_dir[i])
temp_img = temp_img.resize(size)
# temp_img.thumbnail(size, Image.ANTIALIAS)
# temp_img.show()
background = Image.new('RGBA', (500, 220), (255, 255, 255, 0))
for k in range(0, 3):
background.paste(temp_img, (0,0))
background.paste(temp_img, (150,0))
background.paste(temp_img, (300,0))
font = ImageFont.truetype(r'arial.ttf', 25)
d1 = ImageDraw.Draw(background)
d1.text((5, 160), image_dir[i][:-4], fill =(0, 0, 0), font = font)
background.save("temp.png")
with Im(filename ="temp.png") as img:
# generating sharp image using sharpen() function.
img.sharpen(radius = 16, sigma = 8)
img.save(filename ="temp1.png")
r = p.add_run()
r.add_picture("temp1.png")
doc.save('demo1.docx')
convert("demo1.docx")
This code run well. But IMG quality of pdf is poor and process is very slowly.
I want to improve convert speed.
Somebody help me. Thank you.
I think PIL is enough for you to combine an image with text into a PDF.
for example, you can save the combined images with text like this
import os
from PIL import Image, ImageDraw, ImageFont
image_dir = os.listdir(os.getcwd()+'\\Images')
for i in range(0, len(image_dir)):
size = (130, 160)
temp_img = Image.open(os.getcwd()+'\\Images\\'+image_dir[i])
temp_img = temp_img.resize(size)
background = Image.new('RGB', (500, 220), (255, 255, 255))
for k in range(0, 3):
background.paste(temp_img, (0,0))
background.paste(temp_img, (150,0))
background.paste(temp_img, (300,0))
font = ImageFont.truetype('arial.ttf', 25)
d1 = ImageDraw.Draw(background)
d1.text((5, 160), image_dir[i][:-4], fill =(0, 0, 0), font = font)
background.save(f"{image_dir[i][:-4]}.pdf")
This is my try. Just give the directory path and its done; you end up with a subdirectory containing all the PDF files
from pathlib import *
from PIL import Image
# path input #
path = input("Enter The path of you Images directory: \n")
path = Path(f"{path}")
####################################################
# making a subdirectory to contain the PDF version #
(path/"PDF's").mkdir(exist_ok=True)
# iterating over every file in the given directory #
# we use try function to ignore non image files #
for pp in path.glob("*"):
try:
with Image.open(pp) as image:
im = image.convert("RGB")
pth = Path(f"{path}\PDF's\{pp.stem}.pdf")
im.save(pth, save_all=True)
except:
pass
print("Conversion Completed ...")
# if you want to merge PDF's uncomment the next segment
#import PyPDF2
# merger = PyPDF2.PdfFileMerger()
# path = Path(f"{path}\PDF's")
# # (path/"MergedPDF's").touch()
# if (path/"MergedPDF's.pdf").exists():
# (path/"MergedPDF's.pdf").unlink()
# for pdf in path.glob("*.pdf"):
# merger.append(PyPDF2.PdfReader(pdf, "rb"))
# output = open(path/"MergedPDF's.pdf", "wb")
# merger.write(output)
# merger.close()
Pillow solution is pretty good in my opinion. But if u need more control over your pdf I suggest u to use PyMuPDF. Best library for pdf manipulation, hands down.
# copy image files to PDF pages
# each page will have image dimensions
import fitz
doc = fitz.open() # new PDF
imglist = [ ... image file names ...] # e.g. a directory listing
for img in imglist:
imgdoc=fitz.open(img) # open image as a document
pdfbytes=imgdoc.convert_to_pdf() # make a 1-page PDF of it
imgpdf=fitz.open("pdf", pdfbytes)
doc.insert_pdf(imgpdf) # insert the image PDF
doc.save("allmyimages.pdf")
Is pretty handy if u want to add metadata:
import fitz
doc = fitz.open()
metadata = {'producer': 'YourName', 'format': 'PDF 1.4', 'encryption': None, 'author': 'YourName',
'modDate': 'none', 'keywords': 'none', 'title': 'YourPdf', 'creationDate': 'none',
'creator': 'none', 'subject': 'none'} # your metadata needs to be a dictionary
doc.set_metadata(metadata)
table of contents:
import fitz
doc = fitz.open()
# the toc is a basically a list of lists. Each list has 3 elements:
# - first: the layer of the toc link (basically the main category (1), subcategory (2) etc..)
# - second: title of the layer
# - third: page where the title is linked
table_of_content = [[1, 'The PyMuPDF Documentation', 1], [2, 'Introduction', 1], [3, 'Note on the Name fitz', 1], [3, 'License', 1]]
doc.set_toc(table_of_content)
etc... I think that giving a look at the documentation is pretty useful
Related
Say I use the following method to create a three images. I then want to combine these three images into a GIF & display the GIF (jupyter notebook, python 3). All the methods I've seen online & stackoverflow for creating GIFs include saving the images as files & then importing them. For instance, this thread. But is there a way to just generate a gif without having to save/import image files? So, in the following code, using three versions of the im=Image.fromarray(arr.astype('uint8')) generated image to create a gif on the spot?
import numpy as np
from PIL import Image
arr = np.random.randint(low = 0, high = 255, size = (300, 300, 3))
im = Image.fromarray(arr.astype('uint8'))
im.show()
I guess you need something like this. GIF is an image file type so you have to save it to have one.
#! /usr/bin/env python3
import numpy as np
from PIL import Image
im = []
for n in range(20):
arr = np.random.randint(low = 0, high = 255, size = (300, 300, 3))
im.append(Image.fromarray(arr.astype('uint8')))
im[0].save('im.gif', save_all=True, append_images=im[1:], optimize=False, duration=200, loop=0)
#im[0].show()
Then open im.gif with a browser or some app that can show animated GIFs.
If you really don't want to save the GIF but just show it, you can do something like this
#! /usr/bin/env python3
import base64
import io
import numpy as np
from PIL import Image
from viaduc import Viaduc
im = []
for n in range(20):
arr = np.random.randint(low = 0, high = 255, size = (300, 300, 3))
im.append(Image.fromarray(arr.astype('uint8')))
buffer = io.BytesIO()
im[0].save(buffer, format='GIF', save_all=True, append_images=im[1:], optimize=False, duration=200, loop=0)
buffer.seek(0)
data_uri = base64.b64encode(buffer.read()).decode('ascii')
class Presentation(Viaduc.Presentation):
width = 300
height = 300
title = 'gif'
html = '''
<!DOCTYPE html>
<head>
{{bootstrap_meta}} {{bootstrap_css}}
<title>{{title}}</title>
</head>
<body>
<img src="data:image/gif;base64,''' + data_uri + '''">
{{bootstrap_js}}
</body>
</html>
'''
if __name__ == '__main__':
Viaduc(presentation=Presentation())
Firstly, I am simply reading images from the folder that contains different image formats. Secondly, the YOLO model detects the class, draws a rectangle and fills it with color only detected part, and saves it into another folder with the same name.
Second Case, If the model didn't detect anything in an image then it will save the same image with the same name but in a different folder.
My codebase is stuck on the first image and never moves on to the second image. I have no idea what is the problem happening.
Code
import torch
import cv2
from matplotlib import pyplot as plt
from utils.plots import Annotator, colors, save_one_box
import os
import glob
# Load Ours Custom Model
model = torch.hub.load('.', 'custom', path='/media/bmvc/CM_1/yolov5/runs/train/exp4/weights/last.pt', source='local')
# Files extension
img_Extension = ['jpg', 'jpeg', 'png']
# Load all testing images
my_path = "/home/bmvc/Documents/hide_info_test_dataset/testing_images/"
# Save images into array
files = []
[files.extend(glob.glob(my_path + '*.' + e)) for e in img_Extension]
# Iteration on all images
images = [cv2.imread(file) for file in files]
total_images = 1
# Taking only image name to save with save name
image_file_name = ''
for file in files:
for im in images:
detections = model(im[..., ::-1])
results = detections.pandas().xyxy[0].to_dict(orient="records")
if len(results) == 0:
cv2.imwrite(os.path.join("/home/bmvc/Documents/hide_info_test_dataset/detected/", file), im)
else:
for result in results:
print(result['class'])
con = result['confidence']
cs = result['class']
x1 = int(result['xmin'])
y1 = int(result['ymin'])
x2 = int(result['xmax'])
y2 = int(result['ymax'])
imagee = cv2.rectangle(im, (x1, y1), (x2, y2), (0, 255, 0), -1)
cv2.imwrite(os.path.join("/home/bmvc/Documents/hide_info_test_dataset/detected/", file), im)
total_images += 1
I have put a lot of loops that are completely useless for example reading different extension files, reading only images. I have improved the overall implementation and used only one loop to fix the above problem.
import torch
import cv2
from PIL import Image
from utils.plots import Annotator, colors, save_one_box
import os
import glob
import numpy as np
# Load Ours Custom Model
model = torch.hub.load('.', 'custom', path='/media/bmvc/CM_1/yolov5/runs/train/exp4/weights/last.pt', source='local')
# Files extension
img_Extension = ['jpg', 'jpeg', 'png']
# Load all testing images
my_path = "/home/bmvc/Documents/hide_info_test_dataset/testing_images/"
# Save images into array
files = []
[files.extend(glob.glob(my_path + '*.' + e)) for e in img_Extension]
# Iteration on all images
images = [cv2.imread(file) for file in files]
total_images = 1
# Taking only image name to save with save name
image_file_name = ''
for img in glob.glob(my_path + '*.*'):
img_bgr_rgb = cv2.imread(img)
file_Name = os.path.basename(img)
detections = model(img_bgr_rgb[:, :, ::-1])
results = detections.pandas().xyxy[0].to_dict(orient="records")
if len(results) == 0:
cv2.imwrite(os.path.join("/home/bmvc/Documents/hide_info_test_dataset/detected/", file_Name), img_bgr_rgb)
else:
for result in results:
print(result['class'])
con = result['confidence']
cs = result['class']
x1 = int(result['xmin'])
y1 = int(result['ymin'])
x2 = int(result['xmax'])
y2 = int(result['ymax'])
imagee = cv2.rectangle(img_bgr_rgb, (x1, y1), (x2, y2), (255, 87, 51), -1)
cv2.imwrite(os.path.join("/home/bmvc/Documents/hide_info_test_dataset/detected/", file_Name), img_bgr_rgb)
I have the following code that crops part of pdf file then save the output as PDF
from PyPDF2 import PdfFileWriter, PdfFileReader
with open("Sample.pdf", "rb") as in_f:
input1 = PdfFileReader(in_f)
output = PdfFileWriter()
numPages = input1.getNumPages()
print("Document Has %s Pages." % numPages)
for i in range(1):
page = input1.getPage(i)
print(page.mediaBox.getUpperRight_x(), page.mediaBox.getUpperRight_y())
page.trimBox.lowerLeft = (280, 280)
page.trimBox.upperRight = (220, 200)
page.cropBox.lowerLeft = (100, 720)
page.cropBox.upperRight = (220, 800)
output.addPage(page)
with open("Output.pdf", "wb") as out_f:
output.write(out_f)
How can I save as an image not as PDF?
I found this code but the output is not at high quality. How can I improve the quality of the image output?
import fitz
pdffile = "Output.pdf"
doc = fitz.open(pdffile)
page = doc.loadPage(0)
pix = page.getPixmap()
output = "Output.jpg"
pix.writePNG(output)
Hi There You Could Use The pdf2image library for achieving so.
You Could Use The Following Code At The End:
from pdf2image import convert_from_path
images = convert_from_path('Output.pdf')
for i in range(len(images)):
images[i].save('Output'+ str(i) +'.jpg', 'JPEG')
Then If You Wish You Could Use The os library to delete the pdf you made using the following code in order to avoid the hassle of deleting the pdf yourself.
import os
os.remove("Output.pdf")
This solves the problem but I welcome any advanced ideas and improvements
import fitz
pdffile = "Output.pdf"
doc = fitz.open(pdffile)
zoom = 2 # zoom factor
mat = fitz.Matrix(zoom, zoom)
page = doc.loadPage(0)
pix = page.getPixmap(matrix = mat)
output = "Output.jpg"
pix.writePNG(output)
I have this code that I am using to change the images I have saved in a folder called 'images' from .png to .xml with the additional information about them. When I run this code I only get the .xml file for image 000001 which I understand because I am having the code select that specific image. I am unsure how though to select multiple images in my file at a single time. I have images named from 000000 to 000355. Any advice would be great! really do not want to manually run the code 355 times!
import os
import cv2
from lxml import etree
import xml.etree.cElementTree as ET
def write_xml(folder, img, objects, tl, br, savedir):
if not os.path.isdir(savedir):
os.mkdir(savedir)
image = cv2.imread(img.path)
height, width, depth = image.shape
annotation = ET.Element('annotation')
ET.SubElement(annotation, 'folder').text = folder
ET.SubElement(annotation, 'filename').text = img.name
ET.SubElement(annotation, 'segmented').text = '0'
size = ET.SubElement(annotation, 'size')
ET.SubElement(size, 'width').text = str(width)
ET.SubElement(size, 'height').text = str(height)
ET.SubElement(size, 'depth').text = str(depth)
for obj, topl, botr in zip(objects, tl, br):
ob = ET.SubElement(annotation, 'object')
ET.SubElement(ob, 'name').text = obj
ET.SubElement(ob, 'pose').text = 'Unspecified'
ET.SubElement(ob, 'truncated').text = '0'
ET.SubElement(ob, 'difficult').text = '0'
bbox = ET.SubElement(ob, 'bndbox')
ET.SubElement(bbox, 'xmin').text = str(topl[0])
ET.SubElement(bbox, 'ymin').text = str(topl[1])
ET.SubElement(bbox, 'xmax').text = str(botr[0])
ET.SubElement(bbox, 'ymax').text = str(botr[1])
xml_str = ET.tostring(annotation)
root = etree.fromstring(xml_str)
xml_str = etree.tostring(root, pretty_print=True)
save_path = os.path.join(savedir, img.name.replace('png', 'xml'))
with open(save_path, 'wb') as temp_xml:
temp_xml.write(xml_str)
if __name__ == '__main__':
"""
for testing
"""
folder = 'images'
img = [im for im in os.scandir('images') if '000001' in im.name][0]
objects = ['auv']
tl = [(10, 10)]
br = [(100, 100)]
savedir = 'annotations'
write_xml(folder, img, objects, tl, br, savedir)
The basic idea is to make loop going through each of your image files, and do what you did for a single image before for each:
for img in os.scandir('images'):
objects = ['auv']
tl = [(10, 10)]
br = [(100, 100)]
savedir = 'annotations'
write_xml(folder, img, objects, tl, br, savedir)
(You might need to change the expression for your list of images, as it might now include things you don't want to process.)
i'm writing a program which takes all the pictures in a given folder and aggregates them into a pdf. The problem I have is that when the images are drawn, they are bigger in size and are rotated to the left oddly. I've searched everywhere, havent found anything even in the reportlab documentation.
Here's the code:
import os
from PIL import Image
from PyPDF2 import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from reportlab.lib.units import cm
from StringIO import StringIO
def main():
images = image_search()
output = PdfFileWriter()
for image in images:
Image_file = Image.open(image) # need to convert the image to the specific size first.
width, height = Image_file.size
im_width = 1 * cm
# Using ReportLab to insert image into PDF
watermark_str = "watermark" + str(images.index(image)) + '.pdf'
imgDoc = canvas.Canvas(watermark_str)
# Draw image on Canvas and save PDF in buffer
# define the aspect ratio first
aspect = height / float(width)
## Drawing the image
imgDoc.drawImage(image, 0,0, width = im_width, height = (im_width * aspect)) ## at (399,760) with size 160x160
imgDoc.showPage()
imgDoc.save()
# Get the watermark file just created
watermark = PdfFileReader(open(watermark_str, "rb"))
#Get our files ready
pdf1File = open('sample.pdf', 'rb')
page = PdfFileReader(pdf1File).getPage(0)
page.mergePage(watermark.getPage(0))
#Save the result
output.addPage(page)
output.write(file("output.pdf","wb"))
#The function which searches the current directory for image files.
def image_search():
found_images = []
for doc in os.listdir(os.curdir):
image_ext = ['.jpg', '.png', '.PNG', '.jpeg', '.JPG']
for ext in image_ext:
if doc.endswith(ext):
found_images.append(doc)
return found_images
main()
I also tried scaling and specifying the aspect ratio using the im_width variable, which gave the same output.
After a little bit of confusion about your goal I figured out that the goal is to make a PDF overview of the images in the current folder. To do so we actual don't need PyPDF2 as Reportlab offers everything we need for this.
See the code below with the comments as guidelines:
def main():
output_file_loc = "overview.pdf"
imgDoc = canvas.Canvas(output_file_loc)
imgDoc.setPageSize(A4) # This is actually the default page size
document_width, document_height = A4
images = image_search()
for image in images:
# Open the image file to get image dimensions
Image_file = Image.open(image)
image_width, image_height = Image_file.size
image_aspect = image_height / float(image_width)
# Determine the dimensions of the image in the overview
print_width = document_width
print_height = document_width * image_aspect
# Draw the image on the current page
# Note: As reportlab uses bottom left as (0,0) we need to determine the start position by subtracting the
# dimensions of the image from those of the document
imgDoc.drawImage(image, document_width - print_width, document_height - print_height, width=print_width,
height=print_height)
# Inform Reportlab that we want a new page
imgDoc.showPage()
# Save the document
imgDoc.save()