I've recently found this (wonderful) python software to convert multiple images to a single pdf, img2pdf. After create the first pdf I realized that every page hasn't got any title and it's difficult identify what's the original image (because there're 400), does anyone know how can I add a page title?
Thanks in advance.
I tried to find the same solution but ended up writing a Python program to solve it. I dont know if it helps you but here is a solution nonetheless.
In Python I used PIL.Image and ImageDraw to go through all images and put the filename in each if the images. After that I used img2pdf as a python library to generate the pdf.
Must be run it in the same folder of the images.
import os
import img2pdf
from PIL import Image, ImageDraw, ImageFont, ExifTags
# Enter the path to the font you want, 'fc-list' on ubuntu will get a list of fonts you can use.
#image_text_font = ImageFont.truetype('/Library/Fonts/Arial.ttf', 15)
image_text_font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf", 32)
# Tags the images with 'file name' in the upper left corner
def tag_images():
for file in os.listdir('.'):
if file.endswith(".jpg") and str(file+"_tagged.jpg") not in os.listdir('.') and not file.endswith("_tagged.jpg"):
one_image = check_and_adjust_rotation(Image.open(file))
one_image_draw = ImageDraw.Draw(one_image)
# Add textbox to image
size = one_image_draw.textsize(file, font=image_text_font)
offset = image_text_font.getoffset(file)
one_image_draw.rectangle((10, 10, 10 + size[0] + offset[0], 10 + size[1] + offset[1]), fill='white', outline='black')
# Add text to image
one_image_draw.text((10,10), file, font=image_text_font, fill='black')
# Save tagged image
one_image.save(file + "_tagged.jpg")
print(f'Tagged and saved "{file}_tagged.jpg".')
# Generate the PDF
def generate_pdf_from_multiple_images():
with open("output.pdf", "wb") as f:
f.write(img2pdf.convert([image_file for image_file in os.listdir('.') if image_file.endswith("_tagged.jpg")]))
# Use exif information about rotation to apply proper rotation to the image
def check_and_adjust_rotation(image):
try :
for orientation in ExifTags.TAGS.keys() :
if ExifTags.TAGS[orientation]=='Orientation' : break
exif=dict(image._getexif().items())
print(exif[orientation])
if exif[orientation] == 3 :
image=image.rotate(180, expand=True)
elif exif[orientation] == 6 :
image=image.rotate(270, expand=True)
elif exif[orientation] == 8 :
image=image.rotate(90, expand=True)
except:
traceback.print_exc()
return image
def main():
tag_images()
generate_pdf_from_multiple_images()
if __name__ == '__main__':
main()
Related
I was trying to extract images from a pdf using PyMuPDF (fitz). My pdf has multiple images in a single page. I am maintaining a proper sequence number while saving my images. I saw that the images being extracted don't follow a proper sequence. Sometimes it is starting to extract from the bottom, sometimes from the top and so on. Is there a way to modify my code so that the extraction follow a proper sequence?
Given below is the code I am using :
import fitz
from PIL import Image
filename = "document.pdf"
doc = fitz.open(filename)
for i in range(len(doc)):
img_num = 0
p_no = 1
for img in doc.getPageImageList(i):
xref = img[0]
pix = fitz.Pixmap(doc, xref)
if pix.n - pix.alpha < 4:
img_num += 1
pix.writeImage("%s-%s.jpg" % (str(p_no),str(img_num)))
else:
img_num += 1
pix1 = fitz.Pixmap(fitz.csRGB, pix)
pix1.writeImage("%s-%s.jpg" % (str(p_no),str(img_num)))
pix1 = None
pix = None
p_no += 1
Given below is a sample page of the pdf
I have the same problem I've used the following code:
import fitz
import io
from PIL import Image
file = "file_path"
pdf_file = fitz.open(file)
for page_index in range(len(pdf_file)):
# get the page itself
page = pdf_file[page_index]
image_list = page.getImageList()
# printing number of images found in this page
if image_list:
print(f"[+] Found {len(image_list)} images in page {page_index}")
else:
print("[!] No images found on the given pdf page", page_index)
for image_index, img in enumerate(page.getImageList(), start=1):
print(img)
print(image_index)
# get the XREF of the image
xref = img[0]
# extract the image bytes
base_image = pdf_file.extractImage(xref)
image_bytes = base_image["image"]
# get the image extension
image_ext = base_image["ext"]
# load it to PIL
image = Image.open(io.BytesIO(image_bytes))
# save it to local disk
image.save(open(f"image{page_index+1}_{image_index}.{image_ext}", "wb"))
The most probable way is to locate the 'img' var and order them.
I'd love to hear any further sggestions or if you found better idea/solution.
I'm having a hard time trying to access/save images using the python-pptx library. So, if the image is of shape type PICTURE (that's shape.shape_type == MSO_SHAPE_TYPE.PICTURE) I can access/save the image easily using the 'blob' attribute. Here is the code:
import argparse
import os
from PIL import Image
import pptx
from pptx.enum.shapes import MSO_SHAPE_TYPE
from pptx import Presentation
from mdutils.mdutils import MdUtils
from mdutils import Html
def main():
parser = argparse.ArgumentParser()
parser.add_argument('ppt_name', type=str, help='add the name of the PowerPoint file(NOTE: the folder must be in the same directory as the prorgram file')
args = parser.parse_args()
pptx_name = args.ppt_name
pptx_name_formatted = pptx_name.split('.')[0]
prs = Presentation(pptx_name)
path = '{}_converted'.format(pptx_name_formatted)
if not os.path.exists(path):
os.mkdir(path)
images_folder = '{}_images'.format(pptx_name_formatted)
images_path = os.path.join(path, images_folder)
if not os.path.exists(images_path):
os.mkdir(images_path)
ppt_dict = {} #Keys: slide numbers, values: slide content
texts = []
slide_count = 0
picture_count = 0
for slide in prs.slides:
texts = []
slide_count += 1
for shape in slide.shapes:
if shape.has_text_frame:
if '\n' in shape.text:
splitted = shape.text.split('\n')
for word in splitted:
if word != '':
texts.append(word)
elif shape.text == '':
continue
else:
texts.append(shape.text)
elif shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
with open('{}/image{}_slide{}.png'.format(images_path, picture_count, slide_count), 'wb') as f:
f.write(shape.image.blob)
picture_count += 1
ppt_dict[slide_count] = texts
ppt_content = ''
for k,v in ppt_dict.items():
ppt_content = ppt_content + ' - Slide number {}\n'.format(k)
for a in v:
ppt_content = ppt_content + '\t - {}\n'.format(a)
mdFile = MdUtils(file_name='{}/{}'.format(path,path)) #second argument isn't path, it just shares the path name.
mdFile.write(ppt_content)
mdFile.create_md_file()
if __name__ == "__main__":
main()
The problem is when the picture is of shape type 'auto shape' , I tried a lot of approaches but to no avail. When I do run the following code for a shape that I know is a picture:
if shape.shape_type == MSO_SHAPE_TYPE.AUTO_SHAPE:
print(shape.auto_shape_type)
print(shape.fill.type)
#indented because it's in a for loop
It outputs RECTANGLE for shape.auto_shape_type
and PICTURE for shape.fill.type
But what I want now is to save the picture (maybe by writing the the binary image bytestream of the image). Can someone help?
The "link" to the image (part, having the blob) is in the fill definition. Using that you can get to the image.
Print out the XML for the surroundings of the fill definition with shape.fill._xPr.xml. That will give you a look at what you need to navigate to. Good chance it will look something like "rId9" with some particular other number where the "9" placeholder is in that example. Probably in the neighborhood of something like "blipfill". The image is used as the "fill" of the shape, so that's what's going on here.
Then get the slide part with something like slide._part and use its .related_parts "dict" to look up the image "fill" part using the relationship-id (the string like "rId9").
image_part = slide._part.related_parts["rId9"]
The ImagePart implementation is here:
https://github.com/scanny/python-pptx/blob/master/pptx/parts/image.py#L21
and it gives access to the image and a lot of details about it as well.
You'll have to retrieve the "rId9"-like string using lxml calls, something roughly like:
rIds = shape.fill._xPr.xpath(".//#embed")
rId = rIds[0]
You'll need to do a little research on XPath to work out the right expression, based on the XML you print out in the earlier step. There's a lot out there on XPath, including here on SO, this is one resource to get started: http://www.rpbourret.com/xml/XPathIn5.htm
If you can't work it out, post the XML you printed out and we can get you to the next step.
Here is my approach, thanks to scanny.
for slide in prs.slides:
slide_count += 1
slide_parts = list(slide._part.related_parts.keys())
for part in slide_parts:
image_part = slide._part.related_parts[part]
if type(image_part) == pptx.parts.image.ImagePart or pptx.opc.package.Part:
file_startswith = image_part.blob[0:1]
if file_startswith == b'\x89' or file_startswith == b'\xff' or file_startswith == b'\x47':
with open('{}/image{}_slide{}.png'.format(images_path, picture_count, slide_count), 'wb') as f:
f.write(image_part.blob)
picture_count += 1
the if condition to check for PNG, JPEG or GIF is there because pptx.opc.package.Part isn't always an image.
Actually, I think since I'm checking for the beginning of image_part.blob, I don't think I need to include say if type(image_part) == pptx.parts.image.ImagePart or pptx.opc.package.Part:
But as long as it's working...
I have the following code:
import face_recognition
from PIL import Image, ImageDraw
from tkinter import Tk
from tkinter.filedialog import askopenfilename
from shutil import copyfile
#Ask user for file name
Tk().withdraw()
filename = askopenfilename()
#Add known images
image_of_person = face_recognition.load_image_file(filename)
person_face_encoding = face_recognition.face_encodings(image_of_person)[0]
for i in range (1, 8):
#Construct the picture name and print it
file_name = str(i).zfill(5) + ".jpg"
print(file_name)
#Load the file
newPic = face_recognition.load_image_file(file_name)
#Search every detected face
for face_encoding in face_recognition.face_encodings(newPic):
results = face_recognition.compare_faces([person_face_encoding], face_encoding, 0.5)
#If match, show it
if results[0] == True:
copyFile(file_name, "./img/saved" + file_name)
The intention is to use the known image (image_of_person) and search a folder of images ('./img/unknown') for a match, then show the matched photo.
I receive the error:
No such file or directory: '00001.jpg'
On the line
newPic = face_recognition.load_image_file(file_name)
How do I point the recognition to the sample of images folder?
Note: for i in range (1, 8): - 8 Images are in the sample folder.
I think your problem is you're not giving the right path when trying to load the images.
Change
file_name = str(i).zfill(5) + ".jpg"
to
file_name = f"./img/unknown/{str(i).zfill(5)}.jpg"
Note: If you're using python2, then
file_name = "./img/unknown/{}.jpg".format(str(i).zfill(5)
Another tip, if you want your code to be generic, no matter how many images there are, you can do
for i in range(1, len(os.listdir("./img/unknown"))).
Or, even better, you can simply do
for img in os.listdir("img/unknown"):
file_name = os.path.join("img/unknown", img)
... continue with the rest of the flow ...
The context of my Computer Vision assignment: Computing a homography matrix, where one of the tasks is to create a GUI to select points on the images (to get the coordinates, and pixel values).
I've looked through StackOverflow and found some options, i.e using tkinter, but the answers were from 2011. I was wondering whether there are other or new options out there.
Here is the code that worked for me using openCV 3.3.1
For camera calibration see my github repo https://github.com/abhishek098/camera_calibration .
import numpy as np
import yaml
import cv2
'''
1. change the path to load image and store data
2. double click on the image to save the co-ordinates
3. images should be named as 0.jpg, 1.jpg, 2.jpg .....
4.
'''
# set image and data path here..
path = "/home/abhishek/stuff/object_detection/explore/"
points = []
def mouseCB(event,x,y,flags,param):
if event == cv2.EVENT_LBUTTONDBLCLK :
print (x, y)
points.append([x, y])
count = 0
exit_flag = True
points_list = []
while exit_flag:
window_name = 'image_' + str(count)
cv2.namedWindow(window_name)
cv2.setMouseCallback(window_name, mouseCB)
image_name = path + str(count) + '.jpg'
img = cv2.imread(image_name)
cv2.imshow(window_name, img)
while True:
ip = cv2.waitKey(0) & 0xFF
if ip == ord('n'):
count += 1
points_list.append(points)
points = []
cv2.destroyAllWindows()
break
elif ip == ord('q'):
exit_flag = False
break
print (count)
data = {'img_co': np.asarray(points_list).tolist()}
file = path + "data.yaml"
with open(file, "w") as f:
yaml.dump(data, f)
cv2.destroyAllWindows()
i'm writing a program which takes all the pictures in a given folder and aggregates them into a pdf. The problem I have is that when the images are drawn, they are bigger in size and are rotated to the left oddly. I've searched everywhere, havent found anything even in the reportlab documentation.
Here's the code:
import os
from PIL import Image
from PyPDF2 import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from reportlab.lib.units import cm
from StringIO import StringIO
def main():
images = image_search()
output = PdfFileWriter()
for image in images:
Image_file = Image.open(image) # need to convert the image to the specific size first.
width, height = Image_file.size
im_width = 1 * cm
# Using ReportLab to insert image into PDF
watermark_str = "watermark" + str(images.index(image)) + '.pdf'
imgDoc = canvas.Canvas(watermark_str)
# Draw image on Canvas and save PDF in buffer
# define the aspect ratio first
aspect = height / float(width)
## Drawing the image
imgDoc.drawImage(image, 0,0, width = im_width, height = (im_width * aspect)) ## at (399,760) with size 160x160
imgDoc.showPage()
imgDoc.save()
# Get the watermark file just created
watermark = PdfFileReader(open(watermark_str, "rb"))
#Get our files ready
pdf1File = open('sample.pdf', 'rb')
page = PdfFileReader(pdf1File).getPage(0)
page.mergePage(watermark.getPage(0))
#Save the result
output.addPage(page)
output.write(file("output.pdf","wb"))
#The function which searches the current directory for image files.
def image_search():
found_images = []
for doc in os.listdir(os.curdir):
image_ext = ['.jpg', '.png', '.PNG', '.jpeg', '.JPG']
for ext in image_ext:
if doc.endswith(ext):
found_images.append(doc)
return found_images
main()
I also tried scaling and specifying the aspect ratio using the im_width variable, which gave the same output.
After a little bit of confusion about your goal I figured out that the goal is to make a PDF overview of the images in the current folder. To do so we actual don't need PyPDF2 as Reportlab offers everything we need for this.
See the code below with the comments as guidelines:
def main():
output_file_loc = "overview.pdf"
imgDoc = canvas.Canvas(output_file_loc)
imgDoc.setPageSize(A4) # This is actually the default page size
document_width, document_height = A4
images = image_search()
for image in images:
# Open the image file to get image dimensions
Image_file = Image.open(image)
image_width, image_height = Image_file.size
image_aspect = image_height / float(image_width)
# Determine the dimensions of the image in the overview
print_width = document_width
print_height = document_width * image_aspect
# Draw the image on the current page
# Note: As reportlab uses bottom left as (0,0) we need to determine the start position by subtracting the
# dimensions of the image from those of the document
imgDoc.drawImage(image, document_width - print_width, document_height - print_height, width=print_width,
height=print_height)
# Inform Reportlab that we want a new page
imgDoc.showPage()
# Save the document
imgDoc.save()