Download WMF from pptx and decode to JPEG - python

Hello stackoverflow users.
I am trying to extract an image from a PowerPoint presentation and then process it (to recognize numbers on it at certain coordinates).
My problem is that I can download an image from pptx data only in .wmf format, and I cannot convert it. I have tried all possible solutions already.
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE

# Pull the raw bytes of every picture found on one slide of the presentation.
pptx_path = "name_pptx.pptx"
prs = Presentation(pptx_path)
# prs.slides is zero-indexed, so slide number 6 lives at index 5.
desired_slide = prs.slides[6 - 1]
for shape in desired_slide.shapes:
    if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
        # Each picture overwrites the previous values; after the loop these
        # names refer to the last picture on the slide.
        image_file_bytes = shape.image.blob
        file_extension = shape.image.ext  # at this point the format is wmf
Interestingly, in PowerPoint I can select the desired .jpeg extension when saving the file.

It took me a few hours to solve my problem; converting a WMF file to JPG is a bit tricky on Windows. I add the image to a temporary Excel file and then extract the image from it.
class ExcelHelpers():
    """Convert a picture (e.g. WMF) to JPEG by round-tripping it through Excel.

    Typical use: call ``add_img_to_excel`` to embed the picture in a scratch
    workbook, then ``get_img_from_excel`` to copy it back out through the
    Windows clipboard and save it as a JPEG.  Requires Windows with Excel
    installed, plus the ``xlsxwriter``, ``pywin32`` and ``Pillow`` packages.
    """

    @staticmethod
    def add_img_to_excel(path_to_wmf):
        """Create ``test.xlsx`` in the current directory with the picture at A1.

        path_to_wmf: path of the image file to embed in the workbook.
        """
        import xlsxwriter

        workbook = xlsxwriter.Workbook('test.xlsx')
        worksheet = workbook.add_worksheet()
        worksheet.insert_image('A1', path_to_wmf)
        workbook.close()

    @staticmethod
    def get_img_from_excel(long_filename):
        """Save the picture stored in ``test.xlsx`` as ``<basename>.jpg``.

        long_filename: path of the original image file.  The part of its base
            name before the first dot becomes the JPEG's name; the file itself
            is deleted once the conversion has succeeded.

        Raises RuntimeError if the clipboard does not hold an image after the
        shape has been copied.  On success both ``long_filename`` and
        ``test.xlsx`` are removed; on failure they are left in place.
        """
        import os

        from PIL import Image, ImageGrab
        import win32com.client as win32

        filename = os.path.basename(long_filename).split('.')[0]
        path_to_excel = os.path.join(os.getcwd(), 'test.xlsx')
        excel = win32.gencache.EnsureDispatch('Excel.Application')
        try:
            workbook = excel.Workbooks.Open(path_to_excel)
            try:
                for sheet in workbook.Worksheets:
                    for shape in sheet.Shapes:
                        if not shape.Name.startswith('Picture'):
                            continue
                        shape.Copy()
                        image = ImageGrab.grabclipboard()
                        # grabclipboard() yields None (or a list of file names)
                        # when the clipboard holds no bitmap.
                        if not isinstance(image, Image.Image):
                            raise RuntimeError(
                                'Clipboard does not contain an image after '
                                'copying shape {!r}'.format(shape.Name))
                        # JPEG cannot store alpha or palette data.
                        if image.mode not in ('RGB', 'L', 'CMYK'):
                            image = image.convert('RGB')
                        image.save('{}.jpg'.format(filename), 'jpeg')
            finally:
                # Close the workbook even on failure so the file is unlocked.
                workbook.Close()
        finally:
            # Always shut Excel down, otherwise a hidden process is left behind.
            excel.Quit()
            del excel
        os.remove(long_filename)
        os.remove('test.xlsx')

Related

Rotate an image in a Workbook using openpyxl module in python

Is there a way to rotate an image inserted into a Workbook created with openpyxl in Python? I can change the width and height of the image but there is no rotate attribute for the image. I can definitely import the image using PIL and rotate it but that snippet code could not be used with the Workbook.
from openpyxl import Workbook
from openpyxl.drawing.image import Image
# Create a new workbook and take its active worksheet.
wb = Workbook()
ws = wb.active
# Load the picture from disk (the path is a placeholder).
img = Image(r'C:\your path\Image.jpg,png...etc')
# Resize the picture; only width and height are set here, no rotation.
img.width= 150
img.height = 200
# Anchor the picture at cell C4 and write the workbook to disk.
ws.add_image(img, "C4")
wb.save('imagecheck.xlsx')

Can we copy image from AWS S3 and write it into excel file (S3) without storing the data locally using Python?

I have a jpeg image stored in s3.
My scenario is to create an excel file with that image inserted into it and load the excel file in the s3, without storing the excel file locally, using Python.
You can do it using xlsxwriter package.
import xlsxwriter
import pandas as pd
import boto3
from io import BytesIO

# Build an .xlsx file entirely in memory, embed an image fetched from S3,
# and upload the result back to S3 without touching the local disk.
# AWS_SERVER_PUBLIC_KEY, AWS_SERVER_SECRET_KEY, XXXX and pandas_dataframe are
# placeholders the caller must define.
boto_object = boto3.resource(
    's3',
    region_name='us-west-2',
    aws_access_key_id=AWS_SERVER_PUBLIC_KEY,
    aws_secret_access_key=AWS_SERVER_SECRET_KEY,
)
# The Bucket resource is identified by its name, passed positionally
# (the keyword form is ``name=``, not ``bucket_name=``).
bucket_object = boto_object.Bucket(XXXX)
img_path = "images/data/sample_image.jpeg"  # can be PNG, JPG or BMP
with BytesIO() as output:
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        pandas_dataframe.to_excel(writer, sheet_name='sample_sheetname')  # creating a sheet first
        image_obj = bucket_object.Object(img_path)
        response = image_obj.get()
        image_data = BytesIO(response['Body'].read())
        # img_path only serves as the image's name here; the pixels come
        # from the in-memory 'image_data' buffer.
        writer.sheets['sample_sheetname'].insert_image(
            'A1',
            img_path,
            options={
                'x_scale': 0.5,
                'y_scale': 0.5,
                'x_offset': 80,
                'y_offset': 10,
                'image_data': image_data,
            },
        )  # adding data to existing writer object
    # Read the buffer only after the writer has been closed (so the workbook
    # is fully written) but before the BytesIO itself is closed.
    data = output.getvalue()
bucket_object.put_object(Key='<path>/sample_output.xlsx', Body=data)
output:

Convert PDF page to image with pyPDF2 and BytesIO

I have a function that gets a page from a PDF file via pyPdf2 and should convert the first page to a png (or jpg) with Pillow (PIL Fork)
from PyPDF2 import PdfFileWriter, PdfFileReader
import os
from PIL import Image
import io
# Open the source PDF, located relative to this module's directory.
# NOTE(review): filename, name and pagenum are not defined in this excerpt;
# presumably they are supplied by the enclosing function.
app_path = os.path.dirname(__file__)
src_pdf= PdfFileReader(open(os.path.join(app_path, "../../../uploads/%s" % filename), "rb"))
# Copy the first page of the source into a new, single-page PDF.
dst_pdf = PdfFileWriter()
dst_pdf.addPage(src_pdf.getPage(0))
# Serialize the single-page PDF into an in-memory buffer and rewind it.
pdf_bytes = io.BytesIO()
dst_pdf.write(pdf_bytes)
pdf_bytes.seek(0)
file_name = "../../../uploads/%s_p%s.png" % (name, pagenum)
# NOTE: pdf_bytes holds a PDF document, not image data, so this call is where
# "OSError: cannot identify image file" is raised.
img = Image.open(pdf_bytes)
img.save(file_name, 'PNG')
pdf_bytes.flush()
That results in an error:
OSError: cannot identify image file <_io.BytesIO object at 0x0000023440F3A8E0>
I found some threads with a similar issue, (PIL open() method not working with BytesIO) but I cannot see where I am wrong here, as I have pdf_bytes.seek(0) already added.
Any hints appreciated
Per document:
write(stream) Writes the collection of pages added to this object out
as a PDF file.
Parameters: stream – An object to write the file to. The object must
support the write method and the tell method, similar to a file
object.
So the object pdf_bytes contains a PDF file, not an image file.
The reason code like the above sometimes works is that the PDF file may contain nothing but a JPEG image as its content. If your PDF is just a normal PDF file, you can't simply read its bytes and parse them as an image.
For a more robust implementation, refer to: https://stackoverflow.com/a/34116472/334999
[![enter image description here][1]][1]
import glob
import sys

import fitz

# Render every page of a PDF to its own PNG file.
# To get better resolution
zoom_x = 2.0  # horizontal zoom
zoom_y = 2.0  # vertical zoom
mat = fitz.Matrix(zoom_x, zoom_y)  # zoom factor 2 in each dimension
filename = "/xyz/abcd/1234.pdf"  # name of pdf file you want to render
doc = fitz.open(filename)
for page_number, page in enumerate(doc, start=1):
    pix = page.get_pixmap(matrix=mat)  # render page to an image
    # Use a per-page file name: with a single fixed name each page would
    # overwrite the previous one and only the last page would survive.
    pix.save("/xyz/abcd/1234_p{}.png".format(page_number))  # store image as a PNG
Credit
[Convert PDF to Image in Python Using PyMuPDF][2]
https://towardsdatascience.com/convert-pdf-to-image-in-python-using-pymupdf-9cc8f602525b

Preserve exif data of image with PIL when resize(create thumbnail)

When I try to resize (thumbnail) an image using PIL, the exif data is lost.
What do I have to do preserve exif data in the thumbnail image? When I searched for the same, got some links but none seem to be working.
from PIL import Image
import StringIO

# Question code: shrink an image to each requested thumbnail size and save it.
# No EXIF data is passed to save(), so the output file carries none.
file_path = '/home/me/img/a.JPG'
im = Image.open(file_path)
THUMB_SIZES = [(512, 512)]
for thumbnail_size in THUMB_SIZES:
    # thumbnail() resizes the image in place, keeping the aspect ratio.
    im.thumbnail(thumbnail_size, Image.ANTIALIAS)
    thumbnail_buf_string = StringIO.StringIO()
    im.save('512_' + "a", "JPEG")
The original image has exif data, but the image im (512_a.JPEG) doesn't.
I read through some of the source code and found a way to make sure that the exif data is saved with the thumbnail.
When you open a jpg file in PIL, the Image object has an info attribute which is a dictionary. One of the keys is called exif and it has a value which is a byte string - the raw exif data from the image. You can pass this byte string to the save method and it should write the exif data to the new jpg file:
from PIL import Image

# Create a thumbnail that keeps the EXIF block of the source JPEG.
size = (512, 512)
im = Image.open('P4072956.jpg')
# Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
im.thumbnail(size, Image.LANCZOS)
# The raw EXIF bytes live under the 'exif' key of im.info; the key is absent
# when the source image has no EXIF data.
exif = im.info.get('exif')
if exif:
    im.save('P4072956_thumb.jpg', exif=exif)
else:
    im.save('P4072956_thumb.jpg')
To get a human-readable version of the exif data you can do the following:
from PIL import Image
from PIL.ExifTags import TAGS

# Print every EXIF entry of the image with a readable tag name.
im = Image.open('P4072956.jpg')
# _getexif() returns None when the image carries no EXIF data.
for k, v in (im._getexif() or {}).items():
    # Fall back to the numeric tag id when the tag has no known name.
    print(TAGS.get(k, k), v)
In my project, I ran into the same issue as you. After searching Google, I found the piexif library. It helps Pillow save exif data to thumbnails.
You can use the source code below:
from PIL import Image
import piexif

# Create thumbnails that keep the EXIF data of the source image, using piexif
# to parse and re-serialize the EXIF block.
file_path = '/home/me/img/a.JPG'
im = Image.open(file_path)
# load exif data
exif_dict = piexif.load(im.info["exif"])
exif_bytes = piexif.dump(exif_dict)
THUMB_SIZES = [(512, 512)]
for thumbnail_size in THUMB_SIZES:
    # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
    im.thumbnail(thumbnail_size, Image.LANCZOS)
    # save thumbnail with exif data
    im.save('512_' + "a", "JPEG", exif=exif_bytes)
Note: I am using python 3.4 and ubuntu 14.04
import pyexiv2
from PIL import Image
file_path = '/home/../img/a.JPG'
# Read the metadata of the source image.
metadata = pyexiv2.ImageMetadata(file_path)
metadata.read()
# Work with the thumbnail object exposed by the metadata.
# NOTE(review): presumably set_from_file() loads the thumbnail data from the
# given file and write_to_file() writes it out under the given name; confirm
# against the pyexiv2 documentation.
thumb = metadata.exif_thumbnail
thumb.set_from_file(file_path)
thumb.write_to_file('512_' + "a")
# NOTE(review): erase() is called before the metadata is written back, so it
# looks like the thumbnail is dropped from the source metadata; confirm.
thumb.erase()
metadata.write()
Now when I open the image using (Patch Image Inspector), I can see the exif data.

Python parsing XLS with images [duplicate]

I found some Python2 code to extract images from Excel files.
I have a very fundamental question: Where shall I specify the path of my target excel file?
Or does it only work with an active opened Excel file?
import win32com.client  # Need pywin32 from pip
from PIL import ImageGrab  # Need PIL as well
import os

# Save every picture of the workbook that is currently active in Excel as a
# JPEG next to that workbook.
excel = win32com.client.Dispatch("Excel.Application")
workbook = excel.ActiveWorkbook
wb_folder = workbook.Path
wb_name = workbook.Name
wb_path = os.path.join(wb_folder, wb_name)
print("Extracting images from", wb_path)
image_no = 0
for sheet in workbook.Worksheets:
    for n, shape in enumerate(sheet.Shapes):
        if shape.Name.startswith("Picture"):
            # Some debug output for console
            image_no += 1
            # Apply the format with %; passing image_no as a second print()
            # argument would print the raw "%07i" placeholder.
            print("---- Image No. %07i ----" % image_no)
            # Sequence number the pictures, if there's more than one
            num = "" if n == 0 else "_%03i" % n
            filename = sheet.Name + num + ".jpg"
            file_path = os.path.join(wb_folder, filename)
            print('Saving as ', file_path)
            shape.Copy()  # Copies from Excel to Windows clipboard
            # Use PIL (python imaging library) to save from Windows clipboard
            # to a file
            image = ImageGrab.grabclipboard()
            image.save(file_path, 'jpeg')
You can grab images from existing Excel file like this:
from PIL import Image, ImageGrab
import win32com.client as win32

# Open an existing workbook by path and save each of its pictures as a JPEG
# by copying the shape to the Windows clipboard.
excel = win32.gencache.EnsureDispatch('Excel.Application')
try:
    workbook = excel.Workbooks.Open(r'C:\Users\file.xlsx')
    try:
        for sheet in workbook.Worksheets:
            for i, shape in enumerate(sheet.Shapes):
                if shape.Name.startswith('Picture'):  # or try 'Image'
                    shape.Copy()
                    image = ImageGrab.grabclipboard()
                    # grabclipboard() yields None (or a list of file names)
                    # when the clipboard holds no bitmap.
                    if not isinstance(image, Image.Image):
                        raise RuntimeError(
                            'Clipboard does not contain an image after '
                            'copying shape {!r}'.format(shape.Name))
                    # JPEG cannot store alpha or palette data.
                    if image.mode not in ('RGB', 'L', 'CMYK'):
                        image = image.convert('RGB')
                    image.save('{}.jpg'.format(i+1), 'jpeg')
    finally:
        # Nothing was modified, so close without saving.
        workbook.Close(False)
finally:
    # Shut Excel down so no hidden process is left running.
    excel.Quit()
An xlsx file is actually a zip file. You can directly get the images from the xl/media subfolder. You can do this in python using the ZipFile class. You don't need to have MS Excel or even run in Windows!
The file path and file name are defined in these variables:
# Folder and file name are read from the workbook object, then joined into
# the workbook's full path.
wb_folder = workbook.Path
wb_name = workbook.Name
wb_path = os.path.join(wb_folder, wb_name)
In this particular case, it calls the active workbook at the line prior:
# Uses whichever workbook is currently active in Excel; no path is given.
workbook = excel.ActiveWorkbook
But you should theoretically be able to specify path using the wb_folder and wb_name variables, as long as you load the file on the excel module (Python: Open Excel Workbook using Win32 COM Api).

Categories