Regarding Image processing/ or Jpeg format without library - python

I am trying to write a python code to download and get the detail of image resolution and size and rescale it to (320*568) convert it into base64 string and save it in JSON format.
input : any jpeg image
output : JSON
{ "image64":"encoded string",
"size":"size of image",
"resolution":"resolution of image" }
till now i have done this by this way:
from PIL import Image
a=Image.open("DSC_2561.jpg")
print(a.size)
a=a.resize((320,568))
how to proceed from here?

Here is the solution:
from PIL import Image
import os
def encodeBase64(path):
img= Image.open(path)
img= img.resize((320,568))
data = img.encode("base64")
json = { "image64": data, "size":os.stat(path).st_size, "resolution":img.size[0] + "x" + img.size[1] }
return json

Related

Rasterio MemoryFile to Pillow image (Or base64 output)

I'm trying to write an AWS Lambda function that takes a TIFF, converts it to JPEG, then outputs it in base64 so that lambda can serve it. But I keep running into malformed response, or issues with reshape_as_image saying axes doesn't match array.
My understanding was that the return of memfile.read() would allow me to use reshape_as_image, however my logic seems faulty.
Without saving to disk, how can I get from memfile to a base64 jpeg representation so that lambda can serve it? I've also tried pillow but I think the necessary step is where I'm failing.
def get_image(self, driver="jpeg"):
data = self.get_image()
with MemoryFile() as memfile:
# Change the driver for output
data[1]['driver'] = driver
with MemoryFile() as memfile:
# Change the driver for output
data[1]['driver'] = driver
with memfile.open(**data[1]) as dataset:
dataset.write(data[0])
image = memfile.read()
image = reshape_as_image(image)
im = Image.open(io.BytesIO(image))
b64data = base64.b64encode(im.tobytes()).decode('utf-8')
return b64data
It seems this isn't necessary for some reason, assuming because memfile.read() gives the actual bytes of the image.
def get_image(self, store=False, driver="GTiff"):
data = self.crop_ortho(store)
with MemoryFile() as memfile:
# Change the driver for output
data[1]['driver'] = driver
with memfile.open(**data[1]) as dataset:
dataset.write(data[0])
image = memfile.read()
im = Image.open(io.BytesIO(image))
im = im.convert('RGB')
# Save bytes to a byte array
imgByteArr = io.BytesIO()
im.save(imgByteArr, format='jpeg')
b64data = base64.b64encode(imgByteArr.getvalue())
return b64data

How to convert Image PIL into Base64 without saving

I generate an image with Python, and I need to convert this Pil Image into a Base64, without saving this one into any folder...
I have some data, and I get RGB img with the line below:
img = Image.fromarray(data,'RGB')
What is the simple way to convert this PIL into base64 ?(I can't open a file image because I must not save the img) ?
Thank you for your help
With Node JS, I can get the correct base64 with these lines :
pythonShell= require("python-shell");
app.post('/index/gen/',urlencodedParser, function (req,res){
pythonShell.run('minigen.py', function (err, results) {
if (err) throw err;
var img = base64img.base64Sync('./images/miniature.jpg');
res.send(img); });
})
But I have to save the file if I use NodeJS...
this is the code to generate the matrix from the image, you don't need to know what is in data ;)
image = Image.open("./carte/"+fichier)
image = image.resize((400,400),Image.ANTIALIAS)
w,h = image.size
tab = numpy.array(image)
data = numpy.zeros((h, w, 3), dtype=numpy.uint8)
I found the solution. Hope this helps !
img = Image.fromarray(data, 'RGB') #Crée une image à partir de la matrice
buffer = BytesIO()
img.save(buffer,format="JPEG") #Enregistre l'image dans le buffer
myimage = buffer.getvalue()
print "data:image/jpeg;base64,"+base64.b64encode(myimage)
#florian answer helped me a lot but base64.b64encode(img_byte) returned bytes so I needed to decode it to string before concatenation (using python 3.6):
def img_to_base64_str(self, img):
buffered = BytesIO()
img.save(buffered, format="PNG")
buffered.seek(0)
img_byte = buffered.getvalue()
img_str = "data:image/png;base64," + base64.b64encode(img_byte).decode()
You can use base64 library like this:
import base64
base64.b64encode(img.tobytes())
See tobytes() method of Image object.
Or you can use something like this:
import glob
import random
import base64
from PIL import Image
from io import BytesIO
import io
def get_thumbnail(path):
path = "\\\\?\\"+path # This "\\\\?\\" is used to prevent problems with long Windows paths
i = Image.open(path)
return i
def image_base64(im):
if isinstance(im, str):
im = get_thumbnail(im)
with BytesIO() as buffer:
im.save(buffer, 'jpeg')
return base64.b64encode(buffer.getvalue()).decode()
def image_formatter(im):
return f'<img src="data:image/jpeg;base64,{image_base64(im)}">'
Just pass path of image in get_thumbnail function and image_formatter to display it in HTML.

Change color scheme when extracting an image from PDF in Python

I am trying to read an image from a pdf following this post:
Extract images from PDF without resampling, in python?
So far I managed to get the image file from the pdf, but it contains a CWYK color scheme and the picture is becoming messed up.
My code is the following:
import PyPDF2
import struct
pdf_filename = 'document.pdf'
pdf_file = open(pdf_filename, 'rb')
cond_scan_reader = PyPDF2.PdfFileReader(pdf_file)
page = cond_scan_reader.getPage(4)
xObject = page['/Resources']['/XObject'].getObject()
for obj in xObject:
print(xObject[obj])
if xObject[obj]['/Subtype'] == '/Image':
if xObject[obj]['/Filter'] == '/DCTDecode':
data = xObject[obj]._data
img = open("image" + ".jpg", "wb")
img.write(data)
img.close()
pdf_file.close()
The point is that when I save, the colors are all weird, I believe it's because of the colorScheme. I have the following in the console:
{'/Type': '/XObject', '/Subtype': '/Image', '/Width': 1122, '/Height': 502, '/Interpolate': <PyPDF2.generic.BooleanObject object at 0x1061574a8>, '/ColorSpace': '/DeviceCMYK', '/BitsPerComponent': 8, '/Filter': '/DCTDecode'}
As you can see, the ColorSpace is CMYK, and I believe that's why the colors of the image are weird.
That's the image I have:
This is the original image (it is inside a pdf file):
Can anyone help me?
Thanks in advance.
Israel
A CMYK mode JPG image that contained in PDF must be invert.
But in PIL, invert of CMYK mode image is not supported.
Than I solve it using numpy.
Full source is in below link.
https://github.com/Gaia3D/pdfImageExtractor/blob/master/extrectImage.py
imgData = np.frombuffer(img.tobytes(), dtype='B')
invData = np.full(imgData.shape, 255, dtype='B')
invData -= imgData
img = Image.frombytes(img.mode, img.size, invData.tobytes())
img.save(outFileName + ".jpg")

How to change image format without writing it to disk using Python Pillow

I got Pillow image that i got from the Internet:
response= urllib2.urlopen(<url to gif image>)
img = Image.open(cStringIO.StringIO(response.read()))
I want to use it with tesserocr but it wont work with GIF images.
If I save the image as PNG img.save("tmp.png") and load it img = Image.open("tmp.png") everything works.
Is there a way to do this conversion without writing to disk?
import io
from PIL import Image
def convertImageFormat(imgObj, outputFormat=None):
"""Convert image format
Args:
imgObj (Image): the Pillow Image instance
outputFormat (str): Image format, eg: "JPEG"/"PNG"/"BMP"/"TIFF"/...
more refer: https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html
Returns:
bytes, binary data of Image
Raises:
"""
newImgObj = imgObj
if outputFormat and (imgObj.format != outputFormat):
imageBytesIO = io.BytesIO()
imgObj.save(imageBytesIO, outputFormat)
newImgObj = Image.open(imageBytesIO)
return newImgObj
call example:
pngImgFile = "xxx.png"
pngImgObj = Image.open(pngImgFile)
convertToFormat = "JPEG"
convertedJpgImgBytes = convertImageFormat(pngImgObj, convertToFormat)
advanced version convertImageFormat can refer my lib crifanPillow.py
import io
from PIL import Image
def convertImageFormat(imgObj, outputFormat=None, isOptimize=False, isKeepPrevValues=True):
"""Convert image format
Args:
imgObj (Image): the Pillow Image instance
outputFormat (str): Image format, eg: "JPEG"/"PNG"/"BMP"/"TIFF"/...
more refer: https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html
isOptimize (bool): do optimize when using save to convert format
isKeepPrevValues (bool): keep previous property values, such as: filename
Returns:
bytes, binary data of Image
Raises:
"""
newImgObj = imgObj
if outputFormat and (imgObj.format != outputFormat):
imageBytesIO = io.BytesIO()
if isOptimize:
imgObj.save(imageBytesIO, outputFormat, optimize=True)
else:
imgObj.save(imageBytesIO, outputFormat)
newImgObj = Image.open(imageBytesIO)
if isKeepPrevValues:
if imgObj.filename:
newImgObj.filename = imgObj.filename
return newImgObj
The solution was very simple:
response= urllib2.urlopen(<url to gif image>)
img = Image.open(cStringIO.StringIO(response.read()))
img = img.convert("RGB")
Note that you need to remove the alpha channel info to make image compatible with tesserocr

Python Wand convert PDF to PNG disable transparent (alpha_channel)

I'm trying to convert a PDF to PNG - this all works fine, however, the output image is still transparent even when I believe I have disabled it:
with Image(filename='sample.pdf', resolution=300) as img:
img.background_color = Color("white")
img.alpha_channel = False
img.save(filename='image.png')
The above produces the images but are transparent, I also tried the below:
with Image(filename='sample.pdf', resolution=300, background=Color('white')) as img:
img.alpha_channel = False
img.save(filename='image.png')
which produces this error:
Traceback (most recent call last):
File "file_convert.py", line 20, in <module>
with Image(filename='sample.pdf', resolution=300, background=Color('white')) as img:
File "/Users/Frank/.virtualenvs/wand/lib/python2.7/site-packages/wand/image.py", line 1943, in __init__
raise TypeError("blank image parameters can't be used with image "
TypeError: blank image parameters can't be used with image opening parameters
I also had some PDFs to convert to PNG. This worked for me and seems simpler than compositing images, as shown above.:
from wand.image import Image
from wand.color import Color
all_pages = Image(blob=self.pdf) # PDF will have several pages.
single_image = all_pages.sequence[0] # Just work on first page
with Image(single_image) as i:
i.format = 'png'
i.background_color = Color('white') # Set white background.
i.alpha_channel = 'remove' # Remove transparency and replace with bg.
Reference: wand.image
From a previous answer, try creating an empty image with a background color, then composite over.
from wand.image import Image
from wand.color import Color
with Image(filename="sample.pdf", resolution=300) as img:
with Image(width=img.width, height=img.height, background=Color("white")) as bg:
bg.composite(img,0,0)
bg.save(filename="image.png")
Compiling the other answers, here is the function I use to convert a PDF into pages:
import os
from wand.image import Image
from wand.color import Color
def convert_pdf(filename, output_path, resolution=150):
""" Convert a PDF into images.
All the pages will give a single png file with format:
{pdf_filename}-{page_number}.png
The function removes the alpha channel from the image and
replace it with a white background.
"""
all_pages = Image(filename=filename, resolution=resolution)
for i, page in enumerate(all_pages.sequence):
with Image(page) as img:
img.format = 'png'
img.background_color = Color('white')
img.alpha_channel = 'remove'
image_filename = os.path.splitext(os.path.basename(filename))[0]
image_filename = '{}-{}.png'.format(image_filename, i)
image_filename = os.path.join(output_path, image_filename)
img.save(filename=image_filename)
The other answer (compositing with a white image) works, but only on the last page, as does setting the alpha channel directly. The following works on wand 0.4.2:
im = wand_image(filename='/tmp/foo.pdf', resolution=200)
for i, page in enumerate(im.sequence):
with wand_image(page) as page_image:
page_image.alpha_channel = False
page_image.save(filename='/tmp/foo.pdf.images/page-%s.png' % i)
I think this is probably a bug in wand. It seems like setting the alpha channel for a PDF should affect all pages, but it doesn't.
For those who are still having problem with this, I found solution (it works in version 0.4.1 and above, I am not sure about earlier versions).
So you should just use something like this:
from wand.image import Image
from wand.color import Color
with Image(filename='sample.pdf', resolution=300) as img:
img.background_color = Color("white")
img.alpha_channel = 'remove'
img.save(filename='image.png')

Categories