read images from zipfile in python throwing errors - python

I am trying to get metadata from images inside a zip file, and it's throwing this error:
PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x00000157389C2130>
here is my code:
import zipfile
from PIL import Image
from PIL.ExifTags import TAGS
from io import BytesIO
with zipfile.ZipFile("samples.zip", "r") as f:
for name in f.namelist():
image_data = f.read(name)
image = Image.open(BytesIO(image_data))
exif_data = image.getexif()
width, height = image.size
print(width, height)
I tried every solution I could find and still get the error. Please help.

Related

Python Attribute Error Raised while using Thumbnail method of PIL

I am using PIL to make an application that opens all images in a folder. I looked for PIL tutorials that work with a list of images, but I could not find a suitable one. I found some, but they required listing the file locations beforehand, which annoyed me. So, instead, I want the user to choose a folder, and the application would load all the images for the user. But while making the thumbnails for the list of images, I got an error which I'm not familiar with. This is the exact error:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Users\Admin\AppData\Local\Programs\Python\Python39\lib\tkinter\__init__.py", line
1892, in __call__
return self.func(*args)
File "f:\OG\Python\ImageViewer.py", line 47, in openFolder
GetFiles()
File "f:\OG\Python\ImageViewer.py", line 87, in GetFiles
with Image.open(i) as img:
prefix = fp.read(16)
raise AttributeError(name)
The minimal code to get this error is:
import glob
from PIL import Image, ImageTk
fileDir = "Your Folder"
imageList = []
image_list = []
for filename in glob.glob(fileDir + '/*.jpg'): # gets jpg
im = Image.open(filename)
imageList.append(im)
for i in imageList:
with Image.open(i) as img: # This raises the error
imageList[i] = img.thumbnail((550, 450))
for i in image_list: # Would this work?
image_list[i] = ImageTk.PhotoImage(imageList[i])
I would like to know if the code that is commented with 'Would this work?' would work or not.
Just remove the second Image.open call, which doesn't make sense: imageList already holds opened Image objects, not filenames.
import glob
from PIL import Image, ImageTk
fileDir =r"your path"
imageList = []
for filename in glob.glob(fileDir + '/*.jpg'): # gets jpg
im = Image.open(filename)
imageList.append(im)
imageList will look like this :
[<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=200x200 at 0x25334A87D90>]
here is the blockbuster solution
import glob
from PIL import Image, ImageTk
import PIL
from pathlib import Path
fileDir = r"your_path_here"
imageList = []
for filename in glob.glob(fileDir + '/*.jpg'): # gets jpg
im = Image.open(filename)
imageList.append(im)
im.thumbnail((550, 450))
im.save(fileDir+'/'+Path(filename).name.split('.')[0]+'_thumbnail.png')
I solved it, I edited the code as follows:
import glob
from PIL import Image, ImageTk
fileDir = "Your Folder"
imageList = []
image_list = []
count = 0
for filename in glob.glob(fileDir + '/*.jpg'): # gets jpg
imageList.append(filename)
for i in imageList:
with Image.open(i) as img:
i = img.thumbnail((550, 450))
for i in imageList: # This gives a Key Error Now
image_list.append(ImageTk.PhotoImage(imageList[count]))
count = count + 1
Basically, I introduced a new variable count with a value of 0, removed Image.open from the first for loop, used the append method in the last for loop, and incremented count by 1 each time :)

image to text conversion using Tesseract

I am trying to load all images in a folder and extract text from the images. I keep getting an error message in the second for loop. For example,
AttributeError: 'numpy.ndarray' object has no attribute 'read'
It seems I cannot access the list img. Any ideas?
# import OpenCV, Numpy, Python image library, Tesseract OCR
import os
import cv2
import numpy
from PIL import Image
import pytesseract
import glob
#set tesseract path
pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files (x86)/Tesseract-OCR/tesseract.exe'
#read all image with .jpg format in a specifying folder
img = []
for i in glob.glob("C:\\Users\\daizhang\\Desktop\\Deloitte Development\\Python\\Reports\\Image\\*.jpg"):
n= cv2.imread(i,0) #convert image to grayscale
print(i)
img.append(n)
for j in img:
im = Image.open(j)
text = pytesseract.image_to_string (j, lang='eng')
with open("C:\\Users\\daizhang\\Desktop\\Deloitte Development\\Python\Reports\\Image\\test.txt", "w") as f:
f.write(text.encode('utf8'))
I am on Mac OS X, but you can adjust the paths in this code for Windows.
import os
from os import path
from glob import glob
from pytesseract import image_to_string
from PIL import Image, ImageEnhance, ImageFilter
def enhance_img(filename):
    """Enhance the image at *filename* for OCR and save it under a new name.

    The image is median-filtered, has its contrast doubled and is converted
    to 1-bit black and white. The result is written next to the original as
    ``<name without extension>_1.png``.

    Returns the path of the enhanced image.
    """
    # The image has to be opened first; `im` was previously used before it
    # was ever assigned.
    im = Image.open(filename)
    im = im.filter(ImageFilter.MedianFilter())
    enhancer = ImageEnhance.Contrast(im)
    im = enhancer.enhance(2)
    im = im.convert('1')
    # Derive a real output name: the literal 'newfilename' has no extension,
    # so Pillow cannot infer which format to save in.
    root, _ = path.splitext(filename)
    newfilename = root + '_1.png'
    im.save(newfilename)
    return newfilename
def convert_img(filename):
    """Run OCR on the image at *filename* and append the text to parsing.txt."""
    # Context managers close both the image and the output file; the bare
    # `file.close` (no parentheses) never actually closed the file.
    with Image.open(filename) as image:
        text = image_to_string(image)
    with open('parsing.txt', 'a') as out:
        out.write(text)
def find_ext(dir, ext):
    """Return the paths in directory *dir* whose names end in ``.<ext>``.

    An empty *dir* searches the current working directory.
    """
    return glob(path.join(dir, "*.{}".format(ext)))
# use the following for change directory
# os.chdir(path)
filename = find_ext("","png")
for file in filename:
# convert image to text
convert_img(file)
If you want to enhance the image then include the following block and adjust the code above to loop through the new filenames.
def enhance_img(filename):
    """Enhance the image at *filename* for OCR and save it under a new name.

    The image is median-filtered, has its contrast doubled and is converted
    to 1-bit black and white. The result is written next to the original as
    ``<name without extension>_1.png``.

    Returns the path of the enhanced image.
    """
    # The image has to be opened first; `im` was previously used before it
    # was ever assigned.
    im = Image.open(filename)
    im = im.filter(ImageFilter.MedianFilter())
    enhancer = ImageEnhance.Contrast(im)
    im = enhancer.enhance(2)
    im = im.convert('1')
    # Derive a real output name: the literal 'newfilename' has no extension,
    # so Pillow cannot infer which format to save in.
    root, _ = path.splitext(filename)
    newfilename = root + '_1.png'
    im.save(newfilename)
    return newfilename


# `filename` is the list of image paths; `file` is the current path.
for file in filename:
    # to enhance image if needed
    newfilename = enhance_img(file)

Python - how to read an image from a URL?

I am completely new to Python and I'm trying to figure out how to read an image from a URL.
Here is my current code:
from PIL import Image
import urllib.request, io
URL = 'http://www.w3schools.com/css/trolltunga.jpg'
with urllib.request.urlopen(URL) as url:
s = url.read()
Image.open(s)
I get the following error:
C:\python>python image.py
Traceback (most recent call last):
File "image.py", line 8, in <module>
Image.open(s)
File "C:\Anaconda3\lib\site-packages\PIL\Image.py", line 2272, in open
fp = builtins.open(filename, "rb")
ValueError: embedded null byte
I have no idea what any of this means. What am I doing wrong?
Image.open() expects a filename or a file-like object, not the raw file data.
You can write the image to a local file - e.g. "temp.jpg" - and then open it
from PIL import Image
import urllib.request
URL = 'http://www.w3schools.com/css/trolltunga.jpg'
with urllib.request.urlopen(URL) as url:
with open('temp.jpg', 'wb') as f:
f.write(url.read())
img = Image.open('temp.jpg')
img.show()
Or you can create a file-like object in memory using the io module
from PIL import Image
import urllib.request
import io
URL = 'http://www.w3schools.com/css/trolltunga.jpg'
with urllib.request.urlopen(URL) as url:
f = io.BytesIO(url.read())
img = Image.open(f)
img.show()
EDIT: 2022
Because urlopen() also returns a file-like object, you can even skip io and pass url directly (without .read()) to Image.open()
from PIL import Image
import urllib.request
URL = 'http://www.w3schools.com/css/trolltunga.jpg'
with urllib.request.urlopen(URL) as url:
img = Image.open(url)
img.show()
Here's how to read an image from a URL using scikit-image
from skimage import io
io.imshow(io.imread("http://www.w3schools.com/css/trolltunga.jpg"))
io.show()
Note: io.imread() returns a numpy array
To begin with, you may download the image to your current working directory first
from urllib.request import urlretrieve
url = 'http://www.w3schools.com/css/trolltunga.jpg'
urlretrieve(url, 'pic.jpg')
And then open/read it locally:
from PIL import Image
img = Image.open('pic.jpg')
# For example, check image size and format
print(img.size)
print(img.format)
img.show()
As suggested in this stack overflow answer, you can do something like this:
import urllib, cStringIO
from PIL import Image
file = cStringIO.StringIO(urllib.urlopen(URL).read())
img = Image.open(file)
Then you can use your image freely.
For example, you can convert it to a numpy array:
img_npy = np.array(img)

Upload Image To Imgur After Resizing In PIL

I am writing a script which will get an image from a link. Then the image will be resized using the PIL module and then uploaded to Imgur using pyimgur. I don't want to save the image on disk; instead I want to manipulate the image in memory and then upload it from memory to Imgur.
The Script:
from pyimgur import Imgur
import cStringIO
import requests
from PIL import Image
LINK = "http://pngimg.com/upload/cat_PNG106.png"
CLIENT_ID = '29619ae5d125ae6'
im = Imgur(CLIENT_ID)
# Upload `img` through the module-level Imgur client `im` and return the link of the uploaded image.
def _upload_image(img, title):
uploaded_image = im.upload_image(img, title=title)
return uploaded_image.link
# Fetch the image at `link`, resize it to (width, height) and return the PNG-encoded data.
def _resize_image(width, height, link):
#Retrieve our source image from a URL
fp = requests.get(link)
#Load the URL data into an image
img = cStringIO.StringIO(fp.content)
# NOTE: this local `im` shadows the module-level Imgur client inside this function
im = Image.open(img)
#Resize the image
im2 = im.resize((width, height), Image.NEAREST)
#saving the image into a cStringIO object to avoid writing to disk
out_im2 = cStringIO.StringIO()
im2.save(out_im2, 'png')
return out_im2.getvalue()
When I run this script I get this error: TypeError: file() argument 1 must be encoded string without NULL bytes, not str
Does anyone have a solution in mind?
It looks like the same problem as this, and the solution is to use StringIO.
A common tip for searching such issues is to search using the generic part of the error message/string.

Preserve exif data of image with PIL when resize(create thumbnail)

When I try to resize (thumbnail) an image using PIL, the exif data is lost.
What do I have to do to preserve exif data in the thumbnail image? When I searched for the same, I got some links, but none seem to be working.
from PIL import Image
import StringIO
file_path = '/home/me/img/a.JPG'
im = Image.open( file_path)
THUMB_SIZES = [(512, 512)]
for thumbnail_size in THUMB_SIZES:
im.thumbnail( thumbnail_size, Image.ANTIALIAS)
thumbnail_buf_string = StringIO.StringIO()
im.save('512_' + "a", "JPEG")
The original image has exif data, but image im(512_a.JPEG) doesn't.
I read through some of the source code and found a way to make sure that the exif data is saved with the thumbnail.
When you open a jpg file in PIL, the Image object has an info attribute which is a dictionary. One of the keys is called exif and it has a value which is a byte string - the raw exif data from the image. You can pass this byte string to the save method and it should write the exif data to the new jpg file:
from PIL import Image
size = (512, 512)
im = Image.open('P4072956.jpg')
im.thumbnail(size, Image.ANTIALIAS)
exif = im.info['exif']
im.save('P4072956_thumb.jpg', exif=exif)
To get a human-readable version of the exif data you can do the following:
from PIL import Image
from PIL.ExifTags import TAGS
im = Image.open('P4072956.jpg')
for k, v in im._getexif().items():
print TAGS.get(k, k), v
In my project, I ran into the same issue as you. After searching Google, I found the piexif library. It helps Pillow save exif data to thumbnails.
You can use the source code below:
from PIL import Image
import piexif
import StringIO
file_path = '/home/me/img/a.JPG'
im = Image.open( file_path)
# load exif data
exif_dict = piexif.load(im.info["exif"])
exif_bytes = piexif.dump(exif_dict)
THUMB_SIZES = [(512, 512)]
for thumbnail_size in THUMB_SIZES:
im.thumbnail( thumbnail_size, Image.ANTIALIAS)
thumbnail_buf_string = StringIO.StringIO()
# save thumbnail with exif data
im.save('512_' + "a", "JPEG", exif=exif_bytes)
Note: I am using python 3.4 and ubuntu 14.04
import pyexiv2
from PIL import Image
file_path = '/home/../img/a.JPG'
metadata = pyexiv2.ImageMetadata(file_path)
metadata.read()
thumb = metadata.exif_thumbnail
thumb.set_from_file(file_path)
thumb.write_to_file('512_' + "a")
thumb.erase()
metadata.write()
Now when I open the image using Patch Image Inspector, I can see the exif data.

Categories