Image in image.show isn't showing anything (Python 3 notebook) - python

My output doesn't show anything, and I honestly can't figure out why.
This is the full code, but I think the problem is where I pass the argument in aRed, aGreen, aBlue, originalImage = openImage(response.content).
When I run this code in a Colab Python notebook, my image isn't showing up for some reason. Maybe it's the way I'm passing the URL as an argument in the line above?
import numpy
from PIL import Image
import requests
from io import BytesIO

# FUNCTION DEFINITIONS:

# open the image and return 3 matrices, each corresponding to one channel (R, G and B channels)
def openImage(imagePath):
    imOrig = Image.open(BytesIO(imagePath))
    im = numpy.array(imOrig)

    aRed = im[:, :, 0]
    aGreen = im[:, :, 1]
    aBlue = im[:, :, 2]

    return [aRed, aGreen, aBlue, imOrig]

# compress the matrix of a single channel
def compressSingleChannel(channelDataMatrix, singularValuesLimit):
    uChannel, sChannel, vhChannel = numpy.linalg.svd(channelDataMatrix)
    aChannelCompressed = numpy.zeros((channelDataMatrix.shape[0], channelDataMatrix.shape[1]))
    k = singularValuesLimit

    leftSide = numpy.matmul(uChannel[:, 0:k], numpy.diag(sChannel)[0:k, 0:k])
    aChannelCompressedInner = numpy.matmul(leftSide, vhChannel[0:k, :])
    aChannelCompressed = aChannelCompressedInner.astype('uint8')

    return aChannelCompressed
# MAIN PROGRAM:
response = requests.get('https://i.imgur.com/BIOFZNo.png')
print('*** Image Compression using SVD - a demo')
aRed, aGreen, aBlue, originalImage = openImage(response.content)

# image width and height:
imageWidth = 1000
imageHeight = 1000

# number of singular values to use for reconstructing the compressed image
singularValuesLimit = 160

aRedCompressed = compressSingleChannel(aRed, singularValuesLimit)
aGreenCompressed = compressSingleChannel(aGreen, singularValuesLimit)
aBlueCompressed = compressSingleChannel(aBlue, singularValuesLimit)

imr = Image.fromarray(aRedCompressed, mode=None)
img = Image.fromarray(aGreenCompressed, mode=None)
imb = Image.fromarray(aBlueCompressed, mode=None)

newImage = Image.merge("RGB", (imr, img, imb))

originalImage.show()
newImage.show()
The program runs without errors; it just doesn't display anything.
Thank you all!
Here is the link to my file: https://colab.research.google.com/drive/12K0nWKRdOpZ3gSfTn0wuP8Y0_UUeUxEE
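A side note on compressSingleChannel: numpy.linalg.svd returns the singular values as a 1-D vector, so the rank-k reconstruction can also be written without building the full diagonal matrix. A minimal sketch, equivalent in output to the leftSide / aChannelCompressedInner lines above:

k = singularValuesLimit
u, s, vh = numpy.linalg.svd(channelDataMatrix, full_matrices=False)
# scale the first k left singular vectors by the first k singular values,
# then project back onto the first k right singular vectors
approx = (u[:, :k] * s[:k]) @ vh[:k, :]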

You don't need to call .show() in interactive environments like Colab; an image object left as the last expression of a code cell is rendered inline. The code stays exactly the same up to the Image.merge line; just replace the two .show() calls at the end with:

originalImage

originalImage will be displayed. For the new image, put this in the next code cell:

newImage
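If you want to show both images from a single cell instead, IPython's display function (available in Colab) works too; a minimal sketch:

from IPython.display import display

display(originalImage)
display(newImage)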

Related

Unable to extract a word out of an image

I've written a script in Python, in combination with pytesseract, to extract a word out of an image. The image contains only a single word, TOOLS, and that is what I'm after. Currently my script below gives me the wrong output, which is WIS. What can I do to get the right text?
Link to that image
This is my script:
import requests, io, pytesseract
from PIL import Image
response = requests.get('http://facweb.cs.depaul.edu/sgrais/images/Type/Tools.jpg')
img = Image.open(io.BytesIO(response.content))
img = img.resize([100,100], Image.ANTIALIAS)
img = img.convert('L')
img = img.point(lambda x: 0 if x < 170 else 255)
imagetext = pytesseract.image_to_string(img)
print(imagetext)
# img.show()
This is the status of the modified image when I run the above script:
The output I'm having:
WIS
Expected output:
TOOLS
The key is matching the image transformation to Tesseract's abilities. Your main problem is that the font is not a usual one. All you need is:
import requests, io, pytesseract
from PIL import Image, ImageEnhance, ImageFilter

response = requests.get('http://facweb.cs.depaul.edu/sgrais/images/Type/Tools.jpg')
img = Image.open(io.BytesIO(response.content))

# remove texture
enhancer = ImageEnhance.Color(img)
img = enhancer.enhance(0) # decolorize
img = img.point(lambda x: 0 if x < 250 else 255) # set threshold
img = img.resize([300, 100], Image.LANCZOS) # resize to remove noise
img = img.point(lambda x: 0 if x < 250 else 255) # get rid of remains of noise

# adjust font weight
img = img.filter(ImageFilter.MaxFilter(11)) # lighten the font ;)

imagetext = pytesseract.image_to_string(img)
print(imagetext)
And voilà,
TOOLS
is recognized.
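If Tesseract still mis-segments a cleaned-up image, it can also help to tell it what kind of layout to expect. A small sketch, assuming a Tesseract 4+ build where page segmentation modes are passed via --psm:

# treat the image as a single line of text (page segmentation mode 7)
imagetext = pytesseract.image_to_string(img, config='--psm 7')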
The key issue with your implementation lies here:
img = img.resize([100,100], Image.ANTIALIAS)
img = img.point(lambda x: 0 if x < 170 else 255)
You could try different sizes and different thresholds:
import requests, io, pytesseract
from PIL import Image
from PIL import ImageFilter

response = requests.get('http://facweb.cs.depaul.edu/sgrais/images/Type/Tools.jpg')
img = Image.open(io.BytesIO(response.content))

filters = [
    # ('nearest', Image.NEAREST),
    ('box', Image.BOX),
    # ('bilinear', Image.BILINEAR),
    # ('hamming', Image.HAMMING),
    # ('bicubic', Image.BICUBIC),
    ('lanczos', Image.LANCZOS),
]
subtle_filters = [
    # 'BLUR',
    # 'CONTOUR',
    'DETAIL',
    'EDGE_ENHANCE',
    'EDGE_ENHANCE_MORE',
    # 'EMBOSS',
    'FIND_EDGES',
    'SHARPEN',
    'SMOOTH',
    'SMOOTH_MORE',
]

for name, filt in filters:
    for subtle_filter_name in subtle_filters:
        for s in range(220, 250, 10):
            for threshold in range(250, 253, 1):
                img_temp = img.copy()
                img_temp.thumbnail([s, s], filt)
                img_temp = img_temp.convert('L')
                img_temp = img_temp.point(lambda x: 0 if x < threshold else 255)
                img_temp = img_temp.filter(getattr(ImageFilter, subtle_filter_name))
                imagetext = pytesseract.image_to_string(img_temp)
                print(s, threshold, name, subtle_filter_name, imagetext)
                with open('thumb%s_%s_%s_%s.jpg' % (s, threshold, name, subtle_filter_name), 'wb') as g:
                    img_temp.save(g)
and see what works for you.
I would suggest resizing your image while keeping the original aspect ratio. You could also try some alternative to img_temp.convert('L').
Best results so far: TWls and T0018.
You can also try manipulating the image manually and see if you can find an edit that gives better output (for instance http://gimpchat.com/viewtopic.php?f=8&t=1193).
Knowing the font in advance would probably let you achieve a better result too.

Why am I getting this error in facenet?

I am trying to run facematch (facenet) on my virtual machine (Google Cloud Platform). At first things ran smoothly and it was embedding the points of the faces, but then, out of the blue, my code stopped working.
In the first script you can see the imports are there.
In the second script you can see the imports are there as well.
These are the ls commands, so you can see that all the directories/modules are present, along with the errors I'm getting.
Can anyone share some insight into what I'm doing wrong?
Face_match_demo code:
import tensorflow as tf
import numpy as np
import facenet
from align import detect_face
import cv2
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--img1", type=str, required=True)
parser.add_argument("--img2", type=str, required=True)
args = parser.parse_args()

# some constants kept as default from facenet
minsize = 20
threshold = [0.6, 0.7, 0.7]
factor = 0.709
margin = 44
input_image_size = 160

sess = tf.Session()

# read pnet, rnet, onet models from align directory and files are det1.npy, det2.npy, det3.npy
pnet, rnet, onet = detect_face.create_mtcnn(sess, 'align')

# read 20170512-110547 model file downloaded from https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk
facenet.load_model("20170512-110547/20170512-110547.pb")

# Get input and output tensors
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]

def getFace(img):
    faces = []
    img_size = np.asarray(img.shape)[0:2]
    bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
    if not len(bounding_boxes) == 0:
        for face in bounding_boxes:
            if face[4] > 0.50:
                det = np.squeeze(face[0:4])
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)
                bb[1] = np.maximum(det[1] - margin / 2, 0)
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                resized = cv2.resize(cropped, (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC)
                prewhitened = facenet.prewhiten(resized)
                faces.append({'face': resized, 'rect': [bb[0], bb[1], bb[2], bb[3]], 'embedding': getEmbedding(prewhitened)})
    return faces

def getEmbedding(resized):
    reshaped = resized.reshape(-1, input_image_size, input_image_size, 3)
    feed_dict = {images_placeholder: reshaped, phase_train_placeholder: False}
    embedding = sess.run(embeddings, feed_dict=feed_dict)
    return embedding

def compare2face(img1, img2):
    face1 = getFace(img1)
    face2 = getFace(img2)
    if face1 and face2:
        # calculate Euclidean distance
        dist = np.sqrt(np.sum(np.square(np.subtract(face1[0]['embedding'], face2[0]['embedding']))))
        return dist
    return -1

img1 = cv2.imread(args.img1)
img2 = cv2.imread(args.img2)
distance = compare2face(img1, img2)
threshold = 1.10  # set yourself to meet your requirement
print("distance = " + str(distance))
face_embeddings_demo code:
import tensorflow as tf
from align import detect_face
import facenet
import cv2
import imutils
import numpy as np
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--img", type=str, required=True)
args = parser.parse_args()

# some constants kept as default from facenet
minsize = 20
threshold = [0.6, 0.7, 0.7]
factor = 0.709
margin = 44
input_image_size = 160

sess = tf.Session()

# read pnet, rnet, onet models from align directory and files are det1.npy, det2.npy, det3.npy
pnet, rnet, onet = detect_face.create_mtcnn(sess, 'align')

# read 20170512-110547 model file downloaded from https://drive.google.com/file/d/0B5MzpY9kBtDVZ2RpVDYwWmxoSUk
facenet.load_model("20170512-110547/20170512-110547.pb")

# Get input and output tensors
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
embedding_size = embeddings.get_shape()[1]

def getFace(img):
    faces = []
    img_size = np.asarray(img.shape)[0:2]
    bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
    if not len(bounding_boxes) == 0:
        for face in bounding_boxes:
            if face[4] > 0.50:
                det = np.squeeze(face[0:4])
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)
                bb[1] = np.maximum(det[1] - margin / 2, 0)
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                resized = cv2.resize(cropped, (input_image_size, input_image_size), interpolation=cv2.INTER_CUBIC)
                prewhitened = facenet.prewhiten(resized)
                faces.append({'face': resized, 'rect': [bb[0], bb[1], bb[2], bb[3]], 'embedding': getEmbedding(prewhitened)})
    return faces

def getEmbedding(resized):
    reshaped = resized.reshape(-1, input_image_size, input_image_size, 3)
    feed_dict = {images_placeholder: reshaped, phase_train_placeholder: False}
    # print(feed_dict)
    embedding = sess.run(embeddings, feed_dict=feed_dict)
    return embedding

img = cv2.imread(args.img)
img = imutils.resize(img, width=1000)
faces = getFace(img)
for face in faces:
    print("Embeddings = " + str(face['embedding']))

cv2.waitKey(0)
cv2.destroyAllWindows()
You have to have an __init__.py file in a directory for Python to recognize it as a package. It can be an empty file, but it has to be present, and you don't have one in the align directory.
From the documentation:
The __init__.py files are required to make Python treat the directories as containing packages
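As an illustration, the layout should look roughly like this (the file names besides __init__.py are the ones mentioned in the question; an empty file created with touch align/__init__.py is enough):

align/
    __init__.py     # empty file; marks align as a package
    detect_face.py
    det1.npy
    det2.npy
    det3.npy
face_match_demo.py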
From your comment, the error

usage: face_match_demo.py [-h] --img1 IMG1 --img2 IMG2
face_match_demo.py: error: ambiguous option: --img=images/faces.jpg could match --img2, --img1

means that face_match_demo.py is a utility to match two images, to say whether they contain the same face or not. So you have to provide two images to it, using the --img1 and --img2 options, like this:
python face_match_demo.py --img1 images/faces.jpg --img2 [[another face image]]

How to write a function in Python that can rotate an image (cImage)

How to write the code? I could only come up with this:
def rotateImage90CW(image):
    pic = FileImage(image)
    oldw = pic.getWidth()
    oldh = pic.getHeight()
    newIm = EmptyImage(oldw, oldh)
    for row in range(oldh):
        for col in range(oldw):
            oldPixel = pic.getPixel(col, row)
            newIm.setPixel(oldw - row, col, oldPixel)
    newIm.draw(myWin)
If you use PIL/Pillow:

from PIL import Image

im = Image.open(image)
im.rotate(90).show()

Note that rotate() turns counter-clockwise, so for the 90° clockwise rotation you asked about, use im.rotate(-90) (or im.transpose(Image.ROTATE_270)).
Also, in your example the new image's dimensions should be swapped (EmptyImage(oldh, oldw)), and oldw-row in the setPixel call should be oldh-1-row; using row by itself would give you a transpose rather than a rotation.
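Put together, a minimal sketch of the corrected function, assuming the cImage API used in the question (FileImage, EmptyImage, getPixel, setPixel):

def rotateImage90CW(image):
    pic = FileImage(image)
    oldw = pic.getWidth()
    oldh = pic.getHeight()
    # width and height swap in a 90-degree rotation
    newIm = EmptyImage(oldh, oldw)
    for row in range(oldh):
        for col in range(oldw):
            oldPixel = pic.getPixel(col, row)
            # pixel (col, row) lands at (oldh - 1 - row, col) after a clockwise turn
            newIm.setPixel(oldh - 1 - row, col, oldPixel)
    return newIm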

Resize/crop image encoded as base64 image string

In my case, there are two ways of getting the image to resize/crop:
1. uploading a normal image file
2. receiving the image as a base64 string
In the first case, resize and crop work well:
f = Image.open(uploaded_image)
new_width, new_height = 1200, 630
wpercent = (new_width / float(f.size[0]))
hsize = int((float(f.size[1]) * float(wpercent)))
if f.mode != "RGB":
    f = f.convert('RGB')
og_img = None
if f.size[0] < new_width:
    # upscale
    og_img = f.resize((new_width, hsize), Image.BICUBIC)
elif f.size[0] >= new_width:
    # downscale
    og_img = f.resize((new_width, hsize), Image.ANTIALIAS)
og_img = og_img.crop((0, 0, 1200, 630))
The resized/cropped image comes out fine.
In the second case, the code is the same as above, with a slight change:
from base64 import decodestring

base64_image = str(request.POST.get('base64_image')).split(',')[1]
imgfile = open('/'.join([settings.MEDIA_ROOT, 'test.png']), 'w+b')
imgfile.write(decodestring(base64_image))
imgfile.seek(0)
f = Image.open(imgfile)
#.. as above
but this time the resized/cropped image comes out bad in quality and size (with a black bottom part). Why is the second case so much worse? What am I doing wrong? Am I reading the base64 string the wrong way?
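For what it's worth, a minimal sketch that decodes the payload in memory instead of going through a temp file (this assumes the POST field carries a data URI such as data:image/png;base64,...; the padding fix guards against base64 strings whose trailing = signs were stripped in transit):

import base64
from io import BytesIO
from PIL import Image

data = request.POST.get('base64_image').split(',')[1]
data += '=' * (-len(data) % 4)  # restore any stripped base64 padding
f = Image.open(BytesIO(base64.b64decode(data)))
# ... then resize/crop f exactly as in the first case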
I found a website that has many interesting things on it. It has two tools (among many) that may help you: the first converts an image to base64, and the second minifies the size of an image (up to 70% savings).
http://www.w3docs.com/tools/minimage/
http://www.w3docs.com/tools/image-base64

OpenCV + Numpy Script

The issue I'm having is that the two scripts below both output this error: https://i.imgur.com/sLH6Mv4.png
TypeError: FeatureDetector.detect() takes at most 2 arguments (3 given)
I can avoid it in script 2 below by deleting
useProvidedKeypoints = False
from the end of
kp, descritors = surf.detect(imgg,None,useProvidedKeypoints = False)
but that leads to this error in the second script: https://i.imgur.com/ap0odal.png
TypeError: float() argument must be a string or a number
and this error in the first script: i.imgur.com/UVzNvP1.png (2-link limit, added manually)
TypeError: trainData data type = 17 is not supported
Any help would be greatly appreciated. The main thing I want out of this is a script I can tweak and edit until I understand the functions involved a little better.
In summary: I'm not really sure why kp, descritors = surf.detect(imgg,None,useProvidedKeypoints = False) is telling me there are too many arguments, because the person who helped me write this seemed to think it should work.
Script 1:
import cv2
import numpy as np

img = cv2.imread('win18.jpg')
imgg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

surf = cv2.SURF()
kp, descritors = surf.detect(imgg, None, useProvidedKeypoints=False)

samples = np.array(descritors)
responses = np.arange(len(kp), dtype=np.float32)

knn = cv2.KNearest()
knn.train(samples, responses)

template = cv2.imread('win17.jpg')
templateg = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
keys, desc = surf.detect(templateg, None, useProvidedKeypoints=False)

for h, des in enumerate(desc):
    des = np.array(des, np.float32).reshape((1, 128))
    retval, results, neigh_resp, dists = knn.find_nearest(des, 1)
    res, dist = int(results[0][0]), dists[0][0]

    if dist < 0.1:
        color = (0, 0, 255)
    else:
        print(dist)
        color = (255, 0, 0)

    x, y = kp[res].pt
    center = (int(x), int(y))
    cv2.circle(img, center, 2, color, -1)

    x, y = keys[h].pt
    center = (int(x), int(y))
    cv2.circle(template, center, 2, color, -1)

cv2.imshow('img', img)
cv2.imshow('tm', template)
cv2.waitKey(0)
cv2.destroyAllWindows()
Script 2:
import cv2
import numpy

opencv_haystack = cv2.imread('win12.jpg')
opencv_needle = cv2.imread('win1.jpg')

ngrey = cv2.cvtColor(opencv_needle, cv2.COLOR_BGR2GRAY)
hgrey = cv2.cvtColor(opencv_haystack, cv2.COLOR_BGR2GRAY)

hessian_threshold = 85
detector = cv2.SURF(hessian_threshold)
(hkeypoints, hdescriptors) = detector.detect(hgrey, None, useProvidedKeypoints=False)
(nkeypoints, ndescriptors) = detector.detect(ngrey, None, useProvidedKeypoints=False)

rowsize = len(hdescriptors) / len(hkeypoints)
if rowsize > 1:
    hrows = numpy.array(hdescriptors, dtype=numpy.float32).reshape((-1, rowsize))
    nrows = numpy.array(ndescriptors, dtype=numpy.float32).reshape((-1, rowsize))
else:
    hrows = numpy.array(hdescriptors, dtype=numpy.float32)
    nrows = numpy.array(ndescriptors, dtype=numpy.float32)
    rowsize = len(hrows[0])

samples = hrows
responses = numpy.arange(len(hkeypoints), dtype=numpy.float32)

knn = cv2.KNearest()
knn.train(samples, responses)

# match each needle descriptor against the haystack set, following the same
# find_nearest pattern as script 1 (res, dist and i below depend on this loop)
for i, descriptor in enumerate(nrows):
    descriptor = numpy.array(descriptor, dtype=numpy.float32).reshape((1, rowsize))
    retval, results, neigh_resp, dists = knn.find_nearest(descriptor, 1)
    res, dist = int(results[0][0]), dists[0][0]

    if dist < 0.1:
        color = (0, 0, 255)
    else:
        color = (255, 0, 0)

    x, y = hkeypoints[res].pt
    center = (int(x), int(y))
    cv2.circle(opencv_haystack, center, 2, color, -1)

    x, y = nkeypoints[i].pt
    center = (int(x), int(y))
    cv2.circle(opencv_needle, center, 2, color, -1)

cv2.imshow('haystack', opencv_haystack)
cv2.imshow('needle', opencv_needle)
cv2.waitKey(0)
cv2.destroyAllWindows()
Hi, I know it's late, but for anyone still facing this problem: try replacing detect() with detectAndCompute().
That's how I got rid of the error.
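For reference, a minimal sketch of how this looks on newer OpenCV builds (this assumes the opencv-contrib package with the non-free modules enabled, where SURF lives under cv2.xfeatures2d):

import cv2

img = cv2.imread('win18.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

surf = cv2.xfeatures2d.SURF_create(400)      # 400 is the Hessian threshold
kp, des = surf.detectAndCompute(gray, None)  # keypoints and descriptors in one call
print(len(kp), des.shape)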
when in doubt, ...

>>> s = cv2.SURF()
>>> help(s.detect)
Help on built-in function detect:

detect(...)
    detect(image[, mask]) -> keypoints

so, your assumptions about the args to SURF.detect() were quite off.
