I want to convert images to MNIST format, with black digits on a white background. The output should look like this:
but I am getting output like this:
from PIL import Image, ImageFilter
import matplotlib.pyplot as plt

def imageprepare(argv):
    im = Image.open(argv).convert('L')
    width = float(im.size[0])
    height = float(im.size[1])
    newImage = Image.new('L', (28, 28), 255)  # creates a white canvas of 28x28 pixels

    if width > height:  # check which dimension is bigger
        # Width is bigger. Width becomes 20 pixels.
        nheight = int(round((20.0 / width * height), 0))  # resize height according to width ratio
        if nheight == 0:  # rare case, but the minimum is 1 pixel
            nheight = 1
        # resize and sharpen (Image.LANCZOS was named Image.ANTIALIAS in older Pillow)
        img = im.resize((20, nheight), Image.LANCZOS).filter(ImageFilter.SHARPEN)
        wtop = int(round(((28 - nheight) / 2), 0))  # calculate vertical position
        newImage.paste(img, (4, wtop))  # paste resized image onto the white canvas
    else:
        # Height is bigger. Height becomes 20 pixels.
        nwidth = int(round((20.0 / height * width), 0))  # resize width according to height ratio
        if nwidth == 0:  # rare case, but the minimum is 1 pixel
            nwidth = 1
        # resize and sharpen
        img = im.resize((nwidth, 20), Image.LANCZOS).filter(ImageFilter.SHARPEN)
        wleft = int(round(((28 - nwidth) / 2), 0))  # calculate horizontal position
        newImage.paste(img, (wleft, 4))  # paste resized image onto the white canvas

    newImage.save("D:/Desktop/desktop/MNIST/sample1.png")

    tv = list(newImage.getdata())  # get pixel values
    # normalize pixels to the 0-1 range; after inversion 0 is pure white, 1 is pure black
    tva = [(255 - x) * 1.0 / 255.0 for x in tv]
    print(tva)
    return tva

x = imageprepare('D:/Desktop/desktop/MNIST/0/2.bmp')  # file path here
print(len(x))  # MNIST images are 28x28 = 784 pixels
Can you please help me figure out where I should modify the code? Thank you :)
[Desired output]:
[Output I am getting]:
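Without seeing the attached images I can only guess, but a common cause of this symptom is that the source image already has light digits on a dark background, so the inversion at the end of imageprepare flips them the wrong way. A minimal sketch of a check-and-invert step that could run right after opening the image (the mean-brightness heuristic is an assumption, not a confirmed diagnosis):

from PIL import Image, ImageOps, ImageStat

im = Image.open('D:/Desktop/desktop/MNIST/0/2.bmp').convert('L')
# If the image is mostly dark, the digit is probably light-on-dark;
# invert it so the white-canvas logic in imageprepare works as intended.
if ImageStat.Stat(im).mean[0] < 128:
    im = ImageOps.invert(im)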
I have a set of resolutions for OpenCV and pytesseract to detect; the standard is 1920x1080.
After loading the image and resizing it to 1920x1080, I take the ROI around the center square of the image:
img = cv2.imread("test.png")
height, width, channels = img.shape
print(f'original size / w = {width}, h = {height}')
img = image_resize(img, width=1920, height=1080)
height, width, channels = img.shape
print(f'after resize / w = {width}, h = {height}')
x, y, w, h = 466, 203, 978, 760
roi = img[y:y+h, x:x+w]
Something like this crops out the image, but I found that if my image is not native 1920x1080 (i.e. it comes from a bigger or smaller resolution and is resized to 1920x1080), this fixed ROI x, y, w, h does not work well. I would like to know a better way to dynamically scale the ROI values across different resolutions.
I am using this resize method, which I found on Stack Overflow as well:
def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]

    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image

    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # resize the image
    resized = cv2.resize(image, dim, interpolation=inter)

    # return the resized image
    return resized
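A minimal sketch of one way to scale the ROI dynamically (my own illustration, assuming the fixed ROI 466, 203, 978, 760 was tuned on a native 1920x1080 frame): express the ROI relative to the reference resolution and rescale it to whatever size the input actually has.

import cv2

# Reference resolution the ROI was tuned on, and the tuned ROI itself
REF_W, REF_H = 1920, 1080
REF_ROI = (466, 203, 978, 760)  # x, y, w, h

def scale_roi(img, ref_roi=REF_ROI, ref_size=(REF_W, REF_H)):
    # Rescale a ROI tuned at ref_size to this image's actual size
    h, w = img.shape[:2]
    sx, sy = w / ref_size[0], h / ref_size[1]
    x, y, rw, rh = ref_roi
    return int(x * sx), int(y * sy), int(rw * sx), int(rh * sy)

img = cv2.imread("test.png")
x, y, w, h = scale_roi(img)
roi = img[y:y+h, x:x+w]

This way the ROI follows the image's true dimensions, so no resize to 1920x1080 is needed at all.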
I'm trying to resize an image to a default size, filling the entire space.
I've tried to create a blank background and paste the image I have onto it, but I'm getting errors:
# image_toresize is the image I want to paste over the background
# blank_image is the image I'm using for the background
blank_image = np.zeros((600,900,3), np.uint8)
blank_image = (255,255,255)
l_img = blank_image.copy()
x_offset = y_offset = 0
height, width = image_toresize.shape[:2]
l_img[0:height, 0:width] = image_toresize.copy()
I get this error:
ValueError: could not broadcast input array from shape (90,657) into shape (90,657,3)
What can I do?
Try the code below:
image_toresize = cv2.imread('flower5.jpg')
height, width = image_toresize.shape[:2]
blank_image = np.zeros((600,900,3), np.uint8)
blank_image[:,:] = (255,255,255)
l_img = blank_image.copy() # (600, 900, 3)
x_offset = y_offset = 100
# Here, y_offset+height <= blank_image.shape[0] and x_offset+width <= blank_image.shape[1]
l_img[y_offset:y_offset+height, x_offset:x_offset+width] = image_toresize.copy()
cv2.imshow('img', l_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Output:
Figure 1: Original Image
Figure 2: Above image added to a white empty background
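Going back to the goal of filling a default-size canvas: here is a minimal sketch (my own, reusing the 900x600 canvas and flower5.jpg from above) that first shrinks the image so it fits while preserving its aspect ratio, then centers it on the white background:

import cv2
import numpy as np

image_toresize = cv2.imread('flower5.jpg')
canvas_h, canvas_w = 600, 900

h, w = image_toresize.shape[:2]
# Shrink (never enlarge) so the image fits inside the canvas, keeping aspect ratio
scale = min(canvas_w / w, canvas_h / h, 1.0)
new_w, new_h = int(w * scale), int(h * scale)
resized = cv2.resize(image_toresize, (new_w, new_h), interpolation=cv2.INTER_AREA)

# White canvas, with the resized image pasted in the middle
blank_image = np.full((canvas_h, canvas_w, 3), 255, np.uint8)
x_offset = (canvas_w - new_w) // 2
y_offset = (canvas_h - new_h) // 2
blank_image[y_offset:y_offset+new_h, x_offset:x_offset+new_w] = resized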
I am working on a text recognition project. There is a chance the text is rotated 180 degrees. I have tried tesseract-ocr in the terminal, but no luck. Is there any way to detect and correct it? An example of the text is shown below.
tesseract input.png output
tesseract input.png - --psm 0 -c min_characters_to_try=10
Warning. Invalid resolution 0 dpi. Using 70 instead.
Page number: 0
Orientation in degrees: 180
Rotate: 180
Orientation confidence: 0.74
Script: Latin
Script confidence: 1.67
One simple approach to detect whether text is rotated 180 degrees is to use the observation that text tends to be skewed towards the bottom. Here's the strategy:

Convert image to grayscale
Gaussian blur
Threshold image
Find the top/bottom half ROIs of the thresholded image
Count non-zero array elements for each half

After thresholding, we split the image into top and bottom halves. With each half we count the non-zero array elements using cv2.countNonZero(). We get this:
('top', 4035)
('bottom', 3389)
Comparing the two halves: if the top half has more non-zero pixels than the bottom half, the image is upside down (rotated 180 degrees); if it has fewer, it is correctly oriented.
Now that we have detected whether it is upside down, we can rotate it using this function:
def rotate(image, angle):
    # Obtain the dimensions of the image
    (height, width) = image.shape[:2]
    (cX, cY) = (width / 2, height / 2)

    # Grab the rotation components of the matrix
    matrix = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(matrix[0, 0])
    sin = np.abs(matrix[0, 1])

    # Find the new bounding dimensions of the image
    new_width = int((height * sin) + (width * cos))
    new_height = int((height * cos) + (width * sin))

    # Adjust the rotation matrix to take into account translation
    matrix[0, 2] += (new_width / 2) - cX
    matrix[1, 2] += (new_height / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, matrix, (new_width, new_height))
Rotating the image
rotated = rotate(original_image, 180)
cv2.imshow("rotated", rotated)
which gives us the correct result
This is the pixel result if the image was correctly oriented
('top', 3209)
('bottom', 4206)
Full code
import numpy as np
import cv2

def rotate(image, angle):
    # Obtain the dimensions of the image
    (height, width) = image.shape[:2]
    (cX, cY) = (width / 2, height / 2)

    # Grab the rotation components of the matrix
    matrix = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(matrix[0, 0])
    sin = np.abs(matrix[0, 1])

    # Find the new bounding dimensions of the image
    new_width = int((height * sin) + (width * cos))
    new_height = int((height * cos) + (width * sin))

    # Adjust the rotation matrix to take into account translation
    matrix[0, 2] += (new_width / 2) - cX
    matrix[1, 2] += (new_height / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, matrix, (new_width, new_height))

image = cv2.imread("1.PNG")
original_image = image.copy()

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blurred, 110, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imshow("thresh", thresh)

# Integer division keeps the slice indices ints (h/2 would be a float in Python 3)
x, y, w, h = 0, 0, image.shape[1], image.shape[0]
top_half = ((x, y), (x + w, y + h // 2))
bottom_half = ((x, y + h // 2), (x + w, y + h))

top_x1, top_y1 = top_half[0]
top_x2, top_y2 = top_half[1]
bottom_x1, bottom_y1 = bottom_half[0]
bottom_x2, bottom_y2 = bottom_half[1]

# Split into top/bottom ROIs
top_image = thresh[top_y1:top_y2, top_x1:top_x2]
bottom_image = thresh[bottom_y1:bottom_y2, bottom_x1:bottom_x2]
cv2.imshow("top_image", top_image)
cv2.imshow("bottom_image", bottom_image)

# Count non-zero array elements
top_pixels = cv2.countNonZero(top_image)
bottom_pixels = cv2.countNonZero(bottom_image)
print('top', top_pixels)
print('bottom', bottom_pixels)

# Rotate if upside down
if top_pixels > bottom_pixels:
    rotated = rotate(original_image, 180)
    cv2.imshow("rotated", rotated)

cv2.waitKey(0)
I kind of liked the pytesseract solution.
import cv2
import pytesseract
from scipy.ndimage import rotate as Rotate

def float_convertor(x):
    if x.isdigit():
        out = float(x)
    else:
        out = x
    return out

def tesseract_find_rotation(img: str):
    img = cv2.imread(img) if isinstance(img, str) else img
    k = pytesseract.image_to_osd(img)
    out = {i.split(":")[0]: float_convertor(i.split(":")[-1].strip()) for i in k.rstrip().split("\n")}
    img_rotated = Rotate(img, 360 - out["Rotate"])
    return img_rotated, out
Usage:
img_loc = ""
img_rotated, out = tesseract_find_rotation(img_loc)
My goal is to detect all the purple pollen in the image below and put the letter "P" on each one.
But the result shows that it always mistakes a black area for pollen.
Changing the radius in the circle detection would not help, because I still have lots of similar images to process. So what should I do to improve it?
Here is my code:
# coding: utf-8
import cv2
import numpy as np

path = "./sample.JPG"
font = cv2.FONT_HERSHEY_COMPLEX

def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]

    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image

    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # resize the image
    resized = cv2.resize(image, dim, interpolation=inter)

    # return the resized image
    return resized

iml = cv2.imread(path, cv2.IMREAD_COLOR)
img = image_resize(iml, width=960)
cimg = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
#cv2.GaussianBlur(cimg, (9,9),3)
cimg = cv2.medianBlur(cimg, 5)

circles = cv2.HoughCircles(cimg[:,:,0], cv2.HOUGH_GRADIENT, 1, cimg.shape[0]/16, param1=15, param2=20, minRadius=18, maxRadius=38)
circles = np.uint16(np.around(circles))[0,:]

for i in circles:
    cv2.putText(img, 'P', (i[0], i[1]), font, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

cv2.imwrite("./output.jpg", img)
In addition, I also tried using color detection, since everything I want to detect has the same color (purple). I followed the instructions here, but it still didn't work.
I think you can detect the purple directly in HSV color space if you carefully choose the right HSV range. This colormap is taken from my other answers.
I selected Hue (120, 160), Saturation (180, 255), Value (50, 255) for this task to get the mask:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, (120, 180, 50), (160, 255, 255))
Then you can do the processing on the mask.
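For example, here is a minimal sketch (my own illustration, not from the original answer; the kernel size and area threshold are assumptions to tune per image) that cleans the mask, finds the remaining blobs with cv2.findContours (OpenCV 4 signature), and marks each centroid with a 'P':

import cv2

img = cv2.imread('./sample.JPG', cv2.IMREAD_COLOR)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, (120, 180, 50), (160, 255, 255))

# Remove small speckles before looking for blobs
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for c in contours:
    if cv2.contourArea(c) < 100:  # skip tiny blobs; threshold is a guess
        continue
    M = cv2.moments(c)
    cx, cy = int(M['m10'] / M['m00']), int(M['m01'] / M['m00'])
    cv2.putText(img, 'P', (cx, cy), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

cv2.imwrite('./output.jpg', img)

Unlike Hough circles, this keys on the color alone, so the blob size no longer needs to be tuned per image.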
Links that may be helpful:
How to define a threshold value to detect only green colour objects in an image: OpenCV
Choosing the correct upper and lower HSV boundaries for color detection with `cv::inRange` (OpenCV)
RGB range for color red
How do I generate circular image thumbnails using PIL?
The space outside the circle should be transparent.
Snippets would be highly appreciated, thank you in advance.
The easiest way to do it is by using masks. Create a black-and-white mask with any shape you want, and use putalpha to set that shape as an alpha layer:
from PIL import Image, ImageOps
mask = Image.open('mask.png').convert('L')
im = Image.open('image.png')
output = ImageOps.fit(im, mask.size, centering=(0.5, 0.5))
output.putalpha(mask)
output.save('output.png')
Here is the mask I used:
If you want the thumbnail size to be variable, you can use ImageDraw and draw the mask:
from PIL import Image, ImageOps, ImageDraw
size = (128, 128)
mask = Image.new('L', size, 0)
draw = ImageDraw.Draw(mask)
draw.ellipse((0, 0) + size, fill=255)
im = Image.open('image.jpg')
output = ImageOps.fit(im, mask.size, centering=(0.5, 0.5))
output.putalpha(mask)
output.save('output.png')
If you want the output in GIF then you need to use the paste function instead of putalpha:
from PIL import Image, ImageOps, ImageDraw
size = (128, 128)
mask = Image.new('L', size, 255)
draw = ImageDraw.Draw(mask)
draw.ellipse((0, 0) + size, fill=0)
im = Image.open('image.jpg')
output = ImageOps.fit(im, mask.size, centering=(0.5, 0.5))
output.paste(0, mask=mask)
output = output.convert('P', palette=Image.ADAPTIVE)
output.save('output.gif', transparency=0)
Note that I made the following changes:
The mask is now inverted: the white was replaced with black and vice versa.
I'm converting into 'P' with an 'adaptive' palette; otherwise, PIL will only use web-safe colors and the result will look bad.
I'm adding transparency info to the image.
Please note: there is a big issue with this approach. If the GIF image contains black parts, all of them will become transparent as well. You can work around this by choosing another color for the transparency, as in the sketch below.
I would strongly advise you to use PNG format for this, but if you can't, then this is the best you can do.
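If the GIF does contain black, one possible workaround (a sketch under the assumption that some color, here magenta, does not occur in the photo) is to paste that sentinel color outside the circle and then mark its palette index as transparent; reading a corner pixel recovers the index, since corners always lie outside the circle:

from PIL import Image, ImageDraw, ImageOps

size = (128, 128)
mask = Image.new('L', size, 255)
draw = ImageDraw.Draw(mask)
draw.ellipse((0, 0) + size, fill=0)

im = Image.open('image.jpg')
output = ImageOps.fit(im, mask.size, centering=(0.5, 0.5))
# Magenta as the sentinel; pick any color that is absent from the photo
output.paste((255, 0, 255), mask=mask)
output = output.convert('P', palette=Image.ADAPTIVE)
# In 'P' mode getpixel returns the palette index; the corner is outside the circle
sentinel_index = output.getpixel((0, 0))
output.save('output.gif', transparency=sentinel_index)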
I would like to add to the already accepted answer a solution to antialias the resulting circle. The trick is to produce a bigger mask and then scale it down using a Lanczos/antialias filter. Here is the code:
from PIL import Image, ImageOps, ImageDraw
im = Image.open('image.jpg')
bigsize = (im.size[0] * 3, im.size[1] * 3)
mask = Image.new('L', bigsize, 0)
draw = ImageDraw.Draw(mask)
draw.ellipse((0, 0) + bigsize, fill=255)
mask = mask.resize(im.size, Image.LANCZOS)  # Image.ANTIALIAS in older Pillow
im.putalpha(mask)
This produces a far better result, in my opinion.
A slight modification of @DRC's solution to also support images which already have transparency. He sets the alpha channel to 0 (invisible) outside the circle and to 255 (opaque) inside, so I use darker, which takes the min of the mask and the original alpha channel (which can be anywhere between 0-255) :-)
from PIL import Image, ImageChops, ImageDraw

def crop_to_circle(im):
    bigsize = (im.size[0] * 3, im.size[1] * 3)
    mask = Image.new('L', bigsize, 0)
    ImageDraw.Draw(mask).ellipse((0, 0) + bigsize, fill=255)
    mask = mask.resize(im.size, Image.LANCZOS)  # Image.ANTIALIAS in older Pillow
    mask = ImageChops.darker(mask, im.split()[-1])
    im.putalpha(mask)

im = Image.open('0.png').convert('RGBA')
crop_to_circle(im)
im.save('cropped.png')
Thank you very much, I was looking for hours and your idea does the trick. Together with this other script from there, PIL round edges and add border, it works perfectly for me:
from PIL import Image
from PIL import ImageDraw, ImageChops

def add_corners(im, rad=100):
    circle = Image.new('L', (rad * 2, rad * 2), 0)
    draw = ImageDraw.Draw(circle)
    draw.ellipse((0, 0, rad * 2, rad * 2), fill=255)
    alpha = Image.new('L', im.size, "white")
    w, h = im.size
    alpha.paste(circle.crop((0, 0, rad, rad)), (0, 0))
    alpha.paste(circle.crop((0, rad, rad, rad * 2)), (0, h - rad))
    alpha.paste(circle.crop((rad, 0, rad * 2, rad)), (w - rad, 0))
    alpha.paste(circle.crop((rad, rad, rad * 2, rad * 2)), (w - rad, h - rad))
    alpha = ImageChops.darker(alpha, im.split()[-1])
    im.putalpha(alpha)
    return im

im = Image.open('AceOfSpades.png').convert('RGBA')
im = add_corners(im, 24)
im.show()
im.save("perfect.png")
Name this image AceOfSpades.png for testing