getting the roi section dynamically ignoring resize - python

I use a standard resolution of 1920x1080 for detection with OpenCV and pytesseract.
After loading the image and resizing it to 1920x1080, I take the ROI around the center square of the image:
# Load the input image and report its original size.
img = cv2.imread("test.png")
height, width, channels = img.shape
print(f'original size / w = {width}, h = {height}')
# Request a 1920x1080 result, then re-read the shape to see what was
# actually produced.
img = image_resize(img, width=1920, height=1080)
height, width, channels = img.shape
print(f'after resize / w = {width}, h = {height}')
# Fixed ROI in pixels: top-left corner (x, y) plus width and height.
# NOTE(review): presumably measured on a native 1920x1080 frame - confirm.
x, y, w, h = 466, 203, 978, 760
# NumPy indexing is [rows, cols], i.e. [y range, x range].
roi = img[y:y+h, x:x+w]
Something like this crops out the image, but I found that if my image is not natively 1920x1080 (i.e. it comes from a larger or smaller resolution and is resized to 1920x1080), this fixed ROI (x, y, w, h) does not work well. I would like to know a better way to dynamically scale the ROI values for different resolutions.
I am also using this resize method, which I found on Stack Overflow:
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to the requested width and/or height.

    * Neither ``width`` nor ``height`` given: the input is returned as is.
    * Only one of them given: the other side is derived from it so the
      aspect ratio is kept.
    * Both given: the result is exactly ``width`` x ``height``; ``height``
      is not ignored, so the aspect ratio may change.

    Args:
        image: Array whose first two ``shape`` entries are (rows, cols).
        width: Target width in pixels, or ``None``.
        height: Target height in pixels, or ``None``.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when no size was requested.
    """
    (h, w) = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None and height is not None:
        # Both sides were requested explicitly, so honour both.
        dim = (width, height)
    elif width is None:
        # Only the height is fixed: scale the width by the same ratio.
        r = height / float(h)
        dim = (max(1, int(w * r)), height)
    else:
        # Only the width is fixed: scale the height by the same ratio.
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    # max(1, ...) above keeps a derived side from truncating to 0 pixels.
    return cv2.resize(image, dim, interpolation = inter)

Related

convert image to mnist format

I want to convert images to MNIST format, with black digits on a white background. The output should look like this,
but I am getting output like this:
from PIL import Image, ImageFilter
import matplotlib.pyplot as plt
def imageprepare(argv):
    """Convert an image file into a flat list of 28*28 = 784 floats.

    The image is opened as 8-bit greyscale, scaled so that its longer side
    becomes 20 pixels (the shorter side follows the aspect ratio, with a
    minimum of 1 pixel), sharpened, and pasted centred onto a white 28x28
    canvas. The canvas is saved to a fixed path and its pixels are
    returned inverted and normalised: 0.0 is pure white, 1.0 is pure black.

    Args:
        argv: Path of the image file to open.

    Returns:
        A list of 784 floats in the order produced by ``getdata()``.
    """
    im = Image.open(argv).convert('L')
    width = float(im.size[0])
    height = float(im.size[1])
    newImage = Image.new('L', (28, 28), (255))  # white 28x28 canvas

    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under the name that still exists.
    if width > height:
        # Width is the longer side: it becomes 20 px, height follows.
        nheight = max(1, int(round(20.0 / width * height)))
        img = im.resize((20, nheight), Image.LANCZOS).filter(ImageFilter.SHARPEN)
        wtop = int(round((28 - nheight) / 2))  # vertical offset that centres it
        newImage.paste(img, (4, wtop))
    else:
        # Height is the longer side (or equal): it becomes 20 px.
        nwidth = max(1, int(round(20.0 / height * width)))
        img = im.resize((nwidth, 20), Image.LANCZOS).filter(ImageFilter.SHARPEN)
        wleft = int(round((28 - nwidth) / 2))  # horizontal offset that centres it
        newImage.paste(img, (wleft, 4))

    newImage.save("D:/Desktop/desktop/MNIST/sample1.png")

    tv = list(newImage.getdata())
    # Invert and scale to [0, 1]: 0 is pure white, 1 is pure black.
    tva = [(255 - x) * 1.0 / 255.0 for x in tv]
    print(tva)
    return tva
# Run the conversion on one sample file (put your own file path here).
x=imageprepare('D:/Desktop/desktop/MNIST/0/2.bmp')
# MNIST images are 28x28 = 784 pixels, so this is expected to print 784.
print(len(x))
Can you please help me where should I modify the code? Thank you :)
[Desired output]:
[Output I am getting]:

Resize image in OpenCv python, filling space with color

I'm trying to resize an image to a default size, filling the remaining space with a color.
I've tried creating a blank background and pasting my image onto it, but I'm getting errors:
# image_toresize is the image to paste onto the background.
# Allocate the background: a 600x900, 3-channel uint8 array of zeros.
blank_image = np.zeros((600,900,3), np.uint8)
# NOTE(review): this rebinds blank_image to a plain 3-tuple; it does not
# fill the array allocated above with white.
blank_image = (255,255,255)
# NOTE(review): blank_image is a tuple here, and tuples have no .copy().
l_img = blank_image.copy()
# Offsets are declared but the slice below uses literal 0 instead.
x_offset = y_offset = 0
height, width = image_toresize.shape[:2]
# Overwrite the top-left height x width region with the image.
l_img[0:height, 0:width] = image_toresize.copy()
this error
ValueError: could not broadcast input array from shape (90,657) into shape (90,657,3)
What can i do?
Try below code:
# Load the picture to paste and remember its size (rows, cols).
image_toresize = cv2.imread('flower5.jpg')
height, width = image_toresize.shape[:2]

# Build a white 600x900 three-channel canvas in one step.
blank_image = np.full((600, 900, 3), 255, np.uint8)
l_img = blank_image.copy()  # shape (600, 900, 3)

# Top-left corner of the pasted picture on the canvas.
x_offset = y_offset = 100

# The picture must fit: y_offset + height <= blank_image.shape[0] and
# x_offset + width <= blank_image.shape[1].
row_end = y_offset + height
col_end = x_offset + width
l_img[y_offset:row_end, x_offset:col_end] = image_toresize.copy()

# Show the result until a key is pressed.
cv2.imshow('img', l_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Output:
Figure 1: Original Image
Figure 2: Above image added to a white empty background

How to detect if text is rotated 180 degrees or flipped upside down

I am working on a text recognition project. There is a chance the text is rotated 180 degrees. I have tried tesseract-ocr on terminal, but no luck. Is there any way to detect it and correct it? An example of the text is shown below.
tesseract input.png output
tesseract input.png - --psm 0 -c min_characters_to_try=10
Warning. Invalid resolution 0 dpi. Using 70 instead.
Page number: 0
Orientation in degrees: 180
Rotate: 180
Orientation confidence: 0.74
Script: Latin
Script confidence: 1.67
One simple approach to detect if text is rotated 180 degrees is to use the observation that text tends to be skewed towards the bottom. Here's the strategy:
Convert image to grayscale
Gaussian blur
Threshold image
Find the top/bottom half ROIs of thresholded image
Count non-zero array elements for each half
Threshold image
Find ROIs of top and bottom half
Next we split the top/bottom sections
With each half we count non-zero array elements using cv2.countNonZero(). We get this
('top', 4035)
('bottom', 3389)
By comparing the values between the two halves, if the top half has more pixels than the bottom half, it is upside down by 180 degrees. If it has less, it is correctly oriented.
Now that we have detected if it is upside down, we can rotate it using this function
def rotate(image, angle):
    """Rotate ``image`` about its centre without cropping the corners.

    ``angle`` is negated before being handed to
    ``cv2.getRotationMatrix2D``. The output canvas is enlarged to the
    bounding box of the rotated image and the transform is shifted so the
    result stays centred on that canvas.

    Args:
        image: Array whose first two ``shape`` entries are (rows, cols).
        angle: Rotation angle in degrees.

    Returns:
        The rotated image produced by ``cv2.warpAffine``.
    """
    rows, cols = image.shape[:2]
    center_x = cols / 2
    center_y = rows / 2

    warp = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    # The first two entries of row 0 hold the cosine and sine terms.
    abs_cos, abs_sin = np.abs(warp[0, :2])

    # Size of the axis-aligned box that contains the rotated image.
    out_w = int(rows * abs_sin + cols * abs_cos)
    out_h = int(rows * abs_cos + cols * abs_sin)

    # Move the rotation centre to the centre of the enlarged canvas.
    warp[0, 2] += out_w / 2 - center_x
    warp[1, 2] += out_h / 2 - center_y

    return cv2.warpAffine(image, warp, (out_w, out_h))
Rotating the image
# Turn the image by 180 degrees and display the result.
rotated = rotate(original_image, 180)
cv2.imshow("rotated", rotated)
which gives us the correct result
This is the pixel result if the image was correctly oriented
('top', 3209)
('bottom', 4206)
Full code
import numpy as np
import cv2
def rotate(image, angle):
    """Rotate ``image`` about its centre without cropping the corners.

    ``angle`` is negated before being handed to
    ``cv2.getRotationMatrix2D``. The output canvas is enlarged to the
    bounding box of the rotated image and the transform is shifted so the
    result stays centred on that canvas.

    Args:
        image: Array whose first two ``shape`` entries are (rows, cols).
        angle: Rotation angle in degrees.

    Returns:
        The rotated image produced by ``cv2.warpAffine``.
    """
    rows, cols = image.shape[:2]
    center_x = cols / 2
    center_y = rows / 2

    warp = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    # The first two entries of row 0 hold the cosine and sine terms.
    abs_cos, abs_sin = np.abs(warp[0, :2])

    # Size of the axis-aligned box that contains the rotated image.
    out_w = int(rows * abs_sin + cols * abs_cos)
    out_h = int(rows * abs_cos + cols * abs_sin)

    # Move the rotation centre to the centre of the enlarged canvas.
    warp[0, 2] += out_w / 2 - center_x
    warp[1, 2] += out_h / 2 - center_y

    return cv2.warpAffine(image, warp, (out_w, out_h))
# Load the image and build an inverted binary mask (text becomes non-zero).
image = cv2.imread("1.PNG")
original_image = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blurred, 110, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imshow("thresh", thresh)

x, y, w, h = 0, 0, image.shape[1], image.shape[0]
# Use floor division: h / 2 is a float on Python 3, and float slice
# bounds raise "TypeError: slice indices must be integers".
top_half = ((x,y), (x+w, y+h//2))
bottom_half = ((x,y+h//2), (x+w, y+h))
top_x1,top_y1 = top_half[0]
top_x2,top_y2 = top_half[1]
bottom_x1,bottom_y1 = bottom_half[0]
bottom_x2,bottom_y2 = bottom_half[1]

# Split into top/bottom ROIs
top_image = thresh[top_y1:top_y2, top_x1:top_x2]
bottom_image = thresh[bottom_y1:bottom_y2, bottom_x1:bottom_x2]
cv2.imshow("top_image", top_image)
cv2.imshow("bottom_image", bottom_image)

# Count non-zero array elements
top_pixels = cv2.countNonZero(top_image)
bottom_pixels = cv2.countNonZero(bottom_image)
print('top', top_pixels)
print('bottom', bottom_pixels)

# Rotate if upside down: more text pixels in the top half than in the
# bottom half is taken as the sign of a 180-degree flip.
if top_pixels > bottom_pixels:
    rotated = rotate(original_image, 180)
    cv2.imshow("rotated", rotated)
cv2.waitKey(0)
I kind of liked the pytesseract solution.
import cv2
import pytesseract
from scipy.ndimage import rotate as Rotate
def float_convertor(x):
    """Return ``x`` as a float when it is numeric text, else unchanged.

    ``str.isdigit`` only accepts plain digit runs, so decimal values such
    as ``"0.74"`` would stay strings; parsing with ``float`` handles
    integers and decimals alike.

    Args:
        x: A string value, e.g. one field of Tesseract's OSD output.

    Returns:
        ``float(x)`` if ``x`` parses as a number, otherwise ``x`` itself.
    """
    try:
        return float(x)
    except ValueError:
        # Not a number (e.g. a script name such as "Latin"): keep the text.
        return x
def tesseract_find_rotatation(img: str):
    """Detect the page orientation with Tesseract OSD and rotate the image.

    Args:
        img: Path of an image file. Any value that is not a ``str`` is
            treated as an already-loaded image and used as is.

    Returns:
        A tuple ``(img_rotated, out)``: the image rotated by
        ``360 - out["Rotate"]`` degrees with ``scipy.ndimage.rotate``, and
        a dict of the ``key: value`` lines of the OSD report, with each
        value passed through ``float_convertor``.

    Raises:
        FileNotFoundError: If ``img`` is a path that ``cv2.imread`` cannot
            load.
    """
    if isinstance(img, str):
        path = img
        img = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f"could not read image: {path!r}")

    k = pytesseract.image_to_osd(img)

    out = {}
    for line in k.rstrip().split("\n"):
        key, sep, value = line.partition(":")
        if not sep:
            # Skip lines that are not "key: value" pairs.
            continue
        out[key] = float_convertor(value.strip())

    img_rotated = Rotate(img, 360-out["Rotate"])
    return img_rotated, out
usage
# Set img_loc to the path of the image to analyse.
img_loc = ""
# The function is defined as tesseract_find_rotatation (sic); the call
# has to use that exact spelling or it raises NameError.
img_rotated, out = tesseract_find_rotatation(img_loc)

Using circle detection and color detection to recognize an object

I am using color detection (purple in particular) and circle detection to detect pollen object (the purple circular one) in the image below.
Then I write the letter "P" in the object detected. Unfortunately it didn't work as I expected.
I can fix it if I change the radius, but it is not a good idea since I still have lots of similar images with various radius to process. i think the main point is how to know the exact range of the purple in this image. Generally, I want to know how to get the range of an arbitrary color in an image. Some people gave me a sample code but it didn't work well.
Here is my program.
import cv2
import numpy as np
# In[2]:
# Input image path and the font later passed to cv2.putText.
path = "./sample.JPG"
font = cv2.FONT_HERSHEY_COMPLEX
# In[3]:
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to the requested width and/or height.

    * Neither ``width`` nor ``height`` given: the input is returned as is.
    * Only one of them given: the other side is derived from it so the
      aspect ratio is kept.
    * Both given: the result is exactly ``width`` x ``height``; ``height``
      is not ignored, so the aspect ratio may change.

    Args:
        image: Array whose first two ``shape`` entries are (rows, cols).
        width: Target width in pixels, or ``None``.
        height: Target height in pixels, or ``None``.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when no size was requested.
    """
    (h, w) = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None and height is not None:
        # Both sides were requested explicitly, so honour both.
        dim = (width, height)
    elif width is None:
        # Only the height is fixed: scale the width by the same ratio.
        r = height / float(h)
        dim = (max(1, int(w * r)), height)
    else:
        # Only the width is fixed: scale the height by the same ratio.
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    # max(1, ...) above keeps a derived side from truncating to 0 pixels.
    return cv2.resize(image, dim, interpolation = inter)
# In[4]:
iml = cv2.imread(path,cv2.IMREAD_COLOR)
img = image_resize(iml,width=960)
# In[5]:
# Work in HSV and smooth it before thresholding on colour.
hsv = cv2.cvtColor(img,cv2.COLOR_BGR2HSV)
hsv = cv2.medianBlur(hsv,5)
# Other ranges tried: (120, 180, 50)-(160, 255, 255) and
# (126, 142, 57)-(145, 255, 255).
mask = cv2.inRange(hsv, (105, 100,50), (160, 255, 255))
#cv2.GaussianBlur(cimg, (9,9),3)
#cimg = cv2.medianBlur(cimg,5)
# Keep only the pixels inside the colour range.
output = cv2.bitwise_and(hsv, hsv, mask = mask)
#circles = cv2.HoughCircles(mask[:,:,0],cv2.HOUGH_GRADIENT,1,mask.shape[0]/16,param1=15,param2=20,minRadius=18,maxRadius=38)
circles = cv2.HoughCircles(output[:,:,0],cv2.HOUGH_GRADIENT,1,output.shape[0]/16,param1=15,param2=20,minRadius=15,maxRadius=30)
if circles is None:
    # HoughCircles returns None when nothing is found; len() and
    # np.around() would both fail on it.
    circles = []
else:
    # The result has shape (1, N, 3); drop the leading axis so that
    # len(circles) is the number of circles rather than always 1.
    circles = np.uint16(np.around(circles))[0,:]
print(len(circles))
# In[6]:
# Label every detected circle at its centre.
for i in circles:
    cv2.putText(img,'P',(i[0],i[1]), font, 0.5,(0,255,0),1,cv2.LINE_AA)
# In[7]:
cv2.imwrite("./result.jpg",img)
Note that this answer is not meant to be a solution but rather a new point of view on how to achieve your task. Even though it may work in some cases, it will probably not be robust enough for automating any process. That being said, the problem with converting to the HSV colorspace is that if the image (as in your case) has similarly colored objects drawn on it, it will be difficult to distinguish one object from another with cv2.inRange(). I altered your code a bit and made an example of how I would approach this.
First, you could look for all contours after Otsu thresholding of the image and filter out the biggest one (the donut) and the other small ones using a criterion of your choosing.
Once you have that, you can make an ROI around each remaining contour. Then I would perform cv2.inRange() on each ROI.
After that, I would search for contours again in each ROI and count the white pixels, or apply a "circularity" criterion to the contours. If a contour passes, it has a lot of pixels in range, so draw the letter P on it. Hope it helps a bit. Cheers!
Example:
import cv2
import numpy as np
# In[2]:
# Input image path and the font later passed to cv2.putText.
path = "./purplecirc4.JPG"
font = cv2.FONT_HERSHEY_COMPLEX
# In[3]:
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to the requested width and/or height.

    * Neither ``width`` nor ``height`` given: the input is returned as is.
    * Only one of them given: the other side is derived from it so the
      aspect ratio is kept.
    * Both given: the result is exactly ``width`` x ``height``; ``height``
      is not ignored, so the aspect ratio may change.

    Args:
        image: Array whose first two ``shape`` entries are (rows, cols).
        width: Target width in pixels, or ``None``.
        height: Target height in pixels, or ``None``.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when no size was requested.
    """
    (h, w) = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None and height is not None:
        # Both sides were requested explicitly, so honour both.
        dim = (width, height)
    elif width is None:
        # Only the height is fixed: scale the width by the same ratio.
        r = height / float(h)
        dim = (max(1, int(w * r)), height)
    else:
        # Only the width is fixed: scale the height by the same ratio.
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    # max(1, ...) above keeps a derived side from truncating to 0 pixels.
    return cv2.resize(image, dim, interpolation = inter)
# In[4]:
iml = cv2.imread(path,cv2.IMREAD_COLOR)
img = image_resize(iml,width=960)

# Threshold with OTSU to get all contours
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray,(5,5),0)
_,thresh = cv2.threshold(blur,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
# findContours returns (image, contours, hierarchy) on OpenCV 3 but
# (contours, hierarchy) on OpenCV 4; taking the last two values works on
# both, whereas a fixed three-way unpack fails on OpenCV 4.
contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)[-2:]

# Empty list for contours that could be positive
ROIs=[]

# Append possible contours to list
# (I have selected height to eliminate unwanted noise)
for cnt in contours:
    x,y,w,h = cv2.boundingRect(cnt)
    if 200 > h > 20:
        # Pad the box by 20 px. Clamp the start at 0: a negative start
        # index would wrap around and give a wrong or empty slice. The
        # end index needs no clamp because slicing already stops at the
        # array edge.
        x1 = max(x-20, 0)
        x2 = x+w+20
        y1 = max(y-20, 0)
        y2 = y+h+20
        # roi is a view into img, so drawing on it draws on img.
        roi = img[y1:y2, x1:x2]
        ROIs.append(roi)

# Iterate through list of ROIS and transform to HSV
# (I made a little adjustment in values )
for i in ROIs:
    hsv = cv2.cvtColor(i,cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, (115,100,50), (160,255,255))
    # Search for contours on every ROI in list and select the biggest one
    contours, hierarchy = cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)[-2:]
    if not contours:
        # Nothing in the colour range here; max() would raise on an
        # empty sequence.
        continue
    cnt = max(contours, key=cv2.contourArea)
    # Draw them whole on hsv then transform to gray and perform OTSU threshold and search for contours
    cv2.drawContours(hsv, [cnt], 0, 255, -1)
    gray = cv2.cvtColor(hsv, cv2.COLOR_BGR2GRAY)
    _,thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
    contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)[-2:]
    if not contours:
        continue
    cnt = max(contours, key=cv2.contourArea)
    # Make a "roundness" criterion and draw the letter
    x,y,w,h = cv2.boundingRect(cnt)
    perimeter = cv2.arcLength(cnt,True)
    if perimeter == 0:
        # A zero perimeter would divide by zero below.
        continue
    radius = perimeter/(2*np.pi)
    area = cv2.contourArea(cnt)
    # Contour area relative to the area of a circle with the same
    # perimeter; this is 1.0 for a perfect circle.
    circ = 4*area/(np.pi*(radius*2)**2)
    if circ > 0.70:
        cv2.putText(i,'P',(int(x+(w/2.5)),int(y+(h/2))), font, 0.5,(0,255,0),1,cv2.LINE_AA)

# Display result:
resized = cv2.resize(img, (0,0), fx=0.5, fy=0.5)
cv2.imshow("roi",resized)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:

Detect circular objects with specific color

My goal is detecting all the purple pollen in the image below and put the letter "P" in it.
But the result shows that it always mistakes a black area.
Changing the radius in circle detection would not help because I still have lots of similar images to go. So what should I do to better it?
Here is my code:
# coding: utf-8
import cv2
import numpy as np
# Input image path and the font later passed to cv2.putText.
path = "./sample.JPG"
font = cv2.FONT_HERSHEY_COMPLEX
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to the requested width and/or height.

    * Neither ``width`` nor ``height`` given: the input is returned as is.
    * Only one of them given: the other side is derived from it so the
      aspect ratio is kept.
    * Both given: the result is exactly ``width`` x ``height``; ``height``
      is not ignored, so the aspect ratio may change.

    Args:
        image: Array whose first two ``shape`` entries are (rows, cols).
        width: Target width in pixels, or ``None``.
        height: Target height in pixels, or ``None``.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when no size was requested.
    """
    (h, w) = image.shape[:2]

    # Nothing requested: hand the input back untouched.
    if width is None and height is None:
        return image

    if width is not None and height is not None:
        # Both sides were requested explicitly, so honour both.
        dim = (width, height)
    elif width is None:
        # Only the height is fixed: scale the width by the same ratio.
        r = height / float(h)
        dim = (max(1, int(w * r)), height)
    else:
        # Only the width is fixed: scale the height by the same ratio.
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    # max(1, ...) above keeps a derived side from truncating to 0 pixels.
    return cv2.resize(image, dim, interpolation = inter)
iml = cv2.imread(path,cv2.IMREAD_COLOR)
img = image_resize(iml,width=960)
# Convert to HSV and smooth before running the circle detector on the
# first (hue) channel.
cimg = cv2.cvtColor(img,cv2.COLOR_BGR2HSV)
#cv2.GaussianBlur(cimg, (9,9),3)
cimg = cv2.medianBlur(cimg,5)
circles = cv2.HoughCircles(cimg[:,:,0],cv2.HOUGH_GRADIENT,1,cimg.shape[0]/16,param1=15,param2=20,minRadius=18,maxRadius=38)
if circles is None:
    # HoughCircles returns None when nothing is found; np.around()
    # would fail on it, so fall back to "no circles".
    circles = []
else:
    # The result has shape (1, N, 3); round it and drop the leading axis.
    circles = np.uint16(np.around(circles))[0,:]
# Label every detected circle at its centre.
for i in circles:
    cv2.putText(img,'P',(i[0],i[1]), font, 0.5,(0,255,0),1,cv2.LINE_AA)
cv2.imwrite("./output.jpg",img)
In addition, I also tried using color detection since all I want to detect have the same color (purple). I follow the instructions here
but it still didn't work.
I think you can detect the purple directly in HSV color space if you can carefully choose the right hsv range. This colormap is taken from my other answers.
I select Hue(120,160), Saturation(180, 255), Value(50, 255) for this task to get the mask.
# Convert the BGR image to HSV, then keep the pixels whose
# (hue, saturation, value) lie within (120, 180, 50) .. (160, 255, 255).
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, (120, 180, 50), (160, 255, 255))
Then you can do the processing on the mask.
Links maybe helpful:
How to define a threshold value to detect only green colour objects in an image :Opencv
Choosing the correct upper and lower HSV boundaries for color detection with`cv::inRange` (OpenCV)
RGB range for color red

Categories