I wanted to take a screenshot of my Valorant game and read the remaining time out of the image.
It all works fine, but it's not detecting a number in the image.
import time
import cv2
import pyautogui
import pytesseract

time.sleep(2)
myScreenshot = pyautogui.screenshot()
myScreenshot.save(r'Path\screenshot.png')

raw = cv2.imread("Path/screenshot.png")

# crop the region of the screen that contains the timer
y = 10
x = 1160
h = 100
w = 200
cropped = raw[y:y+h, x:x+w]

cv2.imwrite("Path/Time.png", cropped)
Time = cv2.imread("Path/Time.png")

string = pytesseract.image_to_string(Time, config='--psm 13')
print(string)
Example image of "Time.png":
I tried different psm settings, but they didn't help.
You need to preprocess the input image before OCR (e.g. remove the background/noise). Something like this should work for the image you provided:
import numpy as np
import pyautogui
import pytesseract
from PIL import Image

y = 10
x = 1160
h = 100
w = 200

# note: pyautogui's region is (left, top, width, height)
cropped = pyautogui.screenshot(region=(x, y, w, h))
data = np.array(cropped)

# keep only the pure-white timer pixels (all three channels equal to 255),
# then invert so Tesseract sees black text on a white background
white = (255, 255, 255)
mask = np.all(data == white, axis=-1)
ocr_area = Image.fromarray(np.invert(mask).astype(np.uint8) * 255)

string = pytesseract.image_to_string(ocr_area)
print(string)
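If the digits are anti-aliased, an exact match against pure white will miss the softened edge pixels. A tolerance threshold is more forgiving; this is a small variation on the sketch above, under the assumption of white-ish text:

# assumption: anti-aliased white text; treat any near-white pixel as text
mask = np.all(data > 230, axis=-1)
ocr_area = Image.fromarray((~mask).astype(np.uint8) * 255)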
I'm trying to use Tesseract to identify the characters 0-9. After receiving very poor results, I built a small test file to check what Tesseract is actually running against.
I took 39 screenshots of images and combined them with Paint into a JPG file; I also ran the same code/approach with PNG and BMP versions, with the same outcome.
Below are the JPG test data and my output from trying to draw the bounding boxes. I expected 39 small green bounding boxes around each of the numbers in question. They appear (mostly) very clear to my eye.
I tried the baseline Tesseract data I got with the pip install and got incredibly poor results. I then downloaded additional English data from the official GitHub, and matching improved dramatically without any further changes; it wasn't great, but it was much better.
In one script I take the screenshot images, drop the blue and green channels, convert to grayscale, and apply binary thresholding followed by Otsu's method to try to smooth out some of the gaps in the screenshots.
import numpy as np
import cv2 as cv
import mss
from PIL import Image
from pytesseract import pytesseract

sct = mss.mss()  # 'sct' was undefined in the snippet; an mss instance is assumed

def getTemplate():
    w, h = 1920, 1080
    monitor = {'top': 0, 'left': 0, 'width': w, 'height': h}
    img = Image.frombytes('RGB', (w, h), sct.grab(monitor).rgb)
    haystack = np.array(img)
    return haystack

path_to_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
pytesseract.tesseract_cmd = path_to_tesseract

# # get an updated image of the screen & crop it
# template = get_haystack_image()
# haystack = template[137:167, 54:125]
template = getTemplate()
haystack = template[180:215, 55:125]

# 'output' was undefined in the original snippet; per the pipeline described
# above, the channel-dropped copy 'r' is assumed to be what gets grayscaled
r = haystack.copy()
# set blue and green channels to 0
r[:, :, 0] = 0
r[:, :, 1] = 0

gray_image = cv.cvtColor(r, cv.COLOR_BGR2GRAY)
# note: with THRESH_OTSU set, the manual threshold of 40 is ignored and computed automatically
thresh = cv.threshold(gray_image, 40, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)[1]
img = cv.imwrite('testData.jpg', thresh)
Then I read the images, apply the custom config below, draw the boxes, and print the results.
The results from the below are:
jpg:
222419230
222101064272787315267
from pytesseract import pytesseract
from pytesseract import Output
import cv2 as cv
import os

path_to_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
pytesseract.tesseract_cmd = path_to_tesseract

# Read images
jpg = cv.imread('testData.jpg')

# Define a configuration that whitelists only number characters, set the oem and specify the language
custom_config = r'-l eng --psm 6 --oem 3 -c tessedit_char_whitelist=0123456789'

# Find the numbers in the image
text_jpg = pytesseract.image_to_string(jpg, config=custom_config)

# Print results
print('jpg:' + '\n' + str(text_jpg))

d = pytesseract.image_to_data(jpg, output_type=Output.DICT)
n_boxes = len(d['level'])
for i in range(n_boxes):
    (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
    cv.rectangle(jpg, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv.imshow('jpg', jpg)
cv.waitKey(0)
cv.imwrite('results.jpg', jpg)
Why are the bounding boxes as overlapping and incorrect as they are, and how can I clean them up to improve my image recognition?
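One likely contributor, offered as a hedged note rather than a full answer: image_to_data returns a row for every layout level in Tesseract's hierarchy (1 = page, 2 = block, 3 = paragraph, 4 = line, 5 = word), so drawing every row stacks page, block, paragraph, and line rectangles on top of the word boxes. Filtering to word-level rows with a real confidence score, and passing the same custom_config as the image_to_string call, usually cleans this up:

d = pytesseract.image_to_data(jpg, config=custom_config, output_type=Output.DICT)
for i in range(len(d['level'])):
    # keep only word-level detections that Tesseract actually scored
    if d['level'][i] == 5 and float(d['conf'][i]) > 0:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        cv.rectangle(jpg, (x, y), (x + w, y + h), (0, 255, 0), 2)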
Hi, I'm trying to create a program that takes a screenshot from another app, reads the numbers from that image, and outputs them to a txt file and another window. My problem comes when pytesseract.image_to_string returns a value other than the original.
Example:
Here is the screenshot that my program took (original image); the number is 8258.
Then I applied grayscale to help Tesseract out (image grayscale).
After that I applied blur, because apparently this helps Tesseract out (image blur).
And finally I applied a threshold, because it helped to get the numbers correctly (image thresh).
After all of that is done, I call pytesseract to make a string from the image:
data = pytesseract.image_to_string(thresh, lang='eng',config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
But my results are this in the txt file (tesseract txt; it shows twice because I put two prints),
and in my window (image window) the value comes out as 68258.
I really don't know why this is happening, and I'm all out of ideas to fix it.
I'm using Tesseract v5.0.0.
This is the code I'm using:
import pygetwindow
import pyautogui
import cv2
import pytesseract
from tkinter import *
from tkinter import font
from pygetwindow import PyGetWindowException
from PIL import Image  # needed for Image.open below
import numpy as np
import io

path = r"C:\Image.png"  # raw string so the backslash isn't treated as an escape

# this function takes the screenshot from the window of the other app
def getting_image():
    titles = pygetwindow.getAllTitles()
    window = pygetwindow.getWindowsWithTitle('Connect')[0]
    x1 = window.left
    y1 = window.top
    height = window.height
    width = window.width
    x2 = x1 + width
    y2 = y1 + height
    pyautogui.screenshot(path)
    im = Image.open(path)
    im = im.crop((x1 + 640, y1 + 125, x2 - 130, y2 - 670))
    im.save(path, dpi=(600, 600))
# this function takes the saved image and converts it to text
def send_image():
    image = cv2.imread(path)  # path is currently my C: drive, C:\Image.png
    cv2.imshow('image_org', image)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    cv2.imshow('image_bckgray', gray)
    blur = cv2.medianBlur(gray, 5)
    cv2.imshow('image_blur', blur)
    thresh = 255 - cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    cv2.imshow('image_thresh', thresh)
    data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')
    print(data)
    data = wording(data)  # wording() is my helper that just takes out the \x0c at the end
    print(data)
    return data
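One thing worth trying, as a hedged sketch rather than a confirmed fix: Tesseract tends to misread glyphs that sit close to the image border, and --psm 13 is especially sensitive to this, which can show up as a phantom leading digit. Padding the thresholded crop with a white margin before the image_to_string call inside send_image() often helps:

# pad the binarized image with a 20px white border before OCR
padded = cv2.copyMakeBorder(thresh, 20, 20, 20, 20, cv2.BORDER_CONSTANT, value=255)
data = pytesseract.image_to_string(padded, lang='eng', config='--psm 13 --oem 3 -c tessedit_char_whitelist=0123456789')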
Can I use cv2 or numpy to turn an image into a negative? Something like the code below, though I still need to edit it.
My question is mainly about the top bit of code: can I use that to invert both the grayscale and black-and-white versions to a negative?
import cv2
import numpy as np

img = cv2.imread('imagename.jpg')
print(img.dtype)
image_neg = 255 - image  # this is the line from the traceback below: the file is read into 'img', but negated as 'image'
cv2.imshow('negative', image_neg)
cv2.waitKey(0)
#######################################
from images import Image

def invert(image):
    # the body of invert() was missing from the original post; this is an
    # assumed implementation that flips every channel of every pixel
    for y in range(image.getHeight()):
        for x in range(image.getWidth()):
            (r, g, b) = image.getPixel(x, y)
            image.setPixel(x, y, (255 - r, 255 - g, 255 - b))

def blackAndWhite(image):
    blackPixel = (0, 0, 0)
    whitePixel = (255, 255, 255)
    for y in range(image.getHeight()):
        for x in range(image.getWidth()):
            (r, g, b) = image.getPixel(x, y)
            average = (r + g + b) // 3
            if average < 128:
                image.setPixel(x, y, blackPixel)
            else:
                image.setPixel(x, y, whitePixel)

def grayscale(image):
    for y in range(image.getHeight()):
        for x in range(image.getWidth()):
            (r, g, b) = image.getPixel(x, y)
            r = int(r * 0.299)
            g = int(g * 0.587)
            b = int(b * 0.114)
            lum = r + g + b
            image.setPixel(x, y, (lum, lum, lum))

def main():
    filename = input("Enter the image file name: ")
    image = Image(filename)

    # Invert image
    invert(image)
    image.draw()

    # Convert to grayscale, then invert
    """grayscale(image)
    invert(image)
    image.draw()"""

    # Convert to black and white, then invert
    """blackAndWhite(image)
    invert(image)
    image.draw()"""

if __name__ == "__main__":
    main()
I receive the following error:
Traceback (most recent call last):
File "invert.py", line 14, in <module>
image_neg = 255 - image
NameError: name 'image' is not defined
I changed the code in the beginning to say this:
import cv2
import numpy as np

image = cv2.imread('smokey.gif')  # note: cv2.imread can't decode GIF files, so this returns None
print(image.dtype)
image_neg = 255 - image
cv2.imshow('negative', image_neg)
cv2.waitKey(0)
Well, I thought this would work, but it tells me the line invertedImage = cv2.bitwise_not(imageToInvert) has a SyntaxError: invalid non-printable character U+00A0.
I edited my code correctly on here (4 spaces), and I have no clue why it's still not showing correctly.
from images import Image
import cv2

def invert(filepath):  # the parameter was named 'image' in the original but used as 'filepath'
    imageToInvert = cv2.imread(filepath)
    invertedImage = cv2.bitwise_not(imageToInvert)
    cv2.imwrite("BWimage.png", invertedImage)  # cv2.imgwrite doesn't exist; imwrite is the real function
    print("inverted image saved")

File_path = 'smokey.gif'
invert(File_path)
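As an aside on the U+00A0 error: that character is a non-breaking space, which often sneaks into source files when code is copied from web pages, and Python's tokenizer rejects it. A quick way to scrub a file, as a minimal sketch (assuming the script is saved as invert.py, per the traceback above):

# read the script, replace non-breaking spaces with regular spaces, and write it back
with open('invert.py', encoding='utf-8') as f:
    src = f.read()
with open('invert.py', 'w', encoding='utf-8') as f:
    f.write(src.replace('\u00a0', ' '))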
Not sure what error you are getting. Maybe something here will help?
Syntax: cv2.flip(src, flipCode[, dst])
Parameters:
src: input array.
dst: output array of the same size and type as src.
flipCode: a flag to specify how to flip the array; 0 means flipping around the x-axis, a positive value (for example, 1) means flipping around the y-axis, and a negative value (for example, -1) means flipping around both axes.
Return value: the flipped image.
As found in OpenCV
example code:
# Python program to explain cv2.flip() method
# importing cv2
import cv2
# path
path = r'C:\Users\user\Desktop\geeks14.png'
# Reading an image in default mode
src = cv2.imread(path)
# Window name in which image is displayed
window_name = 'Image'
# Using cv2.flip() method
# Use Flip code 0 to flip vertically
image = cv2.flip(src, 0)
# Displaying the image
cv2.imshow(window_name, image)
cv2.waitKey(0)
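It may be worth noting that cv2.flip mirrors the image geometrically rather than inverting its colours. For the photographic negative the question asks about, cv2.bitwise_not is the direct call, and for 8-bit images it matches the 255 - img arithmetic from the question. A small sketch, reusing the src loaded above:

# two equivalent ways to build a negative from a uint8 image
neg_a = cv2.bitwise_not(src)
neg_b = 255 - src
print((neg_a == neg_b).all())  # True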
I am working on a project where I need to apply OCR to some documents.
The first step is to threshold the image and keep only the writing (whiten the background).
Example of an input image (for GDPR and privacy reasons, this image is from the Internet):
Here is my code:
import cv2
import numpy as np

image = cv2.imread('b.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

h = image.shape[0]
w = image.shape[1]

# binarize pixel by pixel with a fixed threshold of 120
for y in range(0, h):
    for x in range(0, w):
        if image[y, x] >= 120:
            image[y, x] = 255
        else:
            image[y, x] = 0

cv2.imwrite('output.jpg', image)
Here is the result that I got:
When I applied pytesseract to the output image, the results were not satisfying (I know that OCR is not perfect). Although I tried adjusting the threshold value (120 in this code), the result was not as clear as I wanted.
Is there a way to compute a better threshold so that only the writing is kept in black and the rest is whitened?
After digging deep into StackOverflow questions, I found this answer, which is about removing a watermark using OpenCV.
I adapted the code to my needs, and this is what I got:
import numpy as np
import cv2

image = cv2.imread('a.png')
img = image.copy()

# boost the contrast and darken the background
alpha = 2.75
beta = -160.0
denoised = alpha * img + beta
denoised = np.clip(denoised, 0, 255).astype(np.uint8)
#denoised = cv2.fastNlMeansDenoising(denoised, None, 31, 7, 21)

img = cv2.cvtColor(denoised, cv2.COLOR_BGR2GRAY)

h = img.shape[0]
w = img.shape[1]
for y in range(0, h):
    for x in range(0, w):
        if img[y, x] >= 220:
            img[y, x] = 255
        else:
            img[y, x] = 0

cv2.imwrite('output.jpg', img)
Here is the output image:
The good thing about this code is that it gives good results not only with this image, but also with all the images that I tested.
I hope it helps anyone who had the same problem.
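As a side note on the snippet above: the per-pixel loop can be collapsed into a single vectorized call with identical output, which is considerably faster on full-page scans. A sketch under that assumption:

# same mapping as the nested loops: cv2.threshold uses a strict '>' comparison,
# so a threshold of 219 reproduces the '>= 220' test
_, img = cv2.threshold(img, 219, 255, cv2.THRESH_BINARY)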
You can use adaptive thresholding. From the documentation:
In this, the algorithm calculates the threshold for small regions of the image. So we get different thresholds for different regions of the same image, which gives better results for images with varying illumination.
import numpy as np
import cv2

image = cv2.imread('b.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.medianBlur(image, 5)

th1 = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                            cv2.THRESH_BINARY, 11, 2)
th2 = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                            cv2.THRESH_BINARY, 11, 2)

cv2.imwrite('output1.jpg', th1)
cv2.imwrite('output2.jpg', th2)
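To close the loop with the OCR goal, the thresholded result can be passed straight to Tesseract. A minimal follow-up sketch, assuming pytesseract and the Tesseract binary are installed:

import pytesseract

# run OCR on the Gaussian-adaptive output, which usually copes better with uneven lighting
text = pytesseract.image_to_string(th2)
print(text)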
I am trying to extract text from the image below. I tried OCR in Python, but it is giving me incorrect results.
I preprocessed the image: removed the underline, used a Canny edge detector, increased the contrast ratio, and then fed it to OCR. Still, I am not getting the expected output.
With limited knowledge, I tried to separate the characters out of the image after increasing the contrast.
import cv2
import numpy as np
import os

image_path = os.path.join(os.path.dirname(__file__), "image.png")
im = cv2.imread(image_path)
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

# converted intermediate pixels to black and white
gray[gray < 100] = 0
gray[gray >= 100] = 255

# trim all-white rows/columns and all-black rows
gray = gray[~np.all(gray == 255, axis=1)]
gray = gray[:, ~np.all(gray == 255, axis=0)]
gray = gray[~np.all(gray == 0, axis=1)]

print(np.where(np.all(gray == 255, axis=0)))
print(gray[:, 20:33])

# split on all-white columns
words = np.hsplit(gray, np.where(np.all(gray == 255, axis=0))[0])
i = 0
for word in words:
    word = word[:, ~np.all(word == 255, axis=0)]
    if word.size:
        print(word.shape)
        i = i + 1
        cv2.imwrite("temp" + str(i) + ".png", word)
It became like this:
And again I gave this as input to pytesseract. It gave me blank output.
Here are my doubts:
Can we have a better mechanism to separate characters on whitespace from the image? Currently it seems highly brittle to me.
How can we preprocess the image so it is better detected by OCR?
Can we use neural networks or an SVM here, as with the MNIST digits dataset?
Short pointers are OK if this seems too broad. What is the best approach to tackle this kind of problem?
This answer implements what is said in my comment.
I changed your code a little and refrained from using OpenCV. The code is written using Python 3.5.
To extract the digits, I sum the image columnwise and scale the resulting array to get check. I am operating on the gray image that you already cut, which effectively gets rid of the underline.
x_sum = np.sum(gray, axis = 0)
check = ((x_sum)/np.max(x_sum)*10)
This array can now be used to compare with a threshold to identify the regions where a letter/digit is located such as:
plt.imshow(gray, cmap='gray')
x_sum = np.sum(gray, axis = 0)
check = ((x_sum)/np.max(x_sum)*10)
plt.plot((check<8).astype(int))
plt.show()
Now we will use this information to modify the image and erase the regions where the check array is valued 0 such as:
for idx, i in enumerate((check < 8).astype(int)):
    if i < 1:
        gray[:, idx] = 255
Therefore we have this image:
This can be further processed just as you are already doing. It provides separated letters/digits, which can then be postprocessed for learning.
The next step to work on is scaling/resizing the letters/images so that they are described by the same number of features.
Then, finally, you can use a pretrained classifier to predict the most probable letter/digit.
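As a rough sketch of those last two steps (purely illustrative: the classifier here is trained on scikit-learn's 8x8 digits set, which will not match screenshot glyphs well in practice, and words is the list of separated glyph arrays produced by the full code below):

import numpy as np
from PIL import Image
from sklearn.datasets import load_digits
from sklearn.neighbors import KNeighborsClassifier

# train a simple nearest-neighbour classifier on the 8x8 digits set (pixel values 0-16)
digits = load_digits()
clf = KNeighborsClassifier(n_neighbors=3).fit(digits.data, digits.target)

for word in words:
    word = word[:, ~np.all(word >= 230, axis=0)]
    if word.size:
        # resize each glyph to 8x8 so every sample has the same 64 features,
        # and invert so the ink is high-valued like the training data
        glyph = np.array(Image.fromarray(word.astype(np.uint8)).resize((8, 8)))
        features = ((255 - glyph) / 255.0 * 16).reshape(1, -1)
        print(clf.predict(features)[0])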
The full code is provided here:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from PIL import Image

image = Image.open("testl.png")
f = image.convert('I')
gray = np.array(f)

# binarize, then trim all-white rows/columns and all-black rows
gray[gray < 200] = 0
gray[gray >= 200] = 255
gray = gray[~np.all(gray == 255, axis=1)]
gray = gray[:, ~np.all(gray == 255, axis=0)]
gray = gray[~np.all(gray == 0, axis=1)]

plt.imshow(gray, cmap='gray')
x_sum = np.sum(gray, axis=0)
check = x_sum / np.max(x_sum) * 10
plt.plot((check < 8).astype(int))
plt.show()

plt.matshow(gray)
plt.show()

# erase the columns flagged as gaps
for idx, i in enumerate((check < 8).astype(int)):
    if i < 1:
        gray[:, idx] = 255

plt.matshow(gray)
plt.show()

# split on all-white columns and plot each glyph
words = np.hsplit(gray, np.where(np.all(gray >= 200, axis=0))[0])
gs = gridspec.GridSpec(1, len(words))
fig = plt.figure(figsize=(len(words), 1))
i = 0
for word in words:
    word = word[:, ~np.all(word >= 230, axis=0)]
    if word.size:
        ax = fig.add_subplot(gs[i])
        print(word.shape)
        i = i + 1
        ax.matshow(word, aspect='auto')
plt.show()
This finally yields all the separated letters/digits, such as: