I am building a character identifier from an image using Tesseract and Python.
This is my code:
import cv2
import numpy as np
import pytesseract
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load the image by a relative file name.
# NOTE(review): cv2.imread presumably yields None when the file cannot be
# read, which would match the "Unsupported image object" error quoted
# below - confirm that bigsleep.jpg is in the working directory.
img = cv2.imread("bigsleep.jpg")
# Run OCR on the loaded image.
text = pytesseract.image_to_string(img)
# Show the image in a window and wait for a key press before printing.
cv2.imshow("Img", img)
cv2.waitKey(0)
print(text)
I am getting the following error while executing this program:
TypeError: Unsupported image object
Can anyone help me solve this issue?
Related
I tried to read the image using pytesseract; however, the code is not able to read it. Please check the photo I am using for reading the text.
Below is my code:
import cv2
import time
import pyscreenshot as ImageGrab
import pytesseract
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd=r'C:/Users/RTam/AppData/Local/Programs/Tesseract-OCR/tesseract.exe'
# Grab the screen region given by bbox and save it as ss.png in the photos folder.
def takescreenshot():
path= (r'C:\Users\RTam\Desktop\python basics\web scraping\Pyautogui\photos')
im=ImageGrab.grab(bbox=(900,1000,1200,1100))
im.save(path+'\\'+'ss.png')
# NOTE(review): this reads ss3.png, not the ss.png written by
# takescreenshot() - confirm which file is intended.
img= cv2.imread(r'C:\Users\RTam\Desktop\python basics\web scraping\Pyautogui\photos\ss3.png')
# Display the image until a key is pressed, then close the window.
cv2.imshow('sample',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# OCR the image exactly as loaded (no preprocessing) and print the result.
sample_text= pytesseract.image_to_string(img)
print(sample_text)
The only output I'm getting is an empty space. Please help.
Eventually, I found the answer to my question.
However, this code will not run properly in the Spyder IDE, so we should make sure we have the latest Tesseract version.
import cv2
import time
import pyscreenshot as ImageGrab
import pytesseract
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd=r'C:/Users/RTam/AppData/Local/Programs/Tesseract-OCR/tesseract.exe'


def takescreenshot():
    """Grab the screen region given by ``bbox`` and save it as ``ss.png``."""
    path= (r'C:\Users\RTam\Desktop\python basics\web scraping\Pyautogui\photos')
    im=ImageGrab.grab(bbox=(900,1000,1200,1100))
    im.save(path+'\\'+'ss.png')


# Loaded at module level because clerify_pic() reads ``img`` as a global.
# NOTE(review): this reads ss3.png, not the ss.png written by
# takescreenshot() - confirm which file is intended.
img= cv2.imread(r'C:\Users\RTam\Desktop\python basics\web scraping\Pyautogui\photos\ss3.png')


def clerify_pic():
    """Preprocess the module-level ``img`` and return the text OCR finds in it.

    The image is enlarged by a factor of two in both directions, converted
    to grayscale and binarised with Otsu thresholding before it is handed
    to pytesseract.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    img2 = cv2.resize(img, (0, 0), fx=2, fy=2)
    gry = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    # Index [1] picks the second element of the value returned by
    # cv2.threshold, which is what gets passed on to the OCR call.
    thr = cv2.threshold(gry, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    return pytesseract.image_to_string(thr)
I am trying to solve a captcha:
and run a script:
from PIL import Image
from pytesseract import pytesseract
# Location of the Tesseract executable and of the captcha image to read.
path_to_tesseract = r"/usr/local/Cellar/tesseract/5.0.1/bin/tesseract"
image_path2 = r"captcha2.jpg"
# Open the captcha with PIL.
img = Image.open(image_path2)
# ``pytesseract`` here is the inner module (see the import above), so the
# executable path is set directly on it.
pytesseract.tesseract_cmd = path_to_tesseract
# Run OCR on the unprocessed image.
text = pytesseract.image_to_string(img)
# text[:-1] drops the last character of the OCR output
# (presumably a trailing terminator - TODO confirm).
print(text[:-1])
captchaText=text[:-1]
But the output is blank. When I use the same script with the following captcha:
it works great.
I'm trying to get pytesseract to identify the contents of an image as single characters and not words.
The following code works, but only for detecting words, not single characters, in the image:
#importing modules
import pytesseract
from PIL import Image
# If you don't have tesseract executable in your PATH, include the following:
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
# Convert the image to text.
# The path is a raw string: "\P", "\T" and "\i" are not valid escape
# sequences, and newer Python versions warn about them in ordinary literals.
# The resulting path value is unchanged.
print(pytesseract.image_to_string(Image.open(r'C:\Program Files\Tesseract-OCR\image2.png')))
My attempt at detecting single characters:
#importing modules
import pytesseract
from PIL import Image
# Tell pytesseract where the Tesseract executable is installed.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
#converting image to text
# NOTE(review): img_new is never assigned anywhere in this snippet, which
# matches the NameError quoted below. The '--psm 10' config is presumably
# the single-character page segmentation mode - TODO confirm.
text = pytesseract.image_to_string(img_new, lang='eng', config='--psm 10')
# This second call still OCRs the file with the default configuration.
print(pytesseract.image_to_string(Image.open('C:\Program Files\Tesseract-OCR\image2.png')))
I get this error:
text = pytesseract.image_to_string(img_new, lang='eng', config='--psm 10')
NameError: name 'img_new' is not defined
When I try to use pytesseract.image_to_osd() with any image and with any psm mode (I use 12 but I have tried this with the others), I get this error:
FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\IS\\AppData\\Local\\Temp\\tess_6f2kkuwq.osd'
Every time I run the code, the missing "file" is different, but it is always in the same format:
tess_xxxxxxx.osd, with the middle part being seemingly random.
What is going on here?
Full code:
import pytesseract
from pytesseract import Output
import cv2
# ``img`` first holds the file name and is then rebound to the loaded image.
img = "cn.png"
# Extra command-line configuration passed through to Tesseract.
conf = r'--psm 12'
img = cv2.imread(img)
# Preprocess: convert to grayscale, then apply a bilateral filter.
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.bilateralFilter(img,9,75,75)
# Request orientation/script detection, returned as a dictionary.
result = pytesseract.image_to_osd(img, config=conf, output_type=Output.DICT)
print(result)
I have a multi-page .tif file. I am trying to extract text from it using Tesseract OCR, but I am getting this error:
TypeError: Unsupported image object
Code
from PIL import Image
import pytesseract
# Open the multi-page TIFF with PIL.
img = Image.open('Group 1/1_CHE_MDC_1.tif')
# NOTE(review): the value handed to image_to_string here is the return
# value of img.seek(0), not the image object itself.
text = pytesseract.image_to_string(img.seek(0)) # OCR on 1st Page
# Collapse every run of whitespace into a single space.
text = ' '.join(text.split())
print(text)
ERROR
Any idea why this is happening?
Image.seek does not have a return value so you're essentially running:
pytesseract.image_to_string(None)
Instead do:
# Move to the first page in place; the call's return value is not used.
img.seek(0)
# Pass the image object itself to the OCR call.
text = pytesseract.image_to_string(img)
I had the same question; I tried the code below and it worked for me:
import glob
import pytesseract
import os
# Placeholder: replace with the folder containing the Tesseract-OCR executable.
os.chdir("Set your Tesseract-OCR .exe file path")
# Accumulates the OCR text of every image that is processed.
b = ''
# Placeholder pattern: use a *.jpg extension instead of *.tif for JPEG images.
for i in glob.glob('Fullpath of your image directory/*.tif'):
    # NOTE(review): this pattern is relative, so it looks for .tif files in
    # the current directory set by os.chdir above, not in the image
    # directory - confirm that this check is intended.
    if glob.glob('*.tif'):
        # image_to_string is given the file path rather than a loaded image.
        b = b + (pytesseract.image_to_string(i))
print(b)
Happy learning !