I am trying to detect text in an image that uses a particular font. I have the font name and its TTF file, and I have also tried resizing the image, but I am still unable to read the text. Here is the code:
import cv2
import pytesseract
def read(image):
    """Run Tesseract OCR on an image file and return the recognised text.

    The image is loaded with OpenCV, scaled by a factor of 2 along both
    axes, and passed to ``pytesseract.image_to_string``.

    Args:
        image: Path of the image file to read.

    Returns:
        The OCR output with leading and trailing whitespace stripped.

    Raises:
        OSError: If OpenCV could not load ``image``.
    """
    img = cv2.imread(image)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        raise OSError(f"Could not read image: {image!r}")
    img = cv2.resize(img, None, fx=2, fy=2)
    return pytesseract.image_to_string(image=img).strip()


print(read('data.png'))
Where data.png is:
And font name is Monotype Corsiva Regular
So is there any way to read the text?
Related
I am trying to extract "#opentowork" text from LinkedIn account thumbnails but it does not seem to work.
the image is:
I have tried the simplest way:
import pytesseract
# Point pytesseract at the local Tesseract executable, then OCR the thumbnail
# straight from its file name and print whatever text comes back.
tesseract_path = r'C:\Program Files (x86)\Tesseract-OCR\tesseract'
pytesseract.pytesseract.tesseract_cmd = tesseract_path
thumbnail_text = pytesseract.image_to_string(r'1633367686467.jpg')
print(thumbnail_text)
And using opencv
# Load the thumbnail with the grayscale read flag (numeric value 0).
gray = cv2.imread('1516535608688.jpg', cv2.IMREAD_GRAYSCALE)
# gray = 255 - gray  # invert image (left disabled)
ocr_config = '--psm 12'
print(pytesseract.image_to_string(gray, lang='eng', config=ocr_config))
But it returns nothing. I am not really experienced in text recognition. Is it possible to extract the text from such images?
Hello, I have the text detector code below, in which green squares are drawn around each piece of detected text using OpenCV, and it works well. I wanted to expand the project by speaking the detected words using the pyttsx3 module, but a problem occurs when I run the code: the window is not displayed, although the detected text is spoken.
import cv2
from matplotlib.pyplot import text
import pytesseract
from pytesseract import pytesseract
import pyttsx3
# Location of the Tesseract executable used by pytesseract.
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
# Every backslash is escaped; the previous "\D" was an invalid escape sequence.
img = cv2.imread('C:\\users\\HP\\Documents\\Yt thumbnails\\vector.png')

# Print the text contained in image
img2text = pytesseract.image_to_string(img)
print(img2text)

height, width, c = img.shape
letter_boxes = pytesseract.image_to_boxes(img)

# Speak the recognised text.
# NOTE(review): runAndWait() is documented to block until the queued speech
# has been processed, so the window below is only created afterwards --
# confirm whether that ordering is what is wanted.
say = pyttsx3.init()
speech = img2text
say.say(speech)
say.runAndWait()

for box in letter_boxes.splitlines():
    box = box.split()
    # Field 0 is the character; fields 1-4 are two opposite corners of its
    # box (left, bottom, right, top in Tesseract's box format), not a
    # width and height.
    left, bottom, right, top = (int(value) for value in box[1:5])
    # The y values are subtracted from the image height before drawing.
    cv2.rectangle(img, (left, height - bottom), (right, height - top),
                  (0, 0, 255), 3)  # Add boxes
    cv2.putText(img, box[0], (left, height - top + 32),
                cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)  # Add texts

cv2.imshow('Window', img)  # Display window
cv2.waitKey(0)
I'm trying to get pytesseract to identify the contents of an image as single characters rather than words.
Using code: This works, but only for detecting words not single characters in the image.
#importing modules
import pytesseract
from PIL import Image
# If you don't have tesseract executable in your PATH, include the following:
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
# Convert the image to text. The path is a raw string so its backslashes are
# taken literally, and the image file is closed once OCR has finished.
with Image.open(r'C:\Program Files\Tesseract-OCR\image2.png') as image:
    print(pytesseract.image_to_string(image))
Attempting to view single characters Code:
#importing modules
import pytesseract
from PIL import Image
# Tell pytesseract where the Tesseract executable is.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
#converting image to text
# NOTE(review): `img_new` is not assigned anywhere in this snippet; the only
# image that is opened is the one inside the print call below.
text = pytesseract.image_to_string(img_new, lang='eng', config='--psm 10')
# This second call passes no lang/config arguments, and `text` from the line
# above is never printed.
print(pytesseract.image_to_string(Image.open('C:\Program Files\Tesseract-OCR\image2.png')))
I get error
text = pytesseract.image_to_string(img_new, lang='eng', config='--psm 10')
NameError: name 'img_new' is not defined
from PIL import Image, ImageDraw, ImageFont
import glob
import os
# Collect the paths of every .jpg file matched by the pattern.
images = glob.glob("directory_path/*.jpg")
for img in images:
    # NOTE(review): `images` is rebound here from the list of paths to the
    # image that was just opened.
    images = Image.open(img)
    draw = ImageDraw.Draw(images)
    # NOTE(review): load_default() is called with no font file, which does
    # not match the trailing comment about a downloaded font -- confirm.
    font = ImageFont.load_default() #Downloaded Font from Google font
    text = "Text on all images from directory"
    # Draw the text at (0, 150) with the colour (250, 250, 250).
    draw.text((0,150),text,(250,250,250),font=font)
    # Save the result back to the path the image was opened from.
    images.save(img)
I have to put text on all images. I have tried the above code, but it's not working.
This code worked for me just fine, but the text was hard to read because it was small and white. I did change directory_path to images and put my images in there. The images looked like this, the text is small and on the left side:
Here is the solution
from PIL import Image,ImageDraw,ImageFont
import glob
import os
# The font and the caption are the same for every image, so create them once.
font = ImageFont.load_default()
text = "Whatever text"

# Each opened image gets its own name so that `images`, the list of paths
# being looped over, is not rebound inside the loop.
images = glob.glob("path/*.jpg")
for img in images:
    image = Image.open(img)
    draw = ImageDraw.Draw(image)
    draw.text((0, 240), text, fill=(250, 250, 250), font=font)
    # Overwrite the file the image was opened from with the captioned version.
    image.save(img)
One possible problem with the code may be that you are using the images variable both for the list of image paths and, inside the loop, for each opened image.
Try this code, this will work for sure.
from PIL import Image, ImageDraw, ImageFont
import glob
import os
# Write the same red caption onto every .jpg matched in new_dir, saving each
# image back to the path it was opened from.
images = glob.glob("new_dir/*.jpg")
print(images)

caption = "Text on all images from directory"
for jpg_path in images:
    picture = Image.open(jpg_path)
    canvas = ImageDraw.Draw(picture)
    default_font = ImageFont.load_default()
    canvas.text((0, 150), caption, fill='red', font=default_font)
    picture.save(jpg_path)
I have a multi-page .tif file. I am trying to extract text from it using Tesseract OCR, but I am getting this error:
TypeError: Unsupported image object
Code
from PIL import Image
import pytesseract
# Open the .tif file.
img = Image.open('Group 1/1_CHE_MDC_1.tif')
# NOTE(review): what is passed to image_to_string here is the return value
# of img.seek(0), not the image object `img` itself.
text = pytesseract.image_to_string(img.seek(0)) # OCR on 1st Page
# Collapse every run of whitespace into a single space.
text = ' '.join(text.split())
print(text)
ERROR
Any idea why it's happening?
Image.seek does not have a return value so you're essentially running:
pytesseract.image_to_string(None)
Instead do:
# Call seek(0) as its own statement, then pass the image object itself to
# image_to_string.
img.seek(0)
text = pytesseract.image_to_string(img)
I had the same question; I tried the code below and it worked for me:
import glob
import pytesseract
import os
# NOTE(review): placeholder kept from the original -- replace it with the
# directory that contains the Tesseract-OCR executable.
os.chdir("Set your Tesseract-OCR .exe file path")

# OCR every file matched by the pattern and concatenate the results; use a
# '*.jpg' pattern instead for JPEG images. The former inner
# `if glob.glob('*.tif')` test looked for .tif files in the current working
# directory rather than at the file being processed, so it has been dropped.
b = ''.join(
    pytesseract.image_to_string(i)
    for i in glob.glob('Fullpath of your image directory/*.tif')
)
print(b)
Happy learning !