Digit recognition with Tesseract OCR and python - python

I use Tesseract and python to read digits (from a energy meter).
Everything works well except for the number "1".
Tesseract can not read the "1" Digit.
This is the picture I send to tesseract :
And tesseract reads "0000027 ".
How can I tell Tesseract that the vertical rod is a "1" ?
This is my tesseract initialisation :
import tesseract
TESSERACT_LIBRARY_PATH = "C:\\Program Files (x86)\\Tesseract-OCR"
LANGUAGE = "eng"
CHARACTERS = "0123456789"
FALSE = "0"
TRUE = "1"
def init_ocr():
"""
.. py:function:: init_ocr()
Utilize the Tesseract-OCR library to create an tesseract_ocr that
predicts the numbers to be read off of the meter.
:return: tesseract_ocr Tesseracts OCR API.
:rtype: Class
"""
# Initialize the tesseract_ocr with the english language package.
tesseract_ocr = tesseract.TessBaseAPI()
tesseract_ocr.Init(TESSERACT_LIBRARY_PATH, LANGUAGE,
tesseract.OEM_DEFAULT)
# Limit the characters being seached for to numerics.
tesseract_ocr.SetVariable("tessedit_char_whitelist", CHARACTERS)
# Set the tesseract_ocr to predict for only one character.
tesseract_ocr.SetPageSegMode(tesseract.PSM_AUTO)
# Tesseract's Directed Acyclic Graph.
# Not necessary for number recognition.
tesseract_ocr.SetVariable("load_system_dawg", FALSE)
tesseract_ocr.SetVariable("load_freq_dawg", FALSE)
tesseract_ocr.SetVariable("load_number_dawg", TRUE)
tesseract_ocr.SetVariable("classify_enable_learning", FALSE)
tesseract_ocr.SetVariable("classify_enable_adaptive_matcher", FALSE)
return tesseract_ocr

Slightly irrelevant answer, though may serve your original goal.
I had similar problem with tesseract and I had very strict performance requirements as well. I found this simple solution on SO and crafted simple recogniser with OpenCV.
It boils down to finding bounding rectangles (from edges) on the very clear image that you have and then trying to match found objects versus templates. I believe the solution in your case will be both simple and precise though will require slightly more code than you have now.
I will follow this question, since it will be nice to have working solution with tesseract.
I have a limited time, but it seems to be a working solution:
import os
import cv2
import numpy
KNN_SQUARE_SIDE = 50 # Square 50 x 50 px.
def resize(cv_image, factor):
new_size = tuple(map(lambda x: x * factor, cv_image.shape[::-1]))
return cv2.resize(cv_image, new_size)
def crop(cv_image, box):
x0, y0, x1, y1 = box
return cv_image[y0:y1, x0:x1]
def draw_box(cv_image, box):
x0, y0, x1, y1 = box
cv2.rectangle(cv_image, (x0, y0), (x1, y1), (0, 0, 255), 2)
def draw_boxes_and_show(cv_image, boxes, title='N'):
temp_image = cv2.cvtColor(cv_image, cv2.COLOR_GRAY2RGB)
for box in boxes:
draw_box(temp_image, box)
cv2.imshow(title, temp_image)
cv2.waitKey(0)
class BaseKnnMatcher(object):
distance_threshold = 0
def __init__(self, source_dir):
self.model, self.label_map = self.get_model_and_label_map(source_dir)
#staticmethod
def get_model_and_label_map(source_dir):
responses = []
label_map = []
samples = numpy.empty((0, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE), numpy.float32)
for label_idx, filename in enumerate(os.listdir(source_dir)):
label = filename[:filename.index('.png')]
label_map.append(label)
responses.append(label_idx)
image = cv2.imread(os.path.join(source_dir, filename), 0)
suit_image_standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
sample = suit_image_standard_size.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE))
samples = numpy.append(samples, sample, 0)
responses = numpy.array(responses, numpy.float32)
responses = responses.reshape((responses.size, 1))
model = cv2.KNearest()
model.train(samples, responses)
return model, label_map
def predict(self, image):
image_standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
image_standard_size = numpy.float32(image_standard_size.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE)))
closest_class, results, neigh_resp, distance = self.model.find_nearest(image_standard_size, k=1)
if distance[0][0] > self.distance_threshold:
return None
return self.label_map[int(closest_class)]
class DigitKnnMatcher(BaseKnnMatcher):
distance_threshold = 10 ** 10
class MeterValueReader(object):
def __init__(self):
self.digit_knn_matcher = DigitKnnMatcher(source_dir='templates')
#classmethod
def get_symbol_boxes(cls, cv_image):
ret, thresh = cv2.threshold(cv_image.copy(), 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
symbol_boxes = []
for contour in contours:
x, y, width, height = cv2.boundingRect(contour)
# You can test here for box size, though not required in your example:
# if cls.is_size_of_digit(width, height):
# symbol_boxes.append((x, y, x+width, y+height))
symbol_boxes.append((x, y, x+width, y+height))
return symbol_boxes
def get_value(self, meter_cv2_image):
symbol_boxes = self.get_symbol_boxes(meter_cv2_image)
symbol_boxes.sort() # x is first in tuple
symbols = []
for box in symbol_boxes:
symbol = self.digit_knn_matcher.predict(crop(meter_cv2_image, box))
symbols.append(symbol)
return symbols
if __name__ == '__main__':
# If you want to see how boxes detection works, uncomment these:
# img_bw = cv2.imread(os.path.join('original.png'), 0)
# boxes = MeterValueReader.get_symbol_boxes(img_bw)
# draw_boxes_and_show(img_bw, boxes)
# Uncomment to generate templates from image
# import random
# TEMPLATE_DIR = 'templates'
# img_bw = cv2.imread(os.path.join('original.png'), 0)
# boxes = MeterValueReader.get_symbol_boxes(img_bw)
# for box in boxes:
# # You need to label templates manually after extraction
# cv2.imwrite(os.path.join(TEMPLATE_DIR, '%s.png' % random.randint(0, 1000)), crop(img_bw, box))
img_bw = cv2.imread(os.path.join('original.png'), 0)
vr = MeterValueReader()
print vr.get_value(img_bw)

Related

Using OpenCV homography with python error in method findHomography

I have a goal to do homography on a live video by capturing my screen and processing it.
In order to do so, I took the code from this link, and manipulated it inside a while loop as follows:
from __future__ import print_function
import cv2 as cv
import numpy as np
from windowcapture import WindowCapture
# initialize the WindowCapture class
capture = WindowCapture('My Window')
bar_img = cv.imread('hammer.jpg',cv.IMREAD_GRAYSCALE)
while(True):
# get an updated image of the game
screenshot = capture.get_screenshot()
screenshot = cv.cvtColor(screenshot,cv.IMREAD_GRAYSCALE)
if bar_img is None or screenshot is None:
print('Could not open or find the images!')
exit(0)
#-- Step 1: Detect the keypoints using SURF Detector, compute the descriptors
minHessian = 400
detector = cv.SIFT_create()
keypoints_obj, descriptors_obj = detector.detectAndCompute(bar_img, None)
keypoints_scene, descriptors_scene = detector.detectAndCompute(screenshot, None)
#-- Step 2: Matching descriptor vectors with a FLANN based matcher
# Since SURF is a floating-point descriptor NORM_L2 is used
matcher = cv.DescriptorMatcher_create(cv.DescriptorMatcher_FLANNBASED)
knn_matches = matcher.knnMatch(descriptors_obj, descriptors_scene, 2)
#-- Filter matches using the Lowe's ratio test
ratio_thresh = 0.75
good_matches = []
for m,n in knn_matches:
if m.distance < ratio_thresh * n.distance:
good_matches.append(m)
#-- Draw matches
img_matches = np.empty((max(bar_img.shape[0], screenshot.shape[0]), bar_img.shape[1]+screenshot.shape[1], 3), dtype=np.uint8)
cv.drawMatches(bar_img, keypoints_obj, screenshot, keypoints_scene, good_matches, img_matches, flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
#-- Localize the object
obj = np.empty((len(good_matches),2), dtype=np.float32)
scene = np.empty((len(good_matches),2), dtype=np.float32)
for i in range(len(good_matches)):
#-- Get the keypoints from the good matches
obj[i,0] = keypoints_obj[good_matches[i].queryIdx].pt[0]
obj[i,1] = keypoints_obj[good_matches[i].queryIdx].pt[1]
scene[i,0] = keypoints_scene[good_matches[i].trainIdx].pt[0]
scene[i,1] = keypoints_scene[good_matches[i].trainIdx].pt[1]
H, _ = cv.findHomography(obj, scene, cv.RANSAC)
#-- Get the corners from the image_1 ( the object to be "detected" )
obj_corners = np.empty((4,1,2), dtype=np.float32)
obj_corners[0,0,0] = 0
obj_corners[0,0,1] = 0
obj_corners[1,0,0] = bar_img.shape[1]
obj_corners[1,0,1] = 0
obj_corners[2,0,0] = bar_img.shape[1]
obj_corners[2,0,1] = bar_img.shape[0]
obj_corners[3,0,0] = 0
obj_corners[3,0,1] = bar_img.shape[0]
scene_corners = cv.perspectiveTransform(obj_corners, H)
#-- Draw lines between the corners (the mapped object in the scene - image_2 )
cv.line(img_matches, (int(scene_corners[0,0,0] + bar_img.shape[1]), int(scene_corners[0,0,1])),\
(int(scene_corners[1,0,0] + bar_img.shape[1]), int(scene_corners[1,0,1])), (0,255,0), 4)
cv.line(img_matches, (int(scene_corners[1,0,0] + bar_img.shape[1]), int(scene_corners[1,0,1])),\
(int(scene_corners[2,0,0] + bar_img.shape[1]), int(scene_corners[2,0,1])), (0,255,0), 4)
cv.line(img_matches, (int(scene_corners[2,0,0] + bar_img.shape[1]), int(scene_corners[2,0,1])),\
(int(scene_corners[3,0,0] + bar_img.shape[1]), int(scene_corners[3,0,1])), (0,255,0), 4)
cv.line(img_matches, (int(scene_corners[3,0,0] + bar_img.shape[1]), int(scene_corners[3,0,1])),\
(int(scene_corners[0,0,0] + bar_img.shape[1]), int(scene_corners[0,0,1])), (0,255,0), 4)
#-- Show detected matches
cv.imshow('Good Matches & Object detection', img_matches)
cv.waitKey()
if cv.waitKey(1) == ord('q'):
cv.destroyAllWindows()
break
print('Done.')
The class WindowCapture that I used uses win32gui to capture the window (maybe it makes a difference if I used it like this and not imread?)
I get the following error when I run the code:
C:\Users\Tester\AppData\Local\Temp\pip-req-build-1i5nllza\opencv\modules\calib3d\src\fundam.cpp:385: error: (-28:Unknown error code -28) The input arrays should have at least 4 corresponding point sets to calculate Homography in function 'cv::findHomography'
Any idea why it happens?

Set up correctly variables in Python

So i'm trying to launch a Python script (originally available from here : https://github.com/dvdtho/python-photo-mosaic). Full code at the bottom of this post.
This basically creates a mosaic (from a source image), with the final image (mosaic) is composed of several other images (tiles).
My question is how I am supposed to fill the variables (the ones at line 212) in order to run the script (through Eclispe in my case).
Should I put directly something like this? (in my case the folder Desktop/tiles contains all the jpg files) :
tile_paths = glob.glob("C:/Users/Sylvia/Desktop/tiles/*.jpg") # I've added this line myself
def create_mosaic(source_path="C:\\Users\\Sylvia\\Desktop\\source\\1.jpg", target="C:\\Users\\Sylvia\\Desktop\\source\\result.jpg", tile_ratio=1920/800, tile_width=75, enlargement=8, reuse=True, color_mode='RGB', tile_paths=None, shuffle_first=30):
Last time i tried i got this error :
def create_mosaic(source, target, tile_ratio=1920/800, tile_width=75,
enlargement=8, reuse=True, color_mode='RGB', tile_paths,
shuffle_first=30):
^ SyntaxError: non-default argument follows default argument
I'm very lost, hopefully someone can help me.
Here's the code :
import time
import itertools
import random
import sys
import numpy as np
from PIL import Image
from skimage import img_as_float
from skimage.measure import compare_mse
def shuffle_first_items(lst, i):
if not i:
return lst
first_few = lst[:i]
remaining = lst[i:]
random.shuffle(first_few)
return first_few + remaining
def bound(low, high, value):
return max(low, min(high, value))
class ProgressCounter:
def __init__(self, total):
self.total = total
self.counter = 0
def update(self):
self.counter += 1
sys.stdout.write("Progress: %s%% %s" % (100 * self.counter / self.total, "\r"))
sys.stdout.flush()
def img_mse(im1, im2):
"""Calculates the root mean square error (RSME) between two images"""
try:
return compare_mse(img_as_float(im1), img_as_float(im2))
except ValueError:
print(f'RMS issue, Img1: {im1.size[0]} {im1.size[1]}, Img2: {im2.size[0]} {im2.size[1]}')
raise KeyboardInterrupt
def resize_box_aspect_crop_to_extent(img, target_aspect, centerpoint=None):
width = img.size[0]
height = img.size[1]
if not centerpoint:
centerpoint = (int(width / 2), int(height / 2))
requested_target_x = centerpoint[0]
requested_target_y = centerpoint[1]
aspect = width / float(height)
if aspect > target_aspect:
# Then crop the left and right edges:
new_width = int(target_aspect * height)
new_width_half = int(new_width/2)
target_x = bound(new_width_half, width-new_width_half, requested_target_x)
left = target_x - new_width_half
right = target_x + new_width_half
resize = (left, 0, right, height)
else:
# ... crop the top and bottom:
new_height = int(width / target_aspect)
new_height_half = int(new_height/2)
target_y = bound(new_height_half, height-new_height_half, requested_target_y)
top = target_y - new_height_half
bottom = target_y + new_height_half
resize = (0, top, width, bottom)
return resize
def aspect_crop_to_extent(img, target_aspect, centerpoint=None):
'''
Crop an image to the desired perspective at the maximum size available.
Centerpoint can be provided to focus the crop to one side or another -
eg just cut the left side off if interested in the right side.
target_aspect = width / float(height)
centerpoint = (width, height)
'''
resize = resize_box_aspect_crop_to_extent(img, target_aspect, centerpoint)
return img.crop(resize)
class Config:
def __init__(self, tile_ratio=1920/800, tile_width=50, enlargement=8, color_mode='RGB'):
self.tile_ratio = tile_ratio # 2.4
self.tile_width = tile_width # height/width of mosaic tiles in pixels
self.enlargement = enlargement # mosaic image will be this many times wider and taller than original
self.color_mode = color_mode # mosaic image will be this many times wider and taller than original
#property
def tile_height(self):
return int(self.tile_width / self.tile_ratio)
#property
def tile_size(self):
return self.tile_width, self.tile_height # PIL expects (width, height)
class TileBox:
"""
Container to import, process, hold, and compare all of the tiles
we have to make the mosaic with.
"""
def __init__(self, tile_paths, config):
self.config = config
self.tiles = list()
self.prepare_tiles_from_paths(tile_paths)
def __process_tile(self, tile_path):
with Image.open(tile_path) as i:
img = i.copy()
img = aspect_crop_to_extent(img, self.config.tile_ratio)
large_tile_img = img.resize(self.config.tile_size, Image.ANTIALIAS).convert(self.config.color_mode)
self.tiles.append(large_tile_img)
return True
def prepare_tiles_from_paths(self, tile_paths):
print('Reading tiles from provided list...')
progress = ProgressCounter(len(tile_paths))
for tile_path in tile_paths:
progress.update()
self.__process_tile(tile_path)
print('Processed tiles.')
return True
def best_tile_block_match(self, tile_block_original):
match_results = [img_mse(t, tile_block_original) for t in self.tiles]
best_fit_tile_index = np.argmin(match_results)
return best_fit_tile_index
def best_tile_from_block(self, tile_block_original, reuse=False):
if not self.tiles:
print('Ran out of images.')
raise KeyboardInterrupt
#start_time = time.time()
i = self.best_tile_block_match(tile_block_original)
#print("BLOCK MATCH took --- %s seconds ---" % (time.time() - start_time))
match = self.tiles[i].copy()
if not reuse:
del self.tiles[i]
return match
class SourceImage:
"""Processing original image - scaling and cropping as needed."""
def __init__(self, image_path, config):
print('Processing main image...')
self.image_path = image_path
self.config = config
with Image.open(self.image_path) as i:
img = i.copy()
w = img.size[0] * self.config.enlargement
h = img.size[1] * self.config.enlargement
large_img = img.resize((w, h), Image.ANTIALIAS)
w_diff = (w % self.config.tile_width)/2
h_diff = (h % self.config.tile_height)/2
# if necesary, crop the image slightly so we use a
# whole number of tiles horizontally and vertically
if w_diff or h_diff:
large_img = large_img.crop((w_diff, h_diff, w - w_diff, h - h_diff))
self.image = large_img.convert(self.config.color_mode)
print('Main image processed.')
class MosaicImage:
"""Holder for the mosaic"""
def __init__(self, original_img, target, config):
self.config = config
self.target = target
# Lets just start with original image, scaled up, instead of a blank one
self.image = original_img
# self.image = Image.new(original_img.mode, original_img.size)
self.x_tile_count = int(original_img.size[0] / self.config.tile_width)
self.y_tile_count = int(original_img.size[1] / self.config.tile_height)
self.total_tiles = self.x_tile_count * self.y_tile_count
print(f'Mosaic will be {self.x_tile_count:,} tiles wide and {self.y_tile_count:,} tiles high ({self.total_tiles:,} total).')
def add_tile(self, tile, coords):
"""Adds the provided image onto the mosiac at the provided coords."""
try:
self.image.paste(tile, coords)
except TypeError as e:
print('Maybe the tiles are not the right size. ' + str(e))
def save(self):
self.image.save(self.target)
def coords_from_middle(x_count, y_count, y_bias=1, shuffle_first=0, ):
'''
Lets start in the middle where we have more images.
And we dont get "lines" where the same-best images
get used at the start.
y_bias - if we are using non-square coords, we can
influence the order to be closer to the real middle.
If width is 2x height, y_bias should be 2.
shuffle_first - We can suffle the first X coords
so that we dont use all the same-best images
in the same spot - in the middle
from movies.mosaic_mem import coords_from_middle
x = 10
y = 10
coords_from_middle(x, y, y_bias=2, shuffle_first=0)
'''
x_mid = int(x_count/2)
y_mid = int(y_count/2)
coords = list(itertools.product(range(x_count), range(y_count)))
coords.sort(key=lambda c: abs(c[0]-x_mid)*y_bias + abs(c[1]-y_mid))
coords = shuffle_first_items(coords, shuffle_first)
return coords
def create_mosaic(source_path, target, tile_ratio=1920/800, tile_width=75, enlargement=8, reuse=True, color_mode='RGB', tile_paths=None, shuffle_first=30):
"""Forms an mosiac from an original image using the best
tiles provided. This reads, processes, and keeps in memory
a copy of the source image, and all the tiles while processing.
Arguments:
source_path -- filepath to the source image for the mosiac
target -- filepath to save the mosiac
tile_ratio -- height/width of mosaic tiles in pixels
tile_width -- width of mosaic tiles in pixels
enlargement -- mosaic image will be this many times wider and taller than the original
reuse -- Should we reuse tiles in the mosaic, or just use each tile once?
color_mode -- L for greyscale or RGB for color
tile_paths -- List of filepaths to your tiles
shuffle_first -- Mosiac will be filled out starting in the center for best effect. Also,
we will shuffle the order of assessment so that all of our best images aren't
necessarily in one spot.
"""
config = Config(
tile_ratio = tile_ratio, # height/width of mosaic tiles in pixels
tile_width = tile_width, # height/width of mosaic tiles in pixels
enlargement = enlargement, # the mosaic image will be this many times wider and taller than the original
color_mode = color_mode, # L for greyscale or RGB for color
)
# Pull in and Process Original Image
print('Setting Up Target image')
source_image = SourceImage(source_path, config)
# Setup Mosaic
mosaic = MosaicImage(source_image.image, target, config)
# Assest Tiles, and save if needed, returns directories where the small and large pictures are stored
print('Assessing Tiles')
tile_box = TileBox(tile_paths, config)
try:
progress = ProgressCounter(mosaic.total_tiles)
for x, y in coords_from_middle(mosaic.x_tile_count, mosaic.y_tile_count, y_bias=config.tile_ratio, shuffle_first=shuffle_first):
progress.update()
# Make a box for this sector
box_crop = (x * config.tile_width, y * config.tile_height, (x + 1) * config.tile_width, (y + 1) * config.tile_height)
# Get Original Image Data for this Sector
comparison_block = source_image.image.crop(box_crop)
# Get Best Image name that matches the Orig Sector image
tile_match = tile_box.best_tile_from_block(comparison_block, reuse=reuse)
# Add Best Match to Mosaic
mosaic.add_tile(tile_match, box_crop)
# Saving Every Sector
mosaic.save()
except KeyboardInterrupt:
print('\nStopping, saving partial image...')
finally:
mosaic.save()
It's ok, this is the new file i have to create in order for it to work :
create_mosaic(
subject="/path/to/source/image",
target="/path/to/output/image",
tile_paths=["/path/to/tile_1" , ... "/path/to/tile_n"],
tile_ratio=1920/800, # Crop tiles to be height/width ratio
tile_width=300, # Tile will be scaled
enlargement=20, # Mosiac will be this times larger than original
reuse=False, # Should tiles be used multiple times?
color_mode='L', # RGB (color) L (greyscale)
)
Problem resovled.

ModuleNotFoundError: No module named 'gtk'

I've tried to follow several solutions on the internet also this one, but no luck. I'm in the process of implementing a object detection system. I am on Windows 10, and using PyCharm using python-3.8
I am getting errors for the packages, I've tried to add them through the package installer and through the terminal, no luck. Here's the error:
Traceback (most recent call last):
File "D:/CabaleGame/runner.py", line 2, in <module>
import processCards
File "D:\CabaleGame\processCards.py", line 1, in <module>
import gtk.gdk
ModuleNotFoundError: No module named 'gtk'
Process finished with exit code 1
I've downloaded the package from here and here, which one is correct?
My program file:
import gtk.gdk
import cv
import time
import os
import string
def takeScreenCapture(screenShotNum = ""):
time.sleep(1)
w = gtk.gdk.get_default_root_window()
sz = w.get_size()
#print "The size of the window is %d x %d" % sz
pb = gtk.gdk.Pixbuf(gtk.gdk.COLORSPACE_RGB,False,8,sz[0],sz[1])
pb = pb.get_from_drawable(w,w.get_colormap(),0,0,0,0,sz[0],sz[1])
# Convert gtk.PixelBuf to a NumPy array
array = pb.get_pixels_array()
# Convert NumPy array to CvMat
mat = cv.fromarray(array)
# Convert RGB to BGR
cv.CvtColor(mat, mat, cv.CV_RGB2BGR)
#cv.ShowImage("win",mat)
#cv.WaitKey(0)
return mat
def getMeaningFromCards(cards):
"""
This takes a dictionary of the form:
(x, y) : Card image
and returns a dictionary of the form:
(x, y) : (number, suit)
(x, y) are the coordinates of the top left of the card
"""
imgdir = "LibraryImages"
templatesNums = os.listdir(os.path.join(imgdir,"Numbers"))
templatesSuits = os.listdir(os.path.join(imgdir,"Suits"))
#templates = filter(lambda s: s[-4:] == ".png", templates)
templatesNums = map(lambda s: os.path.join(imgdir,"Numbers", s), templatesNums)
templatesSuits = map(lambda s: os.path.join(imgdir, "Suits", s), templatesSuits)
for k in cards.keys():
card = cards[k]
cardImg = cv.CreateImageHeader((card.width, card.height), 8, 3)
cv.SetData(cardImg, card.tostring())
numAndSuit3 = cv.GetSubRect(cardImg, (0,0,30,80))
numAndSuit1 = cv.CreateImage((numAndSuit3.width, numAndSuit3.height), 8, 1)
cv.CvtColor(numAndSuit3, numAndSuit1, cv.CV_RGB2GRAY)
# Convert the 1 channel grayscale to 3 channel grayscale
# (GRAY2RGB doesn't actually introduce color)
cv.CvtColor(numAndSuit1, numAndSuit3, cv.CV_GRAY2RGB)
num = findBestTemplateMatch(templatesNums, numAndSuit3)
suit = findBestTemplateMatch(templatesSuits, numAndSuit3)
#print num, suit
# If this image was recognized as a card, but didn't match
# any template, it shouldn't be in the list in the first place
if num == None or suit == None:
del cards[k]
continue
num = string.split(os.path.basename(num), '.')[0]
suit = string.split(os.path.basename(suit), '.')[0]
# The alternate file names have underscores
# after their names
if num[-1] == '_':
num = num[:-1]
if suit[-1] == '_':
suit = suit[:-1]
cards[k] = (num, suit)
#cv.ShowImage("NumandSuit", numAndSuit)
#cv.WaitKey(0)
print
cards
return cards
def findBestTemplateMatch(tplList, img):
"""
Compares img against a list of templates.
tplList is a list of string filenames of template images
Returns a tuple (num, suit) if a template is suitably matched
or None if not
"""
minTpl = 200 # arbitrarily large number
tString = None
for t in tplList:
tpl = cv.LoadImage(t)
w = img.width - tpl.width + 1
h = img.height - tpl.height + 1
result = cv.CreateImage((w,h), 32, 1)
cv.MatchTemplate(img, tpl, result, cv.CV_TM_SQDIFF_NORMED)
(minVal, maxVal, minLoc, maxLoc) = cv.MinMaxLoc(result)
#print t
#print (minVal, maxVal, minLoc, maxLoc)
# 0.2 found by experiment (the non-card images end up being around
# 0.25 - 0.28, and all the card images were much around 0.08 and less
if minVal < minTpl and minVal < 0.2:
minTpl = minVal
tString = t
#print minTpl, tString
#cv.ShowImage("win", img)
#cv.ShowImage("win2", result)
#cv.WaitKey(0)
return tString
def extractCards(fileName = None):
"""
Given an image, this will extract the cards from it.
This takes a filename as an optional argument
This filename should be the name of an image file.
This returns a dictionary of the form:
(x, y) : Card image
It is likely that the output from this will go to the
getMeaningFromCards() function.
"""
if fileName == None:
mat = takeScreenCapture()
else:
mat = cv.LoadImage(fileName)
# First crop the image: but only crop out the bottom.
# It is useful to have all dimensions accurate to the screen
# because otherwise they will throw off the mouse moving and clicking.
# Cropping out the bottom does not change anything in terms of the mouse.
unnec_top_distance = 130
unnec_bottom_distance = 40
margin = 50
submat = cv.GetSubRect(mat, (0,0,mat.width, mat.height - unnec_bottom_distance))
subImg = cv.CreateImageHeader((submat.width, submat.height), 8, 3)
cv.SetData(subImg, submat.tostring())
gray = cv.CreateImage((submat.width, submat.height), 8, 1)
cv.CvtColor(submat, gray, cv.CV_RGB2GRAY)
thresh = 250
max_value = 255
cv.Threshold(gray, gray, thresh, max_value, cv.CV_THRESH_BINARY)
cv.Not(gray,gray)
#cv.ShowImage("sub", submat)
#cv.WaitKey(0)
storage = cv.CreateMemStorage (0)
cpy = cv.CloneImage(gray)
contours = cv.FindContours( cpy, storage, cv.CV_RETR_LIST, cv.CV_CHAIN_APPROX_SIMPLE, (0,0) );
#contours = cv.ApproxPoly(contours, cv.CreateMemStorage(), cv.CV_POLY_APPROX_DP, 3, 1)
bboxes = []
if contours:
while(contours):
area = cv.ContourArea(contours)
# It turns out that all the cards are about 44000 in area...
# It would definitely be nice to have a better way to do this:
# ie, find the size of the card programmatically and use it then
if(area > 44000 and area < submat.width*submat.height*2/3):
bb = cv.BoundingRect(contours)
bboxes.append(bb)
contours = contours.h_next()
#drawBoundingBoxes(bboxes, submat)
# cards is a dictionary of the form:
# (x, y) : card
cards = {}
for box in bboxes:
card = cv.GetSubRect(subImg, box)
#cv.ShowImage("card", card)
#cv.WaitKey(0)
cards[(box[0], box[1])] = card
return cards
def drawBoundingBoxes(bb, img):
for b in bb:
x = b[0]
y = b[1]
width = b[2]
height = b[3]
cv.Rectangle(img, (x,y), (x+width, y+height), (0,255,0,0))
cv.ShowI
mage("bb", img)
cv.WaitKey(0)
def drawSquares(listWithPoints,img):
for l in listWithPoints:
for p in range(len(l)-1):
cv.Line(img, l[p], l[p+1], (0,0,255,0),2)
cv.Line(img, l[-1], l[0], (0,0,255,0),2)
#cv.ShowImage("sub", img)
#cv.WaitKey(0)
def contourToPointList(contour):
plist = []
for (x,y) in contour:
plist.append((x,y))
return plist
if __name__ == '__main__':
cards = extractCards('CardImages/4_heart.jpg')
print
cards
#c = cards[cards.keys()[0]]
#print c
Is it possible to add it manually through the folders?
Would it work if i put the package her:
C:\Users\User1\AppData\Local\Programs\Python\Python38\include
From python documentation :
Replace import gtk.gdk by :
import gi
gi.require_version("Gtk", "insert your gtk version")
from gi.repository import Gtk

Pixelation of PIL generated image

I am trying to generate PDF files from generated image. The generated PDF file has high level of pixelation on zooming in which is creating shadows during printing.
Image of zoomed in qrcode from PDF
Showing gray zone around qrcode modules and pixels (gray) which should be white otherwise. It does not matter if the desired_resolution matches or is lower than the resolution in which the original image was created.
What could be the issue and possible fixes?
qr_size_mm = 8
MM2PX_FACTOR = 94.48 # 2400 dpi
def create_code(prefix, number, postfix=None):
message = prefix + str(number)
message += postfix if postfix is not None else ""
series_qrcode = pyqrcode.create(message, error='H', version=3, mode='binary')
# print(series_qrcode.get_png_size())
binary = BytesIO()
desired_scale = int(qr_size_px / series_qrcode.get_png_size())
series_qrcode.png(binary, scale=desired_scale, module_color=(0, 0, 0),
background=(255, 255, 255), quiet_zone=3)
tmpIm = Image.open(binary).convert('RGB')
qr_dim = tmpIm.getbbox()
# print(qr_dim)
return tmpIm, qr_dim
qr_size_px = int(qr_size_mm * MM2PX_FACTOR)
# create A4 canvas
paper_width_mm = 210
paper_height_mm = 297
start_offset_mm = 10
start_offset_px = start_offset_mm * MM2PX_FACTOR
canvas_width_px = int(paper_width_mm * MM2PX_FACTOR)
canvas_height_px = int(paper_height_mm * MM2PX_FACTOR)
pil_paper_canvas = Image.new('RGB', (canvas_width_px, canvas_height_px), (255, 255, 255))
# desired pixels for 1200 dpi print
required_resolution_px = 94.48 # 47.244 # 23.622
required_resolution = 2400
print("Page dimension {page_width} {page_height} offset {offset}".format(page_width=canvas_width_px, page_height=canvas_height_px, offset=start_offset_px))
start_range = 10000100000000
for n in range(0, 5):
print("Generating ", start_range+n)
qr_image, qr_box = create_code("TLTR", number=start_range+n)
# qr_image.show()
print("qr_box ", qr_box)
qr_x = int(start_offset_px + ((n+1) * qr_box[2]))
qr_y = int(start_offset_px)
print("pasting at ", qr_x, qr_y)
pil_paper_canvas.paste(qr_image, (qr_x, qr_y))
# create a canvas just for current qrcode
one_qr_canvas = Image.new('RGB', (int(10*MM2PX_FACTOR), int(10*MM2PX_FACTOR)), (255, 255, 255))
qrXY = int((10*MM2PX_FACTOR - qr_box[2]) / 2)
one_qr_canvas.paste(qr_image, (qrXY, qrXY))
one_qr_canvas = one_qr_canvas.resize((int(qr_size_mm*required_resolution_px),
int(qr_size_mm*required_resolution_px)))
one_qr_canvas.save(form_full_path("TLTR"+str(start_range+n)+".pdf"), dpi=(required_resolution, required_resolution))
pil_paper_canvas = pil_paper_canvas.resize((int(paper_width_mm*required_resolution_px),
int(paper_height_mm*required_resolution_px)))
# pil_paper_canvas.show()
pil_paper_canvas.save(form_full_path("TLTR_qr_A4.pdf"), dpi=(required_resolution, required_resolution))
I incorporated 3 changes to fix/workaround the issue:
Instead of specifying fixed number for resizing, switched to scale (m*n).
Used lineType=cv2.LINE_AA for anti-aliasing as suggested by #physicalattraction
That still one issue unresolved which was that PIL generated PDF #96dpi which is not good for printing. Was unable to figure the option to use Print-ready PDF (PDF/A or something on those lines). Hence switched to generating PNG to generate high-quality qrcodes.

python-fu Copy an Image

I've a color list file I read it into a python list and I'd like to create one (or several) image(s) composed by squares with background color read from file and as foreground html string of the color (written with white). I.e. I read #ff0000 from input file then I create a 100x100 square with red background and a white string "#ff0000" as foreground...and so on for each color in input file.
This is my script:
#!/usr/bin/env python
from gimpfu import *
def readColors(ifc):
"""Read colors from input file and return a python list with all them"""
a = []
fd = open(ifc,"r")
for i in fd:
if not i.startswith("//"):#skip comment lines
a.append(i.rstrip('\n'))
return a
def rgb2html(col):
"""Converts a color: from (255,255,255) to #ffffff"""
r,g,b = col
return "#%x%x%x" % (r,g,b)
def html2rgb(col):
"""Converts a color: from #ffffff to (255,255,255)"""
s=col.strip('#')
r=int(s[:2],16)
g=int(s[2:4],16)
b=int(s[4:6],16)
return r,g,b
def nextColor():
"""Gets next html color from color list"""
col = nextColor.array[nextColor.counter]
nextColor.counter +=1
return col
def isNextColor():
"""Is there another color or list is over?"""
return nextColor.counter<isNextColor.colorslenght
def isPageEnded(y,YSIZE):
"""Is there enough room to draw another square?"""
return (y+100)>=YSIZE
def newPage(XSIZE,YSIZE):
"""Returns a new image"""
return gimp.Image(XSIZE,YSIZE,RGB)
def createImage(color,text):
"""Draws a square filled with color and white color text. It works!"""
gimp.set_foreground((255,255,255))#text color
gimp.set_background(color) #background color
image = gimp.Image(100,100,RGB)
back = gimp.Layer(image,"font",100,100,RGB_IMAGE,100,NORMAL_MODE)
image.add_layer(back,1)
back.fill(BACKGROUND_FILL)
lay = back.copy()
lay.name = "font"
image.add_layer(lay,0)
lay = pdb.gimp_text_fontname(image,None,2,100/4,text,2,True,18,PIXELS,"Sans")
image.merge_down(lay, NORMAL_MODE)
image.flatten()
return image
def drawRect(image,x,y):
"""Here I'd like to copy the result of createImage at current x,y position of current image. It doesn't work!"""
text = nextColor()
color = html2rgb(text)
img = createImage(color,text)
drawable = pdb.gimp_image_active_drawable(image)
image.disable_undo()
pdb.gimp_selection_none(image)
pdb.gimp_image_select_rectangle(image, 2, x, y, 100, 100)
if pdb.gimp_edit_named_copy_visible(img, "buffer"):
floating_sel = pdb.gimp_edit_named_paste(drawable, "buffer", TRUE)
pdb.gimp_selection_none(image)
image.flatten()
gimp.Display(image)#to debug
image.enable_undo()
def savePage(image,directory,filename):
"""Saves composed image to filename"""
drawable = pdb.gimp_image_active_drawable(image)
pdb.file_png_save(image, drawable, directory+"\\"+filename, filename, 0,9,1,0,0,1,1)
def draw(ifile,savedir,prefix):
"""Main method. it manage page's room, slicing it in several 100x100 squares"""
YSIZE = 1000
XSIZE = 1000
x = y = pc = 0
colorArray = readColors(ifile)
nextColor.counter = 0
nextColor.array = colorArray
isNextColor.colorslenght = len(colorArray)
pdb.gimp_context_push()
image = newPage(XSIZE,YSIZE)
while(isNextColor()):
drawRect(image,x,y)
x += 100 # move to next column
if x+100>=XSIZE:#move to next row
x = 0
y += 100
if isPageEnded(y,YSIZE):
savePage(image,savedir,prefix+str(pc)+".png")
gimp.Display(image)#to debug
pc += 1
image = newPage(XSIZE,YSIZE)
x = 0
y = 0
savePage(image,savedir,prefix+str(pc)+".png")#save last page
pdb.gimp_context_pop()
ifc = '<path to color list file>\\colors.txt'
ofc = '<path to output directory>\\palette'
prefix = 'table' #prefix for each file(i.e.: table0.png, table1.png...)
draw(ifc,ofc,prefix) #main method call
My current problem is with drawRect method. I can't copy image returned by createImage method at coords x,y of bigger image in drawRect method. In drawRect method I tried to used gimp's "copy into selection": select an area, and paste in it stuff copied from another image. But I have a trouble with layers and I can't get image copied at right position.
Just for completeness this are few lines from ifc file:
#b6ffff
#f079f3
#9979f3
#79c2f3
#7a79f3
#79f380
#f3799a
Thanks in advance for any help.
Alberto
Maybe it could be useful for someone else (it must be saved as "draw-colors.py" and copied to GIMP 2/lib/gimp/2.0/plug-ins folder):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from gimpfu import *
def readColors(ifile):
"""Read colors from input file and return a python list with all them"""
a = []
fd = open(ifile,"r")
for i in fd:
if not i.startswith('//'):
a.append(i)
return a
def rgb2html(col):
"""Converts a color: from (255,255,255) to #ffffff"""
r,g,b = col
return "#%x%x%x" % (r,g,b)
def html2rgb(col):
"""Converts a color: from #ffffff to (255,255,255)"""
s=col.strip('#')
r=int(s[:2],16)
g=int(s[2:4],16)
b=int(s[4:6],16)
return r,g,b
def nextColor():
"""Gets next html color from color list"""
col = html2rgb(nextColor.array[nextColor.counter])
nextColor.counter +=1
return col
def isNextColor():
"""Is there another color or list is over?"""
return nextColor.counter<isNextColor.colorslenght
def isPageEnded(y,sizey):
"""Is there enough room to draw another square?"""
return (y+100)>=sizey
def newPage(sizex,sizey):
"""Returns a new image"""
image = pdb.gimp_image_new(sizex, sizey, RGB)
layer = pdb.gimp_layer_new(image, sizex, sizey, RGB, "layer", 0, 0)
pdb.gimp_image_add_layer(image, layer, 0)
drw = pdb.gimp_image_active_drawable(image)
pdb.gimp_context_set_background((255,255,255))
pdb.gimp_drawable_fill(drw, BACKGROUND_FILL)
pdb.gimp_context_set_brush('Circle (01)')
return image
def savePage(image,filename):
"""Saves composed image to filename"""
layers = image.layers
last_layer = len(layers)-1
try:
disable=pdb.gimp_image_undo_disable(image)
pdb.gimp_layer_add_alpha(layers[0])
pdb.plug_in_colortoalpha(image,image.active_layer,(0,0,0))
layer = pdb.gimp_image_merge_visible_layers(image, 1)
enable = pdb.gimp_image_undo_enable(image)
pdb.file_png_save(image, image.active_layer, filename, filename, 0,9,1,0,0,1,1)
except Exception as e:
raise e
def drawRect(x,y,color,image):
"""draw a single square"""
text = rgb2html(color)
pdb.gimp_image_select_rectangle(image, 2, x, y, 100, 100)
pdb.gimp_context_set_background(color)
pdb.gimp_edit_fill(image.active_layer, 1)
try:
text_layer = pdb.gimp_text_fontname(image, None, x, y+(100/2), text, 2, 1, 18, 0, "Sans")
pdb.gimp_image_merge_down(image, text_layer, 0)
except Exception as e:
raise e
def draw(ifile,odir,prefix,sizex,sizey):
"""Main method. it manage page's room, slicing it in several 100x100 squares"""
colorArray = readColors(ifile)
nextColor.counter = 0
nextColor.array = colorArray
isNextColor.colorslenght = len(colorArray)
pc = x = y = 0
image = newPage(sizex,sizey)
try:
while(isNextColor()):
pdb.gimp_context_push()
drawRect(x,y,nextColor(),image)
x += 100#cambia colonna
if x+100>=sizex:#cambia riga
x = 0
y += 100
if isPageEnded(y,sizey):#salva pagina
savePage(image,odir+'\\'+prefix+str(pc)+".png")
pc += 1
image = newPage(sizex,sizey)
x = y = 0
savePage(image,odir+'\\'+prefix+str(pc)+".png")
pdb.gimp_context_pop()
except Exception as e:
raise e
register(
"draw-colors",
N_("Create a color map from a text color file"),
"Create a color map from a text color file",
"Alberto",
"#Copyright Alberto 2014",
"2014/10/21",
N_("Draw Colors..."),
"",
[
(PF_FILE, "ifile", N_("Color input file:"), 'default\\input\\colorfile\\path\\colorlist.txt'),
(PF_DIRNAME, "odir", N_("Path for png export:"), 'default\\output\\path'),
(PF_STRING, "prefix", N_("Filename prefix for export:"), "table"),
(PF_INT8, "sizex", N_("Image's x size:"), 1024),
(PF_INT8, "sizey", N_("Image's y size:"), 1024)
],
[],
draw, #main method call
menu="<Image>/Filters/Alberto",
domain=("gimp20-python", gimp.locale_directory)
)
main()
Output example:

Categories