I am trying to generate PDF files from generated image. The generated PDF file has high level of pixelation on zooming in which is creating shadows during printing.
Image of zoomed in qrcode from PDF
Showing gray zone around qrcode modules and pixels (gray) which should be white otherwise. It does not matter if the desired_resolution matches or is lower than the resolution in which the original image was created.
What could be the issue and possible fixes?
qr_size_mm = 8
MM2PX_FACTOR = 94.48 # 2400 dpi
def create_code(prefix, number, postfix=None):
message = prefix + str(number)
message += postfix if postfix is not None else ""
series_qrcode = pyqrcode.create(message, error='H', version=3, mode='binary')
# print(series_qrcode.get_png_size())
binary = BytesIO()
desired_scale = int(qr_size_px / series_qrcode.get_png_size())
series_qrcode.png(binary, scale=desired_scale, module_color=(0, 0, 0),
background=(255, 255, 255), quiet_zone=3)
tmpIm = Image.open(binary).convert('RGB')
qr_dim = tmpIm.getbbox()
# print(qr_dim)
return tmpIm, qr_dim
qr_size_px = int(qr_size_mm * MM2PX_FACTOR)
# create A4 canvas
paper_width_mm = 210
paper_height_mm = 297
start_offset_mm = 10
start_offset_px = start_offset_mm * MM2PX_FACTOR
canvas_width_px = int(paper_width_mm * MM2PX_FACTOR)
canvas_height_px = int(paper_height_mm * MM2PX_FACTOR)
pil_paper_canvas = Image.new('RGB', (canvas_width_px, canvas_height_px), (255, 255, 255))
# desired pixels for 1200 dpi print
required_resolution_px = 94.48 # 47.244 # 23.622
required_resolution = 2400
print("Page dimension {page_width} {page_height} offset {offset}".format(page_width=canvas_width_px, page_height=canvas_height_px, offset=start_offset_px))
start_range = 10000100000000
for n in range(0, 5):
print("Generating ", start_range+n)
qr_image, qr_box = create_code("TLTR", number=start_range+n)
# qr_image.show()
print("qr_box ", qr_box)
qr_x = int(start_offset_px + ((n+1) * qr_box[2]))
qr_y = int(start_offset_px)
print("pasting at ", qr_x, qr_y)
pil_paper_canvas.paste(qr_image, (qr_x, qr_y))
# create a canvas just for current qrcode
one_qr_canvas = Image.new('RGB', (int(10*MM2PX_FACTOR), int(10*MM2PX_FACTOR)), (255, 255, 255))
qrXY = int((10*MM2PX_FACTOR - qr_box[2]) / 2)
one_qr_canvas.paste(qr_image, (qrXY, qrXY))
one_qr_canvas = one_qr_canvas.resize((int(qr_size_mm*required_resolution_px),
int(qr_size_mm*required_resolution_px)))
one_qr_canvas.save(form_full_path("TLTR"+str(start_range+n)+".pdf"), dpi=(required_resolution, required_resolution))
pil_paper_canvas = pil_paper_canvas.resize((int(paper_width_mm*required_resolution_px),
int(paper_height_mm*required_resolution_px)))
# pil_paper_canvas.show()
pil_paper_canvas.save(form_full_path("TLTR_qr_A4.pdf"), dpi=(required_resolution, required_resolution))
I incorporated 3 changes to fix/workaround the issue:
Instead of specifying fixed number for resizing, switched to scale (m*n).
Used lineType=cv2.LINE_AA for anti-aliasing as suggested by #physicalattraction
That still one issue unresolved which was that PIL generated PDF #96dpi which is not good for printing. Was unable to figure the option to use Print-ready PDF (PDF/A or something on those lines). Hence switched to generating PNG to generate high-quality qrcodes.
Related
I have been stuck on this program for a while and I am completely lost as to what is wrong and what I need to fix. I am trying to use steganography to encrypt a secret image inside of a cover image, and then decrypt the secret image. I am given part of the code and I have to implement the encode_pixel and decode_pixel functions, as well as the "helper functions".
When I run the program, the resulting image is completely grey instead of the secret image. I do not know where my error is, and if it is in the encode or decode function, or if it is in the helper functions.
These are the functions that I have implemented:
ORIGINAL_URL = "https://codehs.com/uploads/c709d869e62686611c1ac849367b3245"
SECRET_URL = "https://codehs.com/uploads/e07cd01271cac589cc9ef1bf012c6a0c"
IMAGE_LOAD_WAIT_TIME = 1000
# Constants for pixel indices
RED = 0
GREEN = 1
BLUE = 2
# Constants for colors
MAX_COLOR_VALUE = 255
MIN_COLOR_VALUE = 0
COLOR_THRESHOLD = 128
# Constants for spacing
X_GAP = 100
Y_GAP = 50
TEXT_Y_GAP = 4
IMAGE_WIDTH = 100
IMAGE_HEIGHT = 100
IMAGE_X = 25
IMAGE_Y = 25
# Set Canvas size
set_size(400, 480)
# Encodes the given secret pixel into the low bits of the
# RGB values of the given cover pixel
# Returns the modified cover pixel
def encode_pixel(cover_pixel, secret_pixel):
# Implement this function
# return a temporary value. Change this!!
return_pixel = [0,0,0]
if secret_pixel[RED]>= COLOR_THRESHOLD:
return_pixel[RED] = set_lowest_bit(cover_pixel[RED], 1)
else:
return_pixel[RED] = set_lowest_bit(cover_pixel[RED], 0)
if secret_pixel[GREEN] >= COLOR_THRESHOLD:
return_pixel[GREEN] = set_lowest_bit(cover_pixel[GREEN], 1)
else:
return_pixel[GREEN] = set_lowest_bit(cover_pixel[GREEN], 0)
if secret_pixel[BLUE] >= COLOR_THRESHOLD:
return_pixel[BLUE] = set_lowest_bit(cover_pixel[BLUE], 1)
else:
return_pixel[BLUE] = set_lowest_bit(cover_pixel[BLUE], 0)
return return_pixel
# Extracts the RGB values for a secret pixel from the low bits
# of the given cover pixel
# Input is an array of RGB values for a pixel.
# Returns a tuple of RGB values for the decoded pixel
def decode_pixel(cover_pixel):
secret_pixel = [0, 0, 0]
red_bit = get_lowest_bit(cover_pixel[RED])
green_bit = get_lowest_bit(cover_pixel[GREEN])
blue_bit = get_lowest_bit(cover_pixel[BLUE])
if red_bit == 1:
secret_pixel[RED] = MAX_COLOR_VALUE
else:
secret_pixel[RED] = COLOR_THRESHOLD
if green_bit == 1:
secret_pixel[GREEN] = MAX_COLOR_VALUE
else:
secret_pixel[GREEN] = COLOR_THRESHOLD
if blue_bit == 1:
secret_pixel[BLUE] = MAX_COLOR_VALUE
else:
secret_pixel[BLUE] = COLOR_THRESHOLD
return secret_pixel
# Returns true if the given value is even, false otherwise
def is_even(value):
return value % 2 == 0
# Given a number, return the lowest bit in the binary representation
# of the number.
# Returns either a 0 or a 1
def get_lowest_bit(value):
if is_even:
return 0
else:
return 1
# Given a number, return a new number with the same underlying bits
# except the lowest bit is set to the given bit_value.
def set_lowest_bit(value, bit_value):
if bit_value == 0:
if is_even:
return value
else:
return value -1
else:
if not is_even:
return value
else:
return value +1
**# We don't have to change this code:**
def encrypt(cover, secret):
# Loop over each pixel in the image
for x in range(IMAGE_WIDTH):
for y in range(IMAGE_HEIGHT):
pass
# Get the pixels at this location for both images
cover_pixel = cover.get_pixel(x, y)
secret_pixel = secret.get_pixel(x, y)
# Modify the cover pixel to encode the secret pixel
new_cover_color = encode_pixel(cover_pixel, secret_pixel)
# Update this pixel in the cover image to have the
# secret bit encoded
cover.set_red(x, y, new_cover_color[RED])
cover.set_green(x, y, new_cover_color[GREEN])
cover.set_blue(x, y, new_cover_color[BLUE])
print("Done encrypting")
return cover
"""
Decrypts a secret image from an encoded cover image.
Returns an Image
"""
def decrypt(cover_image, result):
# secret image will start off with the cover pixels
# As we loop over the coverImage to discover the secret embedded image,
# we will update secretImage pixel by pixel
# Loop over each pixel in the image
for x in range(IMAGE_WIDTH):
for y in range(IMAGE_HEIGHT):
#Get the current pixel of the cover image
cover_pixel = cover_image.get_pixel(x, y)
# Compute the secret_pixel from this cover pixel
secret_pixel_color = decode_pixel(cover_pixel)
result.set_red(x, y, secret_pixel_color[RED])
result.set_green(x, y, secret_pixel_color[GREEN])
result.set_blue(x, y, secret_pixel_color[BLUE])
print("Done decrypting")
return result
# Image width cannot be odd, it messes up the math of the encoding
if IMAGE_WIDTH % 2 == 1:
IMAGE_WIDTH -= 1
#Set up original image
#Image(x, y, filename, width=50, height=50, rotation=0) // x,y top left corner
original = Image(ORIGINAL_URL, IMAGE_X, IMAGE_Y, IMAGE_WIDTH, IMAGE_HEIGHT)
# Set up secret image
secret = Image(SECRET_URL, IMAGE_X + original.get_width() + X_GAP, IMAGE_Y,
IMAGE_WIDTH, IMAGE_HEIGHT)
# Set up the cover image
# (identical to original, but will be modified to encode the secret image)
cover_x = IMAGE_X + IMAGE_WIDTH
cover_y = IMAGE_Y + Y_GAP + IMAGE_HEIGHT
cover = Image(ORIGINAL_URL, cover_x, cover_y, IMAGE_WIDTH, IMAGE_HEIGHT)
# Set up result image
result = Image(ORIGINAL_URL, cover_x, cover_y + Y_GAP + IMAGE_HEIGHT,
IMAGE_WIDTH, IMAGE_HEIGHT)
# Add originals
add(original)
add(secret)
# Add cover and result
add(cover)
add(result)
# Add labels for each image
font = "11pt Arial"
def make_label(text, x, y, font):
label = Text(text)
label.set_position(x,y)
label.set_font(font)
add(label)
# Text(label, x=0, y=0, color=None,font=None) // x,y is
# original label
x_pos = original.get_x()
y_pos = original.get_y() - TEXT_Y_GAP
make_label("Original Cover Image", x_pos, y_pos, font)
#secret label
x_pos = secret.get_x()
y_pos = secret.get_y() - TEXT_Y_GAP
make_label("Original Secret Image", x_pos, y_pos, font)
# cover label
x_pos = IMAGE_X
y_pos = cover.get_y() - TEXT_Y_GAP
make_label("Cover Image with Secret Image encoded inside", x_pos, y_pos, font)
# result label
x_pos = IMAGE_X
y_pos = cover.get_y() + IMAGE_HEIGHT + Y_GAP - TEXT_Y_GAP
make_label("Resulting Secret Image decoded from Cover Image", x_pos, y_pos, font)
# Encrypt and decrypt the image
# Displays the changed images
def run_encryption():
encrypt(cover, secret)
print("Decrypting .........")
timer.set_timeout(lambda: decrypt(cover, result), IMAGE_LOAD_WAIT_TIME)
# Wait for images to load before encrypting and decrypting
print("Encrypting ............")
timer.set_timeout(run_encryption, IMAGE_LOAD_WAIT_TIME)
How do I have to change the code? I only get a grey picture on the decoded picture place.
I have a robotic code, that does the following:
camera starts processing and taking images
Mounting Holes (hough transform) function detection is activated
The holes are drawn on the image
approachcirlce function moves robot towards one of the set coordinates
I have two issues :
The mounting holes keep getting called even after detecting the coordinates once.
The robot in the approachcircle function cant move to one coordinates then onto the other. It keeps going back and forth as the x and y aren't specifically set to finish the first set of coordinates first. i.e : between two circles it does not reach either centers as expected. it never reaches the center of a detected circle if its more than one
I want the code to call the mountingholes function once and have the robot to move to each recorded coordinates, after the intial set of coordinates is done. I will have the robot move to another area and start doing the process again. I'm assuming the problem is that the functions are in the camera processing loop which is run indefinitely
The code is below:
##Def:
def approachcircle (r,t,z):
move = robot.Pose()*transl(r,t,z)
robot.MoveL(move)
def approacharea (z):
move = robot.Pose()*transl(0,0,z)
robot.MoveL(move)
def MountingHoles(img,thresh,r):
minR = r
CannyHighT = 50
min_points = 15 #param2
img_1= cv.cvtColor(img,cv.COLOR_BGR2GRAY)
#img3 = cv2.inRange(img_1, thresh, 255)
circles = cv.HoughCircles(img_1,cv.HOUGH_GRADIENT, 1, 2*minR, param1=CannyHighT,
param2=min_points, minRadius=minR, maxRadius=220)
return circles
#Installation
from robolink import * # RoboDK API
from robodk import * # Robot toolbox
RDK = Robolink()
pose = eye()
ITEM_TYPE_ROBOT
RDK = robolink.Robolink()
robot = RDK.Item('TM12X')
import_install('cv2', 'opencv-python')
import cv2 as cv
import numpy as np
import numpy
#----------------------------------
# Settings
PROCESS_COUNT = -1 # How many frames to process before exiting. -1 means indefinitely.
CAM_NAME = "Camera"
DISPLAY_SETTINGS = True
WDW_NAME_PARAMS = 'RoboDK - Blob detection parameters'
DISPLAY_RESULT = True
WDW_NAME_RESULTS = 'RoboDK - Blob detections1'
# Calculate absolute XYZ position clicked from the camera in absolute coordinates:
cam_item = RDK.Item(CAM_NAME, ITEM_TYPE_CAMERA)
if not cam_item.Valid():
raise Exception("Camera not found! %s" % CAM_NAME)
cam_item.setParam('Open', 1) # Force the camera view to open
#----------------------------------------------
# Create an resizable result window
if DISPLAY_RESULT:
cv.namedWindow(WDW_NAME_RESULTS) #, cv.WINDOW_NORMAL)
#----------------------------------------------
# capture = cv.VideoCapture(0)
# retval, image = capture.read()
#----------------------------------------------
# Process camera frames
count = 0
while count < PROCESS_COUNT or PROCESS_COUNT < 0:
print("=============================================")
print("Processing image %i" % count)
count += 1
#----------------------------------------------
# Get the image from RoboDK
bytes_img = RDK.Cam2D_Snapshot("", cam_item)
if bytes_img == b'':
raise
# Image from RoboDK are BGR, uchar, (h,w,3)
nparr = np.frombuffer(bytes_img, np.uint8)
img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
if img is None or img.shape == ():
raise
#----------------------------------------------
# Detect blobs
keypoints = MountingHoles(img,250,50)
i = 0
#----------------------------------------------
# Display the detection to the user (reference image and camera image side by side, with detection results)
if DISPLAY_RESULT:
# Draw detected blobs and their id
i = 0
for keypoint in keypoints[0,:]:
cv.putText(img, str(i), (int(keypoint[0]), int(keypoint[1])), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv.LINE_AA)
cv.circle(img, (int(keypoint[0]), int(keypoint[1])), int(keypoint[2]), (0, 0, 255), 15)
#
i += 1
# Resize the image, so that it fits your screen
img = cv.resize(img, (int(img.shape[1] * .75), int(img.shape[0] * .75)))#
cv.imshow(WDW_NAME_RESULTS, img)
key = cv.waitKey(500)
if key == 27:
break # User pressed ESC, exit
if cv.getWindowProperty(WDW_NAME_RESULTS, cv.WND_PROP_VISIBLE) < 1:
break # User killed the window, exit
#--------------------------------------------------------------------------------------------
# Movement functions
r=0
t=0
i=0
#approacharea(200)
for keypoint in keypoints[0,:]:
#print("id:%i coord=(%0.0f, %0.0f)" % (i, X, Y))
X= int(keypoint[0])-320
Y=int(keypoint[1])-240
r=int(keypoint[2])
print("id:%i coord=(%0.0f, %0.0f)" % (i, X, Y))
if X!= 0 or Y!=0 :
r=X*0.1
t=Y*0.1
approachcircle(r,t,0)
i+=1
So i'm trying to launch a Python script (originally available from here : https://github.com/dvdtho/python-photo-mosaic). Full code at the bottom of this post.
This basically creates a mosaic (from a source image), with the final image (mosaic) is composed of several other images (tiles).
My question is how I am supposed to fill the variables (the ones at line 212) in order to run the script (through Eclispe in my case).
Should I put directly something like this? (in my case the folder Desktop/tiles contains all the jpg files) :
tile_paths = glob.glob("C:/Users/Sylvia/Desktop/tiles/*.jpg") # I've added this line myself
def create_mosaic(source_path="C:\\Users\\Sylvia\\Desktop\\source\\1.jpg", target="C:\\Users\\Sylvia\\Desktop\\source\\result.jpg", tile_ratio=1920/800, tile_width=75, enlargement=8, reuse=True, color_mode='RGB', tile_paths=None, shuffle_first=30):
Last time i tried i got this error :
def create_mosaic(source, target, tile_ratio=1920/800, tile_width=75,
enlargement=8, reuse=True, color_mode='RGB', tile_paths,
shuffle_first=30):
^ SyntaxError: non-default argument follows default argument
I'm very lost, hopefully someone can help me.
Here's the code :
import time
import itertools
import random
import sys
import numpy as np
from PIL import Image
from skimage import img_as_float
from skimage.measure import compare_mse
def shuffle_first_items(lst, i):
if not i:
return lst
first_few = lst[:i]
remaining = lst[i:]
random.shuffle(first_few)
return first_few + remaining
def bound(low, high, value):
return max(low, min(high, value))
class ProgressCounter:
def __init__(self, total):
self.total = total
self.counter = 0
def update(self):
self.counter += 1
sys.stdout.write("Progress: %s%% %s" % (100 * self.counter / self.total, "\r"))
sys.stdout.flush()
def img_mse(im1, im2):
"""Calculates the root mean square error (RSME) between two images"""
try:
return compare_mse(img_as_float(im1), img_as_float(im2))
except ValueError:
print(f'RMS issue, Img1: {im1.size[0]} {im1.size[1]}, Img2: {im2.size[0]} {im2.size[1]}')
raise KeyboardInterrupt
def resize_box_aspect_crop_to_extent(img, target_aspect, centerpoint=None):
width = img.size[0]
height = img.size[1]
if not centerpoint:
centerpoint = (int(width / 2), int(height / 2))
requested_target_x = centerpoint[0]
requested_target_y = centerpoint[1]
aspect = width / float(height)
if aspect > target_aspect:
# Then crop the left and right edges:
new_width = int(target_aspect * height)
new_width_half = int(new_width/2)
target_x = bound(new_width_half, width-new_width_half, requested_target_x)
left = target_x - new_width_half
right = target_x + new_width_half
resize = (left, 0, right, height)
else:
# ... crop the top and bottom:
new_height = int(width / target_aspect)
new_height_half = int(new_height/2)
target_y = bound(new_height_half, height-new_height_half, requested_target_y)
top = target_y - new_height_half
bottom = target_y + new_height_half
resize = (0, top, width, bottom)
return resize
def aspect_crop_to_extent(img, target_aspect, centerpoint=None):
'''
Crop an image to the desired perspective at the maximum size available.
Centerpoint can be provided to focus the crop to one side or another -
eg just cut the left side off if interested in the right side.
target_aspect = width / float(height)
centerpoint = (width, height)
'''
resize = resize_box_aspect_crop_to_extent(img, target_aspect, centerpoint)
return img.crop(resize)
class Config:
def __init__(self, tile_ratio=1920/800, tile_width=50, enlargement=8, color_mode='RGB'):
self.tile_ratio = tile_ratio # 2.4
self.tile_width = tile_width # height/width of mosaic tiles in pixels
self.enlargement = enlargement # mosaic image will be this many times wider and taller than original
self.color_mode = color_mode # mosaic image will be this many times wider and taller than original
#property
def tile_height(self):
return int(self.tile_width / self.tile_ratio)
#property
def tile_size(self):
return self.tile_width, self.tile_height # PIL expects (width, height)
class TileBox:
"""
Container to import, process, hold, and compare all of the tiles
we have to make the mosaic with.
"""
def __init__(self, tile_paths, config):
self.config = config
self.tiles = list()
self.prepare_tiles_from_paths(tile_paths)
def __process_tile(self, tile_path):
with Image.open(tile_path) as i:
img = i.copy()
img = aspect_crop_to_extent(img, self.config.tile_ratio)
large_tile_img = img.resize(self.config.tile_size, Image.ANTIALIAS).convert(self.config.color_mode)
self.tiles.append(large_tile_img)
return True
def prepare_tiles_from_paths(self, tile_paths):
print('Reading tiles from provided list...')
progress = ProgressCounter(len(tile_paths))
for tile_path in tile_paths:
progress.update()
self.__process_tile(tile_path)
print('Processed tiles.')
return True
def best_tile_block_match(self, tile_block_original):
match_results = [img_mse(t, tile_block_original) for t in self.tiles]
best_fit_tile_index = np.argmin(match_results)
return best_fit_tile_index
def best_tile_from_block(self, tile_block_original, reuse=False):
if not self.tiles:
print('Ran out of images.')
raise KeyboardInterrupt
#start_time = time.time()
i = self.best_tile_block_match(tile_block_original)
#print("BLOCK MATCH took --- %s seconds ---" % (time.time() - start_time))
match = self.tiles[i].copy()
if not reuse:
del self.tiles[i]
return match
class SourceImage:
"""Processing original image - scaling and cropping as needed."""
def __init__(self, image_path, config):
print('Processing main image...')
self.image_path = image_path
self.config = config
with Image.open(self.image_path) as i:
img = i.copy()
w = img.size[0] * self.config.enlargement
h = img.size[1] * self.config.enlargement
large_img = img.resize((w, h), Image.ANTIALIAS)
w_diff = (w % self.config.tile_width)/2
h_diff = (h % self.config.tile_height)/2
# if necesary, crop the image slightly so we use a
# whole number of tiles horizontally and vertically
if w_diff or h_diff:
large_img = large_img.crop((w_diff, h_diff, w - w_diff, h - h_diff))
self.image = large_img.convert(self.config.color_mode)
print('Main image processed.')
class MosaicImage:
"""Holder for the mosaic"""
def __init__(self, original_img, target, config):
self.config = config
self.target = target
# Lets just start with original image, scaled up, instead of a blank one
self.image = original_img
# self.image = Image.new(original_img.mode, original_img.size)
self.x_tile_count = int(original_img.size[0] / self.config.tile_width)
self.y_tile_count = int(original_img.size[1] / self.config.tile_height)
self.total_tiles = self.x_tile_count * self.y_tile_count
print(f'Mosaic will be {self.x_tile_count:,} tiles wide and {self.y_tile_count:,} tiles high ({self.total_tiles:,} total).')
def add_tile(self, tile, coords):
"""Adds the provided image onto the mosiac at the provided coords."""
try:
self.image.paste(tile, coords)
except TypeError as e:
print('Maybe the tiles are not the right size. ' + str(e))
def save(self):
self.image.save(self.target)
def coords_from_middle(x_count, y_count, y_bias=1, shuffle_first=0, ):
'''
Lets start in the middle where we have more images.
And we dont get "lines" where the same-best images
get used at the start.
y_bias - if we are using non-square coords, we can
influence the order to be closer to the real middle.
If width is 2x height, y_bias should be 2.
shuffle_first - We can suffle the first X coords
so that we dont use all the same-best images
in the same spot - in the middle
from movies.mosaic_mem import coords_from_middle
x = 10
y = 10
coords_from_middle(x, y, y_bias=2, shuffle_first=0)
'''
x_mid = int(x_count/2)
y_mid = int(y_count/2)
coords = list(itertools.product(range(x_count), range(y_count)))
coords.sort(key=lambda c: abs(c[0]-x_mid)*y_bias + abs(c[1]-y_mid))
coords = shuffle_first_items(coords, shuffle_first)
return coords
def create_mosaic(source_path, target, tile_ratio=1920/800, tile_width=75, enlargement=8, reuse=True, color_mode='RGB', tile_paths=None, shuffle_first=30):
"""Forms an mosiac from an original image using the best
tiles provided. This reads, processes, and keeps in memory
a copy of the source image, and all the tiles while processing.
Arguments:
source_path -- filepath to the source image for the mosiac
target -- filepath to save the mosiac
tile_ratio -- height/width of mosaic tiles in pixels
tile_width -- width of mosaic tiles in pixels
enlargement -- mosaic image will be this many times wider and taller than the original
reuse -- Should we reuse tiles in the mosaic, or just use each tile once?
color_mode -- L for greyscale or RGB for color
tile_paths -- List of filepaths to your tiles
shuffle_first -- Mosiac will be filled out starting in the center for best effect. Also,
we will shuffle the order of assessment so that all of our best images aren't
necessarily in one spot.
"""
config = Config(
tile_ratio = tile_ratio, # height/width of mosaic tiles in pixels
tile_width = tile_width, # height/width of mosaic tiles in pixels
enlargement = enlargement, # the mosaic image will be this many times wider and taller than the original
color_mode = color_mode, # L for greyscale or RGB for color
)
# Pull in and Process Original Image
print('Setting Up Target image')
source_image = SourceImage(source_path, config)
# Setup Mosaic
mosaic = MosaicImage(source_image.image, target, config)
# Assest Tiles, and save if needed, returns directories where the small and large pictures are stored
print('Assessing Tiles')
tile_box = TileBox(tile_paths, config)
try:
progress = ProgressCounter(mosaic.total_tiles)
for x, y in coords_from_middle(mosaic.x_tile_count, mosaic.y_tile_count, y_bias=config.tile_ratio, shuffle_first=shuffle_first):
progress.update()
# Make a box for this sector
box_crop = (x * config.tile_width, y * config.tile_height, (x + 1) * config.tile_width, (y + 1) * config.tile_height)
# Get Original Image Data for this Sector
comparison_block = source_image.image.crop(box_crop)
# Get Best Image name that matches the Orig Sector image
tile_match = tile_box.best_tile_from_block(comparison_block, reuse=reuse)
# Add Best Match to Mosaic
mosaic.add_tile(tile_match, box_crop)
# Saving Every Sector
mosaic.save()
except KeyboardInterrupt:
print('\nStopping, saving partial image...')
finally:
mosaic.save()
It's ok, this is the new file i have to create in order for it to work :
create_mosaic(
subject="/path/to/source/image",
target="/path/to/output/image",
tile_paths=["/path/to/tile_1" , ... "/path/to/tile_n"],
tile_ratio=1920/800, # Crop tiles to be height/width ratio
tile_width=300, # Tile will be scaled
enlargement=20, # Mosiac will be this times larger than original
reuse=False, # Should tiles be used multiple times?
color_mode='L', # RGB (color) L (greyscale)
)
Problem resovled.
I use Tesseract and python to read digits (from a energy meter).
Everything works well except for the number "1".
Tesseract can not read the "1" Digit.
This is the picture I send to tesseract :
And tesseract reads "0000027 ".
How can I tell Tesseract that the vertical rod is a "1" ?
This is my tesseract initialisation :
import tesseract
TESSERACT_LIBRARY_PATH = "C:\\Program Files (x86)\\Tesseract-OCR"
LANGUAGE = "eng"
CHARACTERS = "0123456789"
FALSE = "0"
TRUE = "1"
def init_ocr():
"""
.. py:function:: init_ocr()
Utilize the Tesseract-OCR library to create an tesseract_ocr that
predicts the numbers to be read off of the meter.
:return: tesseract_ocr Tesseracts OCR API.
:rtype: Class
"""
# Initialize the tesseract_ocr with the english language package.
tesseract_ocr = tesseract.TessBaseAPI()
tesseract_ocr.Init(TESSERACT_LIBRARY_PATH, LANGUAGE,
tesseract.OEM_DEFAULT)
# Limit the characters being seached for to numerics.
tesseract_ocr.SetVariable("tessedit_char_whitelist", CHARACTERS)
# Set the tesseract_ocr to predict for only one character.
tesseract_ocr.SetPageSegMode(tesseract.PSM_AUTO)
# Tesseract's Directed Acyclic Graph.
# Not necessary for number recognition.
tesseract_ocr.SetVariable("load_system_dawg", FALSE)
tesseract_ocr.SetVariable("load_freq_dawg", FALSE)
tesseract_ocr.SetVariable("load_number_dawg", TRUE)
tesseract_ocr.SetVariable("classify_enable_learning", FALSE)
tesseract_ocr.SetVariable("classify_enable_adaptive_matcher", FALSE)
return tesseract_ocr
Slightly irrelevant answer, though may serve your original goal.
I had similar problem with tesseract and I had very strict performance requirements as well. I found this simple solution on SO and crafted simple recogniser with OpenCV.
It boils down to finding bounding rectangles (from edges) on the very clear image that you have and then trying to match found objects versus templates. I believe the solution in your case will be both simple and precise though will require slightly more code than you have now.
I will follow this question, since it will be nice to have working solution with tesseract.
I have a limited time, but it seems to be a working solution:
import os
import cv2
import numpy
KNN_SQUARE_SIDE = 50 # Square 50 x 50 px.
def resize(cv_image, factor):
new_size = tuple(map(lambda x: x * factor, cv_image.shape[::-1]))
return cv2.resize(cv_image, new_size)
def crop(cv_image, box):
x0, y0, x1, y1 = box
return cv_image[y0:y1, x0:x1]
def draw_box(cv_image, box):
x0, y0, x1, y1 = box
cv2.rectangle(cv_image, (x0, y0), (x1, y1), (0, 0, 255), 2)
def draw_boxes_and_show(cv_image, boxes, title='N'):
temp_image = cv2.cvtColor(cv_image, cv2.COLOR_GRAY2RGB)
for box in boxes:
draw_box(temp_image, box)
cv2.imshow(title, temp_image)
cv2.waitKey(0)
class BaseKnnMatcher(object):
distance_threshold = 0
def __init__(self, source_dir):
self.model, self.label_map = self.get_model_and_label_map(source_dir)
#staticmethod
def get_model_and_label_map(source_dir):
responses = []
label_map = []
samples = numpy.empty((0, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE), numpy.float32)
for label_idx, filename in enumerate(os.listdir(source_dir)):
label = filename[:filename.index('.png')]
label_map.append(label)
responses.append(label_idx)
image = cv2.imread(os.path.join(source_dir, filename), 0)
suit_image_standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
sample = suit_image_standard_size.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE))
samples = numpy.append(samples, sample, 0)
responses = numpy.array(responses, numpy.float32)
responses = responses.reshape((responses.size, 1))
model = cv2.KNearest()
model.train(samples, responses)
return model, label_map
def predict(self, image):
image_standard_size = cv2.resize(image, (KNN_SQUARE_SIDE, KNN_SQUARE_SIDE))
image_standard_size = numpy.float32(image_standard_size.reshape((1, KNN_SQUARE_SIDE * KNN_SQUARE_SIDE)))
closest_class, results, neigh_resp, distance = self.model.find_nearest(image_standard_size, k=1)
if distance[0][0] > self.distance_threshold:
return None
return self.label_map[int(closest_class)]
class DigitKnnMatcher(BaseKnnMatcher):
distance_threshold = 10 ** 10
class MeterValueReader(object):
def __init__(self):
self.digit_knn_matcher = DigitKnnMatcher(source_dir='templates')
#classmethod
def get_symbol_boxes(cls, cv_image):
ret, thresh = cv2.threshold(cv_image.copy(), 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
symbol_boxes = []
for contour in contours:
x, y, width, height = cv2.boundingRect(contour)
# You can test here for box size, though not required in your example:
# if cls.is_size_of_digit(width, height):
# symbol_boxes.append((x, y, x+width, y+height))
symbol_boxes.append((x, y, x+width, y+height))
return symbol_boxes
def get_value(self, meter_cv2_image):
symbol_boxes = self.get_symbol_boxes(meter_cv2_image)
symbol_boxes.sort() # x is first in tuple
symbols = []
for box in symbol_boxes:
symbol = self.digit_knn_matcher.predict(crop(meter_cv2_image, box))
symbols.append(symbol)
return symbols
if __name__ == '__main__':
# If you want to see how boxes detection works, uncomment these:
# img_bw = cv2.imread(os.path.join('original.png'), 0)
# boxes = MeterValueReader.get_symbol_boxes(img_bw)
# draw_boxes_and_show(img_bw, boxes)
# Uncomment to generate templates from image
# import random
# TEMPLATE_DIR = 'templates'
# img_bw = cv2.imread(os.path.join('original.png'), 0)
# boxes = MeterValueReader.get_symbol_boxes(img_bw)
# for box in boxes:
# # You need to label templates manually after extraction
# cv2.imwrite(os.path.join(TEMPLATE_DIR, '%s.png' % random.randint(0, 1000)), crop(img_bw, box))
img_bw = cv2.imread(os.path.join('original.png'), 0)
vr = MeterValueReader()
print vr.get_value(img_bw)
in my case, there are 2 ways of getting image to resize/crop.
upload normal image file
giving base64 string data of image
in 1. case, resize and crop is working well:
f = Image.open(uploaded_image)
new_width, new_height = 1200, 630
wpercent = (new_width / float(f.size[0]))
hsize = int((float(f.size[1]) * float(wpercent)))
if f.mode != "RGB":
f = f.convert('RGB')
og_img = None
if f.size[0] < new_width:
#upscale
og_img = f.resize((new_width, hsize), Image.BICUBIC)
elif f.size[0] >= new_width:
#downscale
og_img = f.resize((new_width, hsize), Image.ANTIALIAS)
og_img = og_img.crop((0, 0, 1200, 630))
resized/cropped image:
in 2. case, the code is the same as above with slight change in:
base64_image = str(request.POST.get('base64_image')).split(',')[1]
imgfile = open('/'.join([settings.MEDIA_ROOT, 'test.png' ]), 'w+b')
imgfile.write(decodestring(base64_image))
imgfile.seek(0)
f = Image.open(imgfile)
#.. as above
but the resized/cropped image:
why is it in 2.case bad in quality and size? (black bottom part..) what am I doing wrong? am I reading the base64 string in wrong way?
I found a website which has many interesting things in it.It has 2(there are many) tools which maybe can help you.The 1th tool converts image to base64 and the 2th tool minifies the size of image (up to 70% save).
http://www.w3docs.com/tools/minimage/
http://www.w3docs.com/tools/image-base64