OpenCV: Remove the background noise for Tesseract OCR

OpenCV: Remove the background noise for Tesseract OCR - python

I have a drone FPV video from which I need to extract GPS coordinates. The text is white, but because of the poor video quality it appears gray and light blue. Since the background keeps changing, I have some problems: in some frames the background has a totally different color from the text, and in other frames a very similar one.
Here are two original images (frames) from the video:
Dark background
Light background
And here is the code that I've found after googling:
import numpy as np
import cv2
import pytesseract
# Read the video frame by frame, isolate the on-screen text region, clean it
# up and hand it to Tesseract.
cap = cv2.VideoCapture('v1.avi')
p = 10000  # NOTE(review): never read anywhere in this snippet
# The 3x3 structuring element does not depend on the frame, so build it once.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        # read() reports failure (and frame is None) once the stream is
        # exhausted; without this guard cvtColor raises on the last iteration.
        break
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Crop to the fixed rectangle (rows 380-459, columns 220-639).
    img = img[380:460, 220:640]
    img = cv2.bilateralFilter(img, 9, 27, 27)
    img = cv2.threshold(img, 0, 255,
                        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    img = cv2.GaussianBlur(img, (9, 9), 0)
    img = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)
    img = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU)[1]
    img = cv2.dilate(img, kernel)
    # NOTE(review): maxval is 250 here (not 255) in the original; kept as is.
    img = cv2.threshold(img, 0, 250, cv2.THRESH_BINARY_INV)[1]
    cv2.imshow('frame', img)
    cv2.imshow('or', frame)
    print('\n==============')
    print(pytesseract.image_to_string(img, config='digits'))
    # Press "q" to stop early.
    if cv2.waitKey(50) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
And also the results:
Dark background
Light background
As you can see, in the second case the background isn't clear, there is some noise, and from that image Tesseract doesn't extract the text properly.
EDIT:
For some reasons I can't share the video I wrote about above, but here is a similar video from Youtube, and if the text can be extracted from that video, I guess that method will also work for mine or solve many problems at least:

I was able to get something working using a combination of cv2.bilateralFilter and cv2.adaptiveThreshold. Once the background is in one main blob, the numbers can be extracted based on their patch sizes.
import numpy as np

img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# Bilateral filter and adaptive thresholding to get the background into mostly one patch
img = cv2.bilateralFilter(img, 9, 29, 29)
thresh = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 13, 0)
# Add padding to join any background around the edges into the same patch
pad = 2
img_pad = cv2.copyMakeBorder(thresh, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=1)
# Label patches and remove padding
ret, markers = cv2.connectedComponents(img_pad)
markers = markers[pad:-pad, pad:-pad]
# Count pixels in each patch in a single pass; counts[i] is the number of
# pixels carrying label i (same values as one (markers == i).sum() per label).
counts = np.bincount(markers.ravel(), minlength=int(markers.max()) + 1)
# Keep patches based on pixel counts
maxCount = 200  # removes large background patches
minCount = 40  # removes specks and the centres of the digits
keep = (counts < maxCount) & (counts > minCount)
# Look every pixel's label up in `keep`: 1 where its patch is kept, else 0.
output = keep[markers].astype(markers.dtype)
Here is what the images look like at each stage.

Related

How to detect colored text on gradient background with pytesseract

I'm currently working on a small OCR bot. I got pretty much everything to work and am now trying to improve the OCR. Specifically, it has problems with two things: the orange/red-ish text on the same colored gradient and for some reason the first 1 of "1/1". Sadly I haven't found anything that worked in my case yet. I've made a small test image, which is consisting of multiple images, below:
Source Image
Results
Adaptive Threshold
As you can see the gradient results in a blob that is sometimes big enough to overlap with the first word (see "apprentice") resulting in garbage.
I've tried many variations and played around with thresholds, blurs, erode, dilation, box detection with the dilation method, etc. but nothing worked well. The only way I did get rid of the blob is using an adaptive Threshold. But sadly I wasn't able to get good results using the output image.
If anyone knows how to make the OCR more robust, increase accuracy and get rid of the blob I'd appreciate your help. Thanks.
The following code is my 'playground' to figure out a better way:
import cv2
import pytesseract
import numpy as np
pytesseract.pytesseract.tesseract_cmd = YOUR_PATH
def resize(img, scale_percent=300):
    """Return ``img`` scaled to ``scale_percent`` percent of its size.

    Args:
        img: image array; ``shape[0]`` is used as height, ``shape[1]`` as width.
        scale_percent: target size in percent of the original (300 = 3x).

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    # use this instead?
    # resize = image = imutils.resize(image, width=300)
    width = int(img.shape[1] * scale_percent / 100)
    height = int(img.shape[0] * scale_percent / 100)
    # INTER_AREA is the recommended choice for shrinking only; enlarging (the
    # default 300 % case) looks better with INTER_CUBIC.
    if scale_percent < 100:
        interpolation = cv2.INTER_AREA
    else:
        interpolation = cv2.INTER_CUBIC
    return cv2.resize(img, (width, height), interpolation=interpolation)
def preprocessImage(img, scale=300, threshhold=127):
    """Prepare a colour image for OCR: grayscale, enlarge, blur, binarise, invert.

    Background reading:
    https://stackoverflow.com/questions/58103337/how-to-ocr-image-with-tesseract
    https://tesseract-ocr.github.io/tessdoc/ImproveQuality.html

    Args:
        img: colour image, converted with ``COLOR_RGB2GRAY`` (RGB order assumed).
            NOTE(review): the ``__main__`` caller passes ``cv2.imread`` output,
            which OpenCV documents as BGR - confirm the intended channel order.
        scale: percentage forwarded to ``resize``.
        threshhold: unused (Otsu selects the threshold); kept so existing
            callers that pass it keep working.

    Returns:
        The Otsu-binarised image, inverted with ``cv2.bitwise_not``.

    Side effects:
        Opens several debug windows and blocks on ``cv2.waitKey()`` until a
        key is pressed.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    cv2.imshow('grayscale', grayscale)
    resized = resize(grayscale, scale)
    cv2.imshow('resized', resized)
    # The original also ran a median blur here, but its result was overwritten
    # by this Gaussian blur of `resized` before being used, so it was dropped.
    blurred = cv2.GaussianBlur(resized, (5, 5), 5)
    cv2.imshow('1', blurred)
    cv2.waitKey()
    blackAndWhite = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    cv2.imshow('blackAndWhite', blackAndWhite)
    # Adaptive-threshold variant: only displayed for comparison, not returned.
    th3 = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    cv2.floodFill(th3, None, (0, 0), 255)
    cv2.imshow('th3', th3)
    invertedColours = cv2.bitwise_not(blackAndWhite)
    return invertedColours
# excerpt from https://www.youtube.com/watch?v=6DjFscX4I_c
def imageToText(img):
    """Return the words Tesseract finds in ``img`` joined by single spaces.

    Only ``image_to_data`` rows that split into exactly 12 whitespace-separated
    fields are used; the last field is the recognised word, with any leading or
    trailing '#' and '®' characters removed. Empty words are skipped.
    """
    rows = pytesseract.image_to_data(img).splitlines()
    words = []
    # rows[0] is the column header line.
    for fields in (row.split() for row in rows[1:]):
        if len(fields) != 12:
            continue
        word = fields[11].strip('#®')
        if word != '':
            words.append(word)
    # Alternate method: pytesseract.image_to_string(img)
    return ' '.join(words)
if __name__ == "__main__":
    # Preprocess the test image and print the recognised text.
    source = cv2.imread("test.png")
    processed = preprocessImage(source, scale=300)
    print(imageToText(processed))

    # ---- Detecting words ----
    # image_to_data columns:
    #   0 level, 1 page_num, 2 block_num, 3 par_num, 4 line_num, 5 word_num,
    #   6 left, 7 top, 8 width, 9 height, 10 conf, 11 text
    tsv = pytesseract.image_to_data(processed)
    # Convert back to a colour image so the boxes can be drawn in colour.
    canvas = cv2.cvtColor(processed, cv2.COLOR_GRAY2BGR)
    for row_no, row in enumerate(tsv.splitlines()):
        print(row)
        if row_no == 0:
            continue  # header line
        cols = row.split()
        if len(cols) != 12:
            continue  # row without a recognised word
        x, y, w, h = (int(value) for value in cols[6:10])
        cv2.putText(canvas, cols[11], (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 50, 255), 2)
        cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 0, 255), 2)
    cv2.imshow('img', canvas)
    cv2.waitKey(0)

I couldn't get it perfect but almost...
I got a lot of benefit from CLAHE equalization. See tutorial here. But that wasn't enough. Still needed thresholding. Adaptive techniques didn't work well, but cv2.THRESH_TOZERO gives OK results. See thresholding tutorial here
import cv2
from pytesseract import image_to_string, image_to_data
# Load as grayscale and double the size in both directions.
gray = cv2.imread('gradient.png', cv2.IMREAD_GRAYSCALE)
enlarged = cv2.resize(gray, (0, 0), fx=2.0, fy=2.0)
# Contrast-limited adaptive histogram equalization on 8x8 tiles.
equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
equalized = equalizer.apply(enlarged)
# Invert the image: Tesseract prefers black text on a white background.
inverted = cv2.bitwise_not(equalized)
# THRESH_TOZERO: pixels at or below 127 become 0, brighter ones are kept.
img = cv2.threshold(inverted, 127, 255, cv2.THRESH_TOZERO)[1]
cv2.imwrite('output.png', img)
ocr = image_to_string(img, config='--psm 6')
print(ocr)
which gives ocr output
Tool Crafting Part
Apprentice Craft Kit
Adept Craft Kit
Expert Craft Kit
=
Master Craft Kit
1/1

How can I improve color thresholding output?

See: Converting An Image To A Cartoon Using OpenCV
In the above article they have the following image:
And, they wanted to obtain an output like the following:
I ran the following script:
import cv2
window_name = 'image'


def _show_and_wait(picture):
    """Display ``picture`` in the shared window and wait for a key press."""
    cv2.imshow(window_name, picture)
    cv2.waitKey(0)


img = cv2.imread("photo.png")
_show_and_wait(img)

# Edge mask: median-smoothed grayscale, then mean adaptive threshold.
gray = cv2.medianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 5)
edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
_show_and_wait(edges)

# Smooth the colours, then keep them only where the edge mask is non-zero.
color = cv2.bilateralFilter(img, 9, 250, 250)
cartoon = cv2.bitwise_and(color, color, mask=edges)
_show_and_wait(cartoon)
Firstly, the script is very slow.
Secondly, the output is not what they promised would be:
How can I fix these two issues?

One simple approach is to use stylization in Python/OpenCV in the Non-Photorealistic Rendering in the Computational Photography section to make a "cartoon". Reference to algorithm is at https://www.inf.ufrgs.br/~eslgastal/DomainTransform/Gastal_Oliveira_SIGGRAPH2011_Domain_Transform.pdf
Input:
import cv2
# Load the photo to be stylized.
photo = cv2.imread('beard_man.png')
# Apply OpenCV's stylization filter (non-photorealistic rendering).
result = cv2.stylization(photo, sigma_s=50, sigma_r=0.8)
# Save the stylized image.
cv2.imwrite("beard_man_cartoon.png", result)
# Show it until a key is pressed.
cv2.imshow("RESULT", result)
cv2.waitKey(0)
Result:

Brief description
I was very interested in your question, so I tried the code from the website you linked, the code you posted, and a few other examples I found by googling. I even discussed it with my peers and with the professor who taught the introductory image processing/computer vision course (in C#) that I took a couple of years ago.
Discussion feedback
Sadly, they all responded the same way, which matches what I initially thought: it's not possible to transform/convert the photo directly into the second picture in your post; that second picture is most likely an artist-made graphic. Still, if you dig deeper, maybe there actually is a module or library that can transform/convert it 100% like the second picture.
Examples code testing
So, I began trying out the code from the website you posted; I snipped a bit here and adjusted some there, but overall the result is nowhere near the second cartoon picture.
The code and result of "Converting An Image To A Cartoon Using OpenCV"
import cv2
from matplotlib import pyplot as plt
# Reading image (cv2.imread returns BGR channel order)
img = cv2.imread("img.png")
plt.imshow(img)
# Converting to RGB so matplotlib shows the colours correctly
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)
# Detecting edges of the input image.
# img is RGB at this point, so the grayscale conversion must use the RGB
# code; COLOR_BGR2GRAY would apply the red and blue weights the wrong way round.
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
gray = cv2.medianBlur(gray, 9)
edges = cv2.adaptiveThreshold(
    gray, 255,
    cv2.ADAPTIVE_THRESH_MEAN_C,
    cv2.THRESH_BINARY, 9, 9
)
# Cartoonifying the image: smooth the colours, keep them where the edge mask is set
color = cv2.bilateralFilter(img, 9, 250, 250)
cartoon = cv2.bitwise_and(color, color, mask=edges)
plt.imshow(cartoon)
plt.savefig("cartoonify.png")
plt.show()
Moving on, I then tried the code from your post. It does make a difference, and it neither ran slowly nor left the image unchanged. The code stays pretty much the same; I just added a call to cv2.imwrite() at the end to save the image.
import cv2
import matplotlib.pyplot as plt
window_name = "image"
source = cv2.imread("img.png")
# Edge mask from the median-smoothed grayscale image.
smoothed_gray = cv2.medianBlur(cv2.cvtColor(source, cv2.COLOR_BGR2GRAY), 5)
edges = cv2.adaptiveThreshold(smoothed_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
# Colour-smoothed copy, kept only where the edge mask is non-zero.
color = cv2.bilateralFilter(source, 9, 250, 250)
cartoon = cv2.bitwise_and(color, color, mask=edges)
cv2.imshow(window_name, cartoon)
cv2.waitKey(0)
# Save the result, then wait for one more key press before closing.
cv2.imwrite("cartoon_op.png", cartoon)
cv2.waitKey(0)
cv2.destroyAllWindows()
Third, I searched on GitHub and found this code. For this one I used my Stack Overflow profile picture, which is a headshot; I thought the white background might make the difference more visible, but it didn't — compared to the previous examples, the result is pretty much the same.
import cv2
import numpy as np
from tkinter.filedialog import *
# Let the user pick the input image.
photo = askopenfilename()
if not photo:
    # The dialog returns an empty value when it is cancelled.
    raise SystemExit("No image selected.")
img = cv2.imread(photo)
if img is None:
    # imread signals an unreadable/unsupported file by returning None.
    raise SystemExit("Could not read image: {}".format(photo))
grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
grey = cv2.medianBlur(grey, 5)
edges = cv2.adaptiveThreshold(grey, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
# cartoonize: smooth the colours and keep them where the edge mask is set
color = cv2.bilateralFilter(img, 9, 250, 250)
cartoon = cv2.bitwise_and(color, color, mask=edges)
cv2.imshow("Image", img)
cv2.imshow("Cartoon", cartoon)
# save
cv2.imwrite("cartoon-git.png", cartoon)
cv2.waitKey(0)
cv2.destroyAllWindows()
Just before I finished this answer, I found the example "How to cartoonize an image with Python" on Dev, which gives the closest result to the cartoonized picture. It uses a photo of Elon Musk to demonstrate. Although it is the closest to a cartoon, the output image ends up really small (the script's resize_image function scales the input down to 30% of its size).
import numpy as np
import cv2
# Path of the input image to cartoonize.
file_name = "elon.jpg"
def resize_image(image):
    """Return ``image`` scaled to 30 % of its width and height (INTER_AREA)."""
    scale_ratio = 0.3
    src_height, src_width = image.shape[0], image.shape[1]
    # cv2.resize expects the target size as (width, height).
    target_size = (int(src_width * scale_ratio), int(src_height * scale_ratio))
    return cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
def find_countours(image):
    """Draw the external Canny-edge contours of ``image`` and return the result.

    The contours are drawn on a copy, so the caller's array is left untouched
    (the original aliased ``image`` and silently modified it in place).

    Args:
        image: BGR image array.

    Returns:
        A copy of ``image`` with the contours drawn on it.

    Side effects:
        Shows the result in a window and blocks until a key is pressed.
    """
    contoured_image = image.copy()
    gray = cv2.cvtColor(contoured_image, cv2.COLOR_BGR2GRAY)
    edged = cv2.Canny(gray, 30, 100)
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; index -2 is the contour list in all.
    contours = cv2.findContours(
        edged, cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_NONE
    )[-2]
    # NOTE(review): color=1 is a scalar, i.e. an almost-black line - confirm
    # this is the intended outline colour.
    cv2.drawContours(
        contoured_image, contours,
        contourIdx=-1, color=1,
        thickness=1
    )
    cv2.imshow("Image after contouring", contoured_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return contoured_image
def color_quantization(image, k=4):
    """Reduce ``image`` to ``k`` colours with k-means and return the result.

    Every pixel is replaced by the centre of the cluster it was assigned to;
    the returned array has the same shape as ``image``.
    """
    # One row per pixel, three float32 columns, as cv2.kmeans is given here.
    pixels = np.float32(image.reshape((-1, 3)))
    stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER,
                     10000, 0.0001)
    _compactness, labels, centers = cv2.kmeans(
        pixels, k, None, stop_criteria, 1, cv2.KMEANS_RANDOM_CENTERS
    )
    palette = np.uint8(centers)
    quantized = palette[labels.flatten()]
    return quantized.reshape((image.shape))
if __name__ == '__main__':
    # Pipeline: load -> shrink -> quantize colours -> draw contours.
    original = cv2.imread(file_name)
    small = resize_image(original)
    quantized = color_quantization(small)
    final_image = find_countours(quantized)
    # Only write the file when the user answers exactly "y".
    answer = input("Save the image? [y]/[n]: ")
    if answer == "y":
        cv2.imwrite("cartoonized_" + file_name, final_image)
        print("Image saved!")
Original Elon.jpg
Cartoonized Elon.jpg
Wrapping up
I hope this long answer helps, even though it may sound like it gives no definitive answer; I found the question interesting and decided to share the process of exploring it.

Grayscaled image has dark lower border

I am using opencv to take images using my webcam.
# Show the webcam feed; SPACE saves the current frame, ESC ends the loop.
ESC_KEY = 27
SPACE_KEY = 32
cam = cv2.VideoCapture(0)
cv2.namedWindow("Handwritten Number Recognition")
img_counter = 0
while True:
    ret, frame = cam.read()
    if not ret:
        print("failed to grab frame")
        break
    cv2.imshow("Handwritten Number Recognition", frame)
    key = cv2.waitKey(1) % 256
    if key == ESC_KEY:
        print("Prediction is underway...")
        break
    if key == SPACE_KEY:
        # Frames are numbered in the order they are saved.
        img_name = "opencv_frame_{}.png".format(img_counter)
        cv2.imwrite(img_name, frame)
        print("Image taken!")
        img_counter += 1
cam.release()
cv2.destroyAllWindows()
I then convert the image into grayscale and downsize it:
def _binarise(level):
    """Map a gray level to 0 when it is below 100, otherwise to 255."""
    return 0 if level < 100 else 255


# Binarise the last captured frame with PIL and save it.
user_test = img_name
col = Image.open(user_test)
gray = col.convert('L')
bw = gray.point(_binarise, '1')
bw.save("bw_image.jpg")
bw
# Reload with OpenCV, invert, and display.
img_array = cv2.bitwise_not(cv2.imread("bw_image.jpg", cv2.IMREAD_GRAYSCALE))
plt.imshow(img_array, cmap=plt.cm.binary)
plt.show()
# Downsize to 28x28 and flatten into a single row.
img_size = 28
new_array = cv2.resize(img_array, (img_size, img_size))
final_array = new_array.reshape(1, -1)
plt.imshow(new_array, cmap=plt.cm.binary)
plt.show()
But the images have a very dark patch in the bottom which hampers the predictions I want to make with my data:
Original image:
What can I do to get past this problem? Interestingly, this only happens if I capture the image using OpenCV. If I use an image taken with the same webcam but through the camera application (passing the path of that image to the preprocessing step), the error does not appear.

You have two options:
Choosing a better global threshold for the gray values. This is the easier less generic solution. Normally, people would choose the Otsu method to automatically select the optimal threshold. Have a look at: Opencv Thresholding Tutorial
# With THRESH_OTSU the threshold is chosen automatically and returned as the first value.
threshold, dst_img = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU)
Using an adaptive threshold. Adaptive simply means using a calculated threshold for each sliding window location based on some criteria. Have a look at: Niblack's Binarization methods
Using option one:
# Option one: global Otsu threshold on the grayscale image.
gray_input = cv2.imread("thresh.jpg", cv2.IMREAD_GRAYSCALE)
otsu_level, binary = cv2.threshold(gray_input, 0, 255, cv2.THRESH_OTSU)
cv2.imwrite("thresh_bin.jpg", binary)
Output:

This problem is caused by the thresholding method used:
# Fixed cutoff: gray levels below 100 become 0, all others 255.
bw = gray.point(lambda x: 0 if x<100 else 255, '1')
To solve this, you can lower the limit value (100) to 75, or use OpenCV's automatic (Otsu) threshold:
# cv2.threshold returns a pair; `the` receives the threshold value, `bw` the binarised image.
the, bw = cv2.threshold(gray_img,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)

Here is one way to do that in Python/OpenCV using division normalization, thresholding and some morphology.
Input:
import cv2
import numpy as np
# Width of the temporary white frame added around the image.
BORDER = 60

# Load the photo and reduce it to grayscale.
img = cv2.imread('five.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Heavy blur with a 555x555 kernel.
smooth = cv2.GaussianBlur(gray, (555, 555), 0)
# Divide the blurred image by the gray image (scaled by 255), then invert.
division = 255 - cv2.divide(smooth, gray, scale=255)
# Otsu threshold.
thresh = cv2.threshold(division, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Surround with a white frame to help the morphological close.
border = cv2.copyMakeBorder(thresh, BORDER, BORDER, BORDER, BORDER, cv2.BORDER_CONSTANT, value=(255, 255, 255))
hh, ww = border.shape
# Morphological close with a 29x29 elliptical kernel.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (29, 29))
closed = cv2.morphologyEx(border, cv2.MORPH_CLOSE, kernel)
# Strip the frame again.
result = closed[BORDER:hh - BORDER, BORDER:ww - BORDER]
# Save, then display the intermediate and final images.
cv2.imwrite('five_division_threshold.jpg', result)
for title, picture in (('smooth', smooth), ('division', division), ('result', result)):
    cv2.imshow(title, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:

Detect colorful dots in image in python?

I'm trying to detect colorful dots on a white/gray background. The dots are 3 different colors (yellow, purple, blue) of different sizes. Here is the original image:
I converted the image to HSV and found lower and upper bounds for each image then applied contour detection to find those dots. The following code detects most of the dots:
import cv2
import numpy as np
from matplotlib import pyplot as plt
img = cv2.imread('image1_1.png')
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# (lower, upper) HSV bounds per dot colour, in the order blue, purple, yellow.
hsv_ranges = (
    (np.array([92, 32, 202]), np.array([96, 36, 208])),    # blue
    (np.array([141, 31, 223]), np.array([143, 83, 225])),  # purple
    (np.array([22, 25, 219]), np.array([25, 75, 225])),    # yellow
)

result = img.copy()
for lower, upper in hsv_ranges:
    # Keep only the pixels inside this colour range ...
    colour_mask = cv2.inRange(hsv, lower, upper)
    isolated = cv2.bitwise_and(img, img, mask=colour_mask)
    # ... binarise what is left ...
    isolated_gray = cv2.cvtColor(isolated, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(isolated_gray, 10, 255, cv2.THRESH_BINARY)
    # ... and outline every blob in red on the output image.
    contours, _hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(result, contours, -1, (0, 0, 255), 2)
cv2.imwrite("_allContours.jpg", result)
Here are the detected contours:
The problem is that some of the colored dots are not detected. I understand by fine-tuning the color ranges (lower and upper) it's possible to detect more dots. But that is very time consuming and not generalizable to similar images. For example the following image looks similar to the first image above and has the same colorful dots but the background is slightly different, once I ran it through above code it was not able to detect even one of the dots. Am I on the right track? Is there a more scalable and reliable solution with less need to tune color parameters in order to solve this problem? Here is the other image I tried:

I would suggest simply using adaptiveThreshold in Python/OpenCV
import cv2
import numpy as np
# Load the picture and reduce it to a single gray channel.
gray = cv2.cvtColor(cv2.imread("dots.png"), cv2.COLOR_BGR2GRAY)
# Mean adaptive threshold: 25x25 neighbourhood, constant 6 subtracted from the mean.
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 25, 6)
# Save the binary image, then show it until a key is pressed.
cv2.imwrite("dots_thresh.jpg", thresh)
cv2.imshow("thresh", thresh)
cv2.waitKey(0)

OpenCV - Smoothing borders

I want to stitch multiple image patches to a new and mainly gray background image. The image patches contain colored elements which shall not be changed, if possible. Their shape and color is diverse. Like the new background image the borders of the image patches are also gray, just slightly different, but you can see strong borders if I just go by
# Load the patch and the background it should be pasted onto.
ImgPatch = cv2.imread("C://...//ImagePatch.png")
NewBackground = cv2.imread("C://...//NewBackground.png")
height, width, channels = ImgPatch.shape
# Overwrite the patch-sized region starting at row y, column x.
# NOTE: x and y are not defined in this snippet; they must be set beforehand.
NewBackground[y:y+height,x:x+width] = ImgPatch
I tried cv2.seamlessClone() (docs.opencv.org) as explained in this tutorial:
www.learnopencv.com/seamless-cloning-using-opencv-python-cpp
The edges are perfectly smoothed, but unfortunately the colors of the elements are changed way too much. I know the approximate width and height of the gray border of each image patch. If I could smooth specifically that area, that might be a start and would already make the result look better than what I have. I tried different masks with cv2.seamlessClone(), but none of them worked. So unfortunately I haven't found a correct way to blend only the border of the patches so far.
The following images visualize my problem in a very abstract way.
What I have:
Left: Background, Right: Image patch
What I want:
What I currently get by using cv2.seamlessClone():
Any help would be very much appreciated!
EDIT As I probably was not clear enough: The real images are way more complex and so unfortunately I can not get reasonable results for all image patches by using cv2.findContour... What I am looking for is a method to merge the borders, so you can not see the exact transition of patch to background anymore.

# Blend `patch` into the middle of `image` with Poisson (seamless) cloning.
patch = cv2.imread('patch.png', cv2.IMREAD_UNCHANGED)
image = cv2.imread('image.png', cv2.IMREAD_UNCHANGED)
# All-white mask: the whole patch is cloned.
mask = 255 * np.ones(patch.shape, patch.dtype)
# shape is (rows, cols[, channels]) = (height, width[, channels]); slicing
# the first two entries also works for single-channel images.
height, width = image.shape[:2]
# seamlessClone expects the centre as an (x, y) point.
center = (width // 2, height // 2)
# NORMAL_CLONE is exposed directly on the cv2 module; the nested cv2.cv2
# path is an implementation detail that is not available in every build.
mixed_clone = cv2.seamlessClone(patch, image, mask, center, cv2.NORMAL_CLONE)

You could try to find the contour in your image patch with cv2.findContours() (the red spot). Then remove the background around the contour and save the image. Finally, you can combine the saved image (the red spot without background) with the gray background image using cv2.add(). I have combined some code I once played with and the code in the OpenCV docs (for cv2.add()). Hope it helps a bit. (Note that the example adds the image in the upper left corner; if you want it elsewhere, you should change the code.) Cheers!
Example:
import cv2
import numpy as np
from PIL import Image
# --- Step 1: cut the largest dark object out of the patch image ---
img = cv2.imread('background2.png', cv2.IMREAD_UNCHANGED)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, threshold = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY_INV)
height, width = gray.shape
mask = np.zeros((height, width), np.uint8)
# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; index -2 is the contour list in all.
contours = cv2.findContours(threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
cnt = max(contours, key=cv2.contourArea)
# Fill the largest contour in the mask and keep only those pixels.
cv2.drawContours(mask, [cnt], -1, (255, 255, 255), thickness=-1)
masked = cv2.bitwise_and(img, img, mask=mask)
# Bounding box of the filled region. boundingRect accepts a single-channel
# image and returns the box of its non-zero pixels, so the second
# threshold/findContours round trip (whose raw return tuple was indexed as
# if it were a contour) is not needed.
x, y, w, h = cv2.boundingRect(mask)
circle = masked[y:y+h, x:x+w]
cv2.imwrite('temp.png', circle)
cv2.waitKey(0)
cv2.destroyAllWindows()

# --- Step 2: replace pure black pixels with fully transparent white ---
img = Image.open('temp.png')
img = img.convert("RGBA")
datas = img.getdata()
newData = []
for item in datas:
    if item[0] == 0 and item[1] == 0 and item[2] == 0:
        newData.append((255, 255, 255, 0))
    else:
        newData.append(item)
img.putdata(newData)
img.save('background3.png', "PNG")

# --- Step 3: paste the cut-out into the top-left corner of the background ---
img1 = cv2.imread('background1.png')
img2 = cv2.imread('background3.png')
rows, cols, channels = img2.shape
roi = img1[0:rows, 0:cols]
img2gray = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
# mask selects the dark (object) pixels of the cut-out, mask_inv the rest.
ret, mask = cv2.threshold(img2gray, 110, 255, cv2.THRESH_BINARY_INV)
mask_inv = cv2.bitwise_not(mask)
img1_bg = cv2.bitwise_and(roi, roi, mask=mask_inv)
img2_fg = cv2.bitwise_and(img2, img2, mask=mask)
dst = cv2.add(img1_bg, img2_fg)
img1[0:rows, 0:cols] = dst
cv2.imshow('img', img1)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.