Problem when trying to save images when using a sliding window - Python

I want to save the images obtained from a sliding window algorithm in a 3-dimensional matrix. When I run this code with the lines behind the ## uncommented, I get an error. I think the code is correct; any ideas?
window is the (100, 100) matrix that I want to save in A.
## import the necessary packages
import cv2
import matplotlib.pyplot as plt
import numpy as np
import imutils
import time

img = cv2.imread("2.jpg")  # your image path
image = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

def pyramid(image, scale=1.5, minSize=(30, 30)):
    # yield the original image
    yield image
    # keep looping over the pyramid
    while True:
        # compute the new dimensions of the image and resize it
        w = int(image.shape[1] / scale)
        image = imutils.resize(image, width=w)
        # if the resized image does not meet the supplied minimum
        # size, then stop constructing the pyramid
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break
        # yield the next image in the pyramid
        yield image

def sliding_window(image, stepSize, windowSize):
    # slide a window across the image
    for y in range(0, image.shape[0], stepSize):
        for x in range(0, image.shape[1], stepSize):
            # yield the current window
            yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

(winW, winH) = (100, 100)
# declare the matrix
A = np.empty((100, winW, winH))
b = 1
window = []

#%%
# loop over the image pyramid
for resized in pyramid(image, scale=2):
    # loop over the sliding window for each layer of the pyramid
    for (x, y, window) in sliding_window(resized, stepSize=32, windowSize=(winW, winH)):
        # if the window does not meet our desired window size, ignore it
        if window.shape[0] != winH or window.shape[1] != winW:
            continue
        # THIS IS WHERE YOU WOULD PROCESS YOUR WINDOW, SUCH AS APPLYING A
        # MACHINE LEARNING CLASSIFIER TO CLASSIFY THE CONTENTS OF THE
        # WINDOW
        # since we do not have a classifier, we'll just draw the window
        clone = resized.copy()
        cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 255, 0), 2)
        cv2.imshow("Window", clone)
        cv2.waitKey(1)
        time.sleep(0.25)
        # ______ ###### the problem ###### ______
        # I want to save the windows obtained in A, but I get the error on the next two lines
        ## A[b, :, :] = window
        ## b = b + 1
cv2.destroyAllWindows()
I want to save the windows obtained in A, but I get an error on those two commented lines (A[b,:,:] = window and b = b + 1).
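For what it's worth, a minimal sketch of one way to collect the windows without preallocating A to a fixed size (an assumed alternative, not the code above): append every valid window to a list and stack at the end, so the first dimension always matches the number of windows actually produced.

windows = []
for resized in pyramid(image, scale=2):
    for (x, y, window) in sliding_window(resized, stepSize=32, windowSize=(winW, winH)):
        if window.shape[0] != winH or window.shape[1] != winW:
            continue
        windows.append(window)
# Stack into a 3-D array of shape (number_of_windows, winH, winW)
A = np.stack(windows, axis=0)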

Related

Cannot use any mouse movement packages with python after using image recognition

I've been trying to use a script with cv2 and NumPy to search an image for an icon, the start button of a stress-test program, and that part works fine, but it doesn't work together with any mouse movement packages. I've tried mouse, pyautogui, AutoIt and a few more with no results. The mouse movement works if I put it before the cv2 image recognition code, but that won't work because I need the coordinates.
P.S. (100, 500) is not the coordinate I want to move to; I want to move to the "coords" variable, which should be equal to (pt[0] + w, pt[1] + h).
import cv2
import numpy as np
#import pyautogui
import time
import win32api
import pydirectinput

def movedamnyou():
    pydirectinput.moveTo(100, 500)

# open OCCT
win32api.ShellExecute(0, "open", "D:\AppData\OCCT.exe", None, ".", 0)
# wait for program to open
time.sleep(20)
# classifier
# take screenshot
#im2 = pyautogui.screenshot('scrn.png')
# Read the main image
img_rgb = cv2.imread('scrn.png')
# Convert it to grayscale
img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
# Read the template
template = cv2.imread('OCCTico.png', 0)
# Store width and height of template in w and h
w, h = template.shape[::-1]
# Perform match operations.
res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
# Specify a threshold
threshold = 0.8
# Store the coordinates of matched area in a numpy array
loc = np.where(res >= threshold)
# Draw a rectangle around the matched region.
for pt in zip(*loc[::-1]):
    cv2.rectangle(img_rgb, pt, (pt[0] + w, pt[1] + h), (0, 255, 255), 2)
# Show the final image with the matched area.
cv2.imshow('Detected', img_rgb)
# print the output position
coords = (pt[0] + w, pt[1] + h)
pydirectinput.moveTo(100, 500)
import pyautogui
pyautogui.moveTo(coords[0], coords[1])
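As a side note, a minimal sketch of feeding a single best match into the mouse move might look like this; cv2.minMaxLoc replaces the threshold loop (an assumed simplification, not the original approach), and the file names and 0.8 threshold are carried over from the code above:

import cv2
import pyautogui

# Load the screenshot and the icon template in grayscale (file names taken from the question).
img_gray = cv2.imread('scrn.png', cv2.IMREAD_GRAYSCALE)
template = cv2.imread('OCCTico.png', cv2.IMREAD_GRAYSCALE)
h, w = template.shape

# Locate the single best match in the result map.
res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
_, max_val, _, max_loc = cv2.minMaxLoc(res)

if max_val >= 0.8:  # same confidence threshold as above
    # Centre of the matched region, then move the mouse there.
    center_x, center_y = max_loc[0] + w // 2, max_loc[1] + h // 2
    pyautogui.moveTo(center_x, center_y)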

Line detection issue - OpenCV in Python

I have written the following script with which I aim to detect lines in Gazebo (a simulation environment):
#!/usr/bin/env python
# rospy for the subscriber
import rospy
# ROS Image message
from sensor_msgs.msg import Image
# ROS Image message -> OpenCV2 image converter
from cv_bridge import CvBridge, CvBridgeError
# OpenCV2 for saving an image
import cv2
import matplotlib.pyplot as plt
import numpy as np

def gradient(img):
    # grayscale the image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # gaussian blur of image with a 5x5 kernel
    gauss = cv2.GaussianBlur(gray, (5, 5), 0)
    # Return the canny of the image
    return cv2.Canny(gauss, 20, 30)

def region_of_interest(img):
    # Height of image (number of rows)
    height = img.shape[0]
    # Width of the image (number of columns)
    width = img.shape[1]
    # Create an array of polygons to use for the masking of the canny image
    polygons = np.array([
        [(200, height), (200, 500), (600, 500), (600, height)]
    ])
    # Create the mask image's background (black color)
    mask_bg = np.zeros_like(img)
    # Create the mask image (image with black background and white region of interest)
    mask = cv2.fillPoly(mask_bg, polygons, 255)
    # Isolate the area of interest using the bitwise operator of the mask and canny image
    masked_image = cv2.bitwise_and(img, cv2.fillPoly(mask_bg, polygons, 255))
    # Return the updated image
    return masked_image

def make_coordinates(img, line_parameters):
    # Extract the average slope and intercept of the line
    slope, intercept = line_parameters
    # Coordinate y(1) of the calculated line
    y1 = img.shape[0]
    # Coordinate y(2) of the calculated line
    y2 = int(y1 * 0.5)
    # Coordinate x(1) of the calculated line
    x1 = int((y1 - intercept) / slope)
    # Coordinate x(2) of the calculated line
    x2 = int((y2 - intercept) / slope)
    # Return the coordinates of the average line
    return np.array([x1, y1, x2, y2])

def average_slope_intercep(img, lines):
    # Create an empty list containing the coordinates of the detected line
    line_fit = []
    # Loop through all the detected lines
    for line in lines:
        # Store the coordinates of the detected lines into an 1D array of 4 elements
        x1, y1, x2, y2 = line.reshape(4)
        # Create a line y = mx+b based on the coordinates
        parameters = np.polyfit((x1, x2), (y1, y2), 1)
        # Extract the slope m
        slope = parameters[0]
        # Extract the intercept b
        intercept = parameters[1]
        # Add elements on the list
        line_fit.append((slope, intercept))
        # Check slope of line
        # if slope < 0:
        #     continue
        # else:
        #     continue
    # Calculate the average of the line fit parameters list
    line_fit_average = np.average(line_fit, axis=0)
    # Extract the coordinates of the calculated line
    main_line = make_coordinates(img, line_fit_average)
    return np.array([main_line])

def display_lines(img, lines):
    # Create a mask image that will have the drawn lines
    line_image = np.zeros_like(img)
    # If no lines were detected
    if lines is not None:
        # Loop through all the lines
        for line in lines:
            # Store the coordinates of the first and last point of the lines into 1D arrays
            x1, y1, x2, y2 = line.reshape(4)
            # Draw the lines on the image with blue color and thickness of 10
            cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 10)
    # Return the mask image with the drawn lines
    return line_image

def image_callback(msg):
    # print("Received an image!")
    # Instantiate CvBridge
    bridge = CvBridge()
    try:
        # Convert your ROS Image message to OpenCV2
        frame = bridge.imgmsg_to_cv2(msg, "bgr8")
    except CvBridgeError as e:
        print(e)
    else:
        # Copy of the original frame
        frame_copy = np.copy(frame)
        # Canny of image
        canny_frame = gradient(frame_copy)
        # Apply mask in region of interest
        cropped_image = region_of_interest(canny_frame)
        # Apply Hough Transform on the region of interest
        lines = cv2.HoughLinesP(cropped_image, 1, np.pi/180, 30, np.array([]), minLineLength=10, maxLineGap=2)
        # Calculate the average slope of the detected lines
        averaged_lines = average_slope_intercep(frame_copy, lines)
        # Create a mask image with the drawn lines
        line_image = display_lines(frame_copy, averaged_lines)
        # Plot lines on the camera feed frame
        combo_image = cv2.addWeighted(frame_copy, 0.8, line_image, 1, 1)
        # Show manipulated image feed
        cv2.imshow("Result feed", frame_copy)
        # plt.imshow(canny_frame)
        cv2.waitKey(1)
        # plt.show()

def main():
    rospy.init_node('image_listener')
    # Define your image topic
    image_topic = "rover/camera1/image_raw"
    # Set up your subscriber and define its callback
    rospy.Subscriber(image_topic, Image, image_callback)
    # Spin until ctrl + c
    rospy.spin()
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
The code is integrated in ROS, so please focus your attention on the image_callback function. My issue is that the line I want to detect is quite noisy and I cannot figure out how to detect it correctly.
To be more specific, from the following frame,
Original Frame
I get this image after Gaussian blur and the Canny algorithm,
Canny Frame
How could I filter the "noise" I see in the Canny frame? I have played a lot with the Canny and Gaussian blur parameters, but all I have achieved is removing gradients instead of actually making the result less "noisy".
This method might help you to remove noise from the frame.
import cv2
import numpy as np
from skimage.morphology import skeletonize

def get_skeleton_iamge(threshold_image):
    skeleton = skeletonize(threshold_image / 255)
    skeleton = skeleton.astype(np.uint8)
    skeleton *= 255
    return skeleton

image = cv2.imread("road.png", 0)
image = cv2.resize(image, (300, 300))

bilateral = cv2.bilateralFilter(image, 15, 100, 100)
cv2.imshow("bilateral_image", bilateral)

canny_image = cv2.Canny(bilateral, 20, 30)
cv2.imshow("canny_image", canny_image)

kernel = np.ones((10, 10))
dilate_image = cv2.dilate(canny_image, kernel, iterations=1)
erode_image = cv2.erode(dilate_image, kernel, iterations=1)
cv2.imshow("erode_image", erode_image)

skeleton_iamge = get_skeleton_iamge(erode_image)
cv2.imshow("skeleton_iamge", skeleton_iamge)
cv2.waitKey(0)
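If the goal is still line detection, a rough sketch of running the probabilistic Hough transform on the skeletonized frame could look like this; the Hough parameters are copied from the question and are assumptions rather than tuned values:

# Run HoughLinesP on the cleaned-up edge map and draw the detections.
vis = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
lines = cv2.HoughLinesP(skeleton_iamge, 1, np.pi / 180, 30, np.array([]),
                        minLineLength=10, maxLineGap=2)
if lines is not None:
    for line in lines:
        x1, y1, x2, y2 = line.reshape(4)
        cv2.line(vis, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.imshow("hough_lines", vis)
cv2.waitKey(0)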

How to make a shape larger or smaller without changing the resolution of the image using OpenCV or PIL in Python

I would like to be able to make a certain shape in either a PIL image or an OpenCV image 3 times larger or smaller without changing the resolution of the image or distorting the shape I want to resize. I have tried using OpenCV's dilation method, but that is not its intended use, and it changed the shape of the image. For example:
Thanks.
Here's a way of doing it:
find the interesting shape, i.e. non-white ROI area
extract it
scale it up by a factor
clear the original image to white
paste the scaled ROI back into the image with the same centre
#!/usr/bin/env python3
import cv2
import numpy as np

if __name__ == "__main__":
    # Open image
    orig = cv2.imread('image.png', cv2.IMREAD_COLOR)

    # Get extent of interesting part, i.e. non-white part
    y, x, _ = np.nonzero(~orig)
    y0, y1 = np.min(y), np.max(y)   # top and bottom rows
    x0, x1 = np.min(x), np.max(x)   # left and right cols
    h, w = y1 - y0, x1 - x0         # height and width
    ROI = orig[y0:y1, x0:x1]        # extract ROI
    cv2.imwrite('ROI.png', ROI)     # DEBUG only

    # Upscale ROI
    factor = 3
    scaledROI = cv2.resize(ROI, (w * factor, h * factor), interpolation=cv2.INTER_NEAREST)
    newH, newW = scaledROI.shape[:2]

    # Clear original image to white
    orig[:] = [255, 255, 255]

    # Get centre of original shape, and position of top-left of ROI in output image
    cx, cy = (x0 + x1) // 2, (y0 + y1) // 2
    top = cy - newH // 2
    left = cx - newW // 2

    # Paste in rescaled ROI
    orig[top:top + newH, left:left + newW] = scaledROI
    cv2.imwrite('result.png', orig)
That transforms this:
to this:
Puts me in mind of a pantograph:
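The same approach should work in the shrinking direction; a rough sketch, assuming a fractional factor and reusing the ROI, centre and white-cleared canvas from the code above:

# Shrink the ROI instead of enlarging it, keeping the same centre (assumed variant).
factor = 1 / 3
newW, newH = max(1, int(w * factor)), max(1, int(h * factor))
scaledROI = cv2.resize(ROI, (newW, newH), interpolation=cv2.INTER_NEAREST)

orig[:] = [255, 255, 255]                    # clear to white again
top, left = cy - newH // 2, cx - newW // 2   # same centre as before
orig[top:top + newH, left:left + newW] = scaledROI
cv2.imwrite('result_small.png', orig)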

Find average location of pixel of a given color

I am making an automatic curve detection program (graphs, 2D plots, etc.) and have gotten stuck with trying to find the average location of a color. The color is determined based on a K means clustering, so I have the RGB value for that color. I have also built a small sliding window program to follow a set of (X,Y) coordinates, and in that window is where the average location/centroid for a particular color needs to be found. I am very new to OpenCV, so I will detail what I want to do below, and appreciate any help.
Find the 3 most common colors with K-means and obtain the RGB values of those colors (done).
Use the sliding window to establish an N x N search window that rides along a determined X,Y path (done).
Apply a mask to get the curve color (based on Kmeans) extracted in the search window (need help). Also potentially mask out the background color based on Kmeans.
Find the average X,Y coordinate/pixel location for the desired curve color in the NxN search window (need help).
It would be appreciated if anyone could help with masking the color of the image based on the RGB value in OpenCV and then determining the average pixel location of the desired color in the search window. I have included the code that I have so far. The boundaries would come from the K-means clustering, but right now they are just hard coded for testing purposes.
Below is an example curve, where I would attempt to follow any of the black lines
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2
import math
import time
from tkinter import *
from tkinter import filedialog
from tkinter import ttk
from tkinter.filedialog import askopenfilename
from tkinter import scrolledtext
from PIL import Image, ImageTk
import os
import ReadCoords
from sklearn.cluster import KMeans

(winW, winH) = (12, 12)
xs, ys = ReadCoords.read_coords()
image = cv2.imread(r"Curve.PNG")

boundaries = [([0, 0, 0], [128, 128, 128])]

def color_detection(image, boundaries):
    for (lower, upper) in boundaries:
        lower = np.array(lower, dtype='uint8')
        upper = np.array(upper, dtype='uint8')
        mask = cv2.inRange(image, lower, upper)
        output = cv2.bitwise_and(image, image, mask=mask)
    return output

def kmeans_colors(image=image):
    org_clone = image.copy()
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = image.reshape((image.shape[0] * image.shape[1], 3))
    # image = image.reshape((image.shape[0] * image.shape[1], 1))
    clt = KMeans(n_clusters=3)
    clt.fit(image)
    # print(clt.cluster_centers_)

    def centroid_histogram(clt):
        # grab the number of different clusters and create a histogram
        # based on the number of pixels assigned to each cluster
        numLabels = np.arange(0, len(np.unique(clt.labels_)) + 1)
        (hist, _) = np.histogram(clt.labels_, bins=numLabels)
        # normalize the histogram, such that it sums to one
        hist = hist.astype("float")
        hist /= hist.sum()
        # return the histogram
        return hist

    def plot_colors(hist, centroids):
        # initialize the bar chart representing the relative frequency
        # of each of the colors
        bar = np.zeros((50, 300, 3), dtype="uint8")
        startX = 0
        # loop over the percentage of each cluster and the color of
        # each cluster
        for (percent, color) in zip(hist, centroids):
            # plot the relative percentage of each cluster
            endX = startX + (percent * 300)
            cv2.rectangle(bar, (int(startX), 0), (int(endX), 50),
                          color.astype("uint8").tolist(), -1)
            startX = endX
        # return the bar chart
        return bar

    hist = centroid_histogram(clt)
    bar = plot_colors(hist, clt.cluster_centers_)
    return clt.cluster_centers_

def curve_sliding_window(image, step_size, window_size, x_range, y_range):
    for x, y in zip(x_range, y_range):
        yield (x, y, image[y:y + window_size[1], x:x + window_size[0]])

for (x, y, window) in curve_sliding_window(image, step_size=8, window_size=(winW, winH), x_range=xs, y_range=ys):
    # if the window does not meet our desired window size, ignore it
    if window.shape[0] != winH or window.shape[1] != winW:
        continue
    # THIS IS WHERE YOU WOULD PROCESS YOUR WINDOW, SUCH AS APPLYING A
    # MACHINE LEARNING CLASSIFIER TO CLASSIFY THE CONTENTS OF THE
    # WINDOW
    # since we do not have a classifier, we'll just draw the window
    clone = image.copy()
    cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 0, 255), 2)
    cv2.imshow("Window", clone)
    # cv2.waitKey(1)
    # time.sleep(0.01)

    masked = color_detection(window, boundaries)

    cX, cY = moment_centriod(contours(window))

    if cX != 3 and cY != 3:
        cv2.circle(window, (cX, cY), 1, (0, 0, 255), 2)

    cv2.imshow("windows", np.hstack([masked, window]))
    cv2.waitKey(50)
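For the two "need help" steps, a rough sketch might be: build an inRange mask around the target cluster colour (the ±tol band is an assumption, since K-means returns a single colour), then average the coordinates of the masked pixels. The helper name and the tolerance value are illustrative, not part of the original code:

def average_color_location(window, target_bgr, tol=40):
    # Mask pixels within +/- tol of the target colour (tol is an assumed parameter).
    target = np.array(target_bgr, dtype=np.int16)
    lower = np.clip(target - tol, 0, 255).astype('uint8')
    upper = np.clip(target + tol, 0, 255).astype('uint8')
    mask = cv2.inRange(window, lower, upper)

    # Mean of the matching pixel coordinates is the average location in the window.
    ys, xs = np.nonzero(mask)
    if len(xs) == 0:
        return None  # no pixel of that colour in this window
    return int(xs.mean()), int(ys.mean())

# Example (hypothetical): centre of the black curve pixels inside the current window
# result = average_color_location(window, (0, 0, 0))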

How can I randomly add 20 images (10x10) to an empty background image (200x200) in Python?

I want the 10 small images to be placed inside this circle.
I'm working on a small project to randomly place several images of size (10 w x 10 h) onto another image that will be used as a background of size (200 w x 200 h) in Python. The small images should be placed at random locations in the background image.
I have 20 small images of size (10x10) and one empty background image of size (200x200). I want to put my 20 small images in the empty background image at random locations.
Is there a way to do it in Python?
Code
# Dependencies importation
import cv2
# Saving directory
saving_dir = "../Saved_Images/"
# Read the background image
bgimg = cv2.imread("../Images/background.jpg")
# Resizing the background image
bgimg_resized = cv2.resize(bgimg, (2050,2050))
# Read the image that will be put in the background image (example of 1)
small_img = cv2.imread("../Images/small.jpg")
# Convert the resized background image to gray
bgimg_gray = cv2.cvtColor(bgimg, cv2.COLOR_BGR2GRAY)
# Convert the grayscale image to a binary image
ret, thresh = cv2.threshold(bgimg_gray,127,255,0)
# Determine the moments of the binary image
M = cv2.moments(thresh)
# calculate x,y coordinate of center
cX = int(M["m10"] / M["m00"])
cY = int(M["m01"] / M["m00"])
# drawing the circle in the background image
circle = cv2.circle(bgimg, (cX, cY), 930, (0,0,255), 9)
print(circle)
# Saving the new image
cv2.imwrite(saving_dir+"bgimg"+".jpg", bgimg)
cv2.namedWindow('image', cv2.WINDOW_NORMAL)
cv2.resizeWindow("Test", 1000, 1200)
# Showing the images
cv2.imshow("image", bgimg)
# Waiting for any key to stop the program execution
cv2.waitKey(0)
The above code is for one image; I want to do it for the 20 images and put them at random locations.
Assuming you have that background image background.jpg (decreased to 200x200 px) and 10 images: image01.png, image02.png ... image10.png (10x10 px). Then:
import glob
import random
from PIL import Image

img_bg = Image.open('circle.jpg')
width, height = img_bg.size

images = glob.glob('*.png')
for img in images:
    img = Image.open(img)
    x = random.randint(40, width - 40)
    y = random.randint(40, height - 40)
    img_bg.paste(img, (x, y, x + 10, y + 10))

img_bg.save('result.png', 'PNG')
Output image:
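If the small images must land inside the red circle rather than just inside the margins, one hedged variation is to sample positions in polar coordinates around the circle's centre; the centre and radius below are placeholders and would come from the cX, cY and radius used when the circle was drawn:

import glob
import math
import random
from PIL import Image

img_bg = Image.open('circle.jpg')
cx, cy, radius = 100, 100, 90   # assumed centre and radius for a 200x200 background

for path in glob.glob('*.png'):
    tile = Image.open(path)
    # Sample a point uniformly inside the circle, leaving room for the 10x10 tile.
    angle = random.uniform(0, 2 * math.pi)
    r = (radius - 10) * math.sqrt(random.random())
    x = int(cx + r * math.cos(angle)) - 5
    y = int(cy + r * math.sin(angle)) - 5
    img_bg.paste(tile, (x, y))

img_bg.save('result_in_circle.png', 'PNG')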
