I am new to machine vision and I am trying to detect objects. I followed a YouTube tutorial and did exactly what the teacher did. He ran the code on pictures of a car and a person, and those are also the only pictures for which I get correct output, but I want to use it to detect fruits.
Also the code is:
import cv2
import matplotlib.pyplot as plt
import numpy as np
# Graph definition and frozen weights of the SSD MobileNet v3 detector.
config_file = "ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt"
frozen_model = "frozen_inference_graph.pb"
model = cv2.dnn_DetectionModel(frozen_model, config_file)

# One class name per line; line k (1-based) is used for class id k below.
# NOTE(review): the label file must list names in the order of the ids this
# model emits. A list with a different numbering will attach wrong names to
# correct boxes -- TODO confirm labels.txt matches the model.
file_name = 'labels.txt'
with open(file_name, 'rt') as fpt:
    classlabels = fpt.read().rstrip('\n').split('\n')

# Pre-processing applied to every frame before it is fed to the network.
model.setInputSize(320, 320)
model.setInputScale(1.0 / 127.5)
model.setInputMean((127.5, 127.5, 127.5))
model.setInputSwapRB(True)

# Read the image
image_path = 'person.jpg'
img = cv2.imread(image_path)
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read image: {image_path}")

ClassIndex, confidence, bbox = model.detect(img, confThreshold=0.5)
print(ClassIndex)

font_scale = 3
font = cv2.FONT_HERSHEY_PLAIN

# detect() hands back empty tuples (not arrays) when nothing clears the
# threshold, so .flatten() must only be called when there are detections.
if len(ClassIndex) != 0:
    for ClassInd, conf, boxes in zip(ClassIndex.flatten(), confidence.flatten(), bbox):
        cv2.rectangle(img, boxes, (255, 0, 0), 2)
        label_pos = ClassInd - 1
        if 0 <= label_pos < len(classlabels):
            label = classlabels[label_pos]
        else:
            # Avoid IndexError (or silently wrapping to the last label for
            # id 0) when the label file does not cover this class id.
            label = f"id {ClassInd}"
        cv2.putText(img, label, (boxes[0] + 10, boxes[1] + 40), font,
                    fontScale=font_scale, color=(0, 255, 0), thickness=3)

# OpenCV stores images as BGR; matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()
The ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt link: https://gist.github.com/dkurt/54a8e8b51beb3bd3f770b79e56927bd7
Coco names link: https://github.com/pjreddie/darknet/blob/master/data/coco.names
frozen_inference_graph.pb link: https://drive.google.com/file/d/19seKEz-HYBkY_4DZV9ieu4gxgOnPxqhs/view
Related
I need your help to modify the following code!
import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np
class CarvanaDataset(Dataset):
    """Image/mask pair dataset read from two directories.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the segmentation masks.
        transform: Optional callable invoked as
            ``transform(image=image, mask=mask)`` that returns a mapping
            with ``"image"`` and ``"mask"`` entries.
        image_ext: Substring of the image file name that is replaced to
            build the mask file name (default ``".jpg"``).
        mask_suffix: Replacement for ``image_ext`` in the mask file name
            (default ``"_mask.gif"``). Pass e.g. ``image_ext=".tiff"`` and
            ``mask_suffix=".tiff"`` for datasets whose masks share the
            image file names.
    """

    def __init__(self, image_dir, mask_dir, transform=None,
                 image_ext=".jpg", mask_suffix="_mask.gif"):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.image_ext = image_ext
        self.mask_suffix = mask_suffix
        # os.listdir order is arbitrary; sort so that an index always maps to
        # the same sample, and skip sub-directories that cannot be opened as
        # images.
        self.images = sorted(
            name for name in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, name))
        )

    def __len__(self):
        """Return the number of image files found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, index):
        """Load the image and mask at ``index``.

        Returns:
            ``(image, mask)``: the image converted to RGB and the mask
            converted to single-channel float32 with 255 mapped to 1.0,
            both passed through ``transform`` when one was given.
        """
        name = self.images[index]
        img_path = os.path.join(self.image_dir, name)
        mask_path = os.path.join(
            self.mask_dir, name.replace(self.image_ext, self.mask_suffix)
        )
        # Context managers release the underlying file handles promptly.
        with Image.open(img_path) as img_file:
            image = np.array(img_file.convert("RGB"))
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert("L"), dtype=np.float32)
        # Map 255-valued mask pixels to 1.0.
        mask[mask == 255.0] = 1.0
        if self.transform is not None:
            augmentations = self.transform(image=image, mask=mask)
            image = augmentations["image"]
            mask = augmentations["mask"]
        return image, mask
When I used color image datasets (from kaggle as recommended by the author of the code) the above code loads color image datasets successfully.
But when I switched to a grayscale dataset (both images and masks), the script generates the following error:
PIL.UnidentifiedImageError: cannot identify image file 'data/val_images/zoazoa_251.tiff'
Can you please help to fix this?
Note that the script file of interest "dataset.py" can be found here: CLIK
I'm in the process of developing a Flask Python application that's able to find a logo within different background images. The goal is to have a solution that states "yes, the logo is in the background", or "no, it doesn't look like the logo is in the background". I'm utilizing the SIFT package to match keypoints between the logo and the image with the logo in the background.
I want to figure out how I can write an IF statement that's able to deliver the message above depending on the keypoints. Is there anyone that can give me guidance on the first steps of doing this? I'll attach the code to this message (note that the function should have an indent there):
#app.route('/identify/<filename>')
def identify(filename):
    """Render the identification page for an uploaded image.

    Loads the uploaded image and the reference logo, matches SIFT
    descriptors between them, displays the best matches with matplotlib
    and renders ``identify.html`` with the URL of the uploaded image.

    Args:
        filename: Name of the uploaded file inside ``UPLOAD_FOLDER``.

    Raises:
        FileNotFoundError: If the uploaded image or the logo cannot be read.
    """
    # TODO: Logic to load the uploaded image filename and identify other images
    image_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    image_url = url_for('images', filename=filename)

    background_image = cv2.imread(image_path)
    if background_image is None:
        # cv2.imread returns None instead of raising on unreadable files.
        raise FileNotFoundError(f"Could not read uploaded image: {image_path}")
    background_image = cv2.cvtColor(background_image, cv2.COLOR_BGR2RGB)

    # TODO: still need to figure out how to load a second photo of the
    # user's choice; for now the logo to look for is fixed.
    logo_path = "chevy-open-road-logo-300.png"
    logo = cv2.imread(logo_path)
    if logo is None:
        raise FileNotFoundError(f"Could not read logo image: {logo_path}")
    logo = cv2.cvtColor(logo, cv2.COLOR_BGR2RGB)

    # Work on a copy so the loaded background stays untouched.
    background_image_copy = background_image.copy()

    # Prefer SIFT from the main module; fall back to the contrib location
    # on builds that only expose it there.
    if hasattr(cv2, "SIFT_create"):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()
    background_keypoints, background_descriptors = sift.detectAndCompute(background_image_copy, None)
    logo_keypoints, logo_descriptors = sift.detectAndCompute(logo, None)

    # Feature matching. detectAndCompute yields None descriptors for an
    # image without keypoints, which the matcher cannot accept.
    if background_descriptors is None or logo_descriptors is None:
        matches = []
    else:
        bf = cv2.BFMatcher(cv2.NORM_L1, crossCheck=True)
        matches = bf.match(background_descriptors, logo_descriptors)
        matches = sorted(matches, key=lambda m: m.distance)

    # Draw the 45 closest matches into a freshly allocated output image.
    image_match = cv2.drawMatches(background_image_copy, background_keypoints,
                                  logo, logo_keypoints, matches[:45], None, flags=2)
    plt.imshow(image_match), plt.show()

    # TODO: derive the "logo found / not found" answer from `matches`, and
    # pass the match plot and the keypoint/match counts to the template.
    return render_template('identify.html', image=image_url)
I've created a simple Python application that uses the CV2 computer vision library to recognise a template image on a webpage.
I give the application a template image that it needs to recognise on the source image. In this case, the source image is a screenshot of the website www.google.com and the template image is the Google search button.
Template image
I thought the application worked at first, but it's drawing the rectangle completely in the wrong place on the input (source) image. I've added a picture below of where the application located the template image.
Result
Here's the source code.
Main Application Source
import cv2
import numpy
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
class Automate:
    """Drive a Chrome session, capture a screenshot and template-match it."""

    def __init__(self):
        """Start Chrome with the "kiosk" argument and reset capture state."""
        chrome_options = Options()
        chrome_options.add_argument("kiosk")
        self.driver = webdriver.Chrome(ChromeDriverManager("93.0.4577.63").install(), options=chrome_options)
        # Path of the most recent screenshot, set by snap_screen().
        self.screenShot = None
        self.finalImage = None

    def open_webpage(self, url):
        """Navigate the browser to ``url``."""
        print(f"Open webpage {url}")
        self.driver.get(url)

    def close_webpage(self):
        """Wait five seconds, then close the browser window."""
        # NOTE(review): Event is presumably threading.Event used as a plain
        # delay; its import is not visible in this snippet -- TODO confirm.
        Event().wait(5)
        self.driver.close()
        print("Closing webpage")

    def snap_screen(self):
        """Save a screenshot of the current page to ``screenshot.png``."""
        print("Capturing screen")
        self.screenShot = "screenshot.png"
        self.driver.save_screenshot(self.screenShot)
        print("done.")

    def match(self, image, template):
        """Locate ``template`` inside ``image`` and show the best match.

        Template matching compares pixels at the template's own size, so
        the template must be about the same scale as the element in the
        screenshot.

        Args:
            image: Path of the image to search in.
            template: Path of the template image to look for.

        Returns:
            ``(location, score)``: top-left corner of the best match and
            its normalised correlation score.

        Raises:
            FileNotFoundError: If either image cannot be read.
        """
        src_bgr = cv2.imread(image)
        if src_bgr is None:
            raise FileNotFoundError(f"Could not read image: {image}")
        temp_bgr = cv2.imread(template)
        if temp_bgr is None:
            raise FileNotFoundError(f"Could not read template: {template}")

        # convert images to greyscale.
        src = cv2.cvtColor(src_bgr, cv2.COLOR_BGR2GRAY)
        temp = cv2.cvtColor(temp_bgr, cv2.COLOR_BGR2GRAY)
        cv2.imshow("out", temp)
        cv2.waitKey(0)

        H, W = temp.shape
        # The constant lives directly on the cv2 module.
        result = cv2.matchTemplate(src, temp, cv2.TM_CCOEFF_NORMED)
        _, maxVal, _, maxLoc = cv2.minMaxLoc(result)
        location = maxLoc
        bottomRight = (location[0] + W, location[1] + H)

        # Draw on the colour image that was already loaded.
        cv2.rectangle(src_bgr, location, bottomRight, (0, 0, 255), 5)
        cv2.imshow("output", src_bgr)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        return location, maxVal
def main():
    """Screenshot google.com and locate the search-button template in it."""
    url = "http://www.google.com"
    auto = Automate()
    try:
        auto.open_webpage(url)
        auto.snap_screen()
    finally:
        # Close the browser even if loading or capturing fails.
        auto.close_webpage()
    match_image = "images/templates/google-button.png"
    # Match screenshot with template image. The method on Automate is
    # called `match`; `check_match` does not exist.
    auto.match(
        image=auto.screenShot,
        template=match_image,
    )
I'd appreciate any help or advice on how to solve this issue.
Update
Following the advice given by user zteffi, I resized my template image to the correct image dimensions. After doing this, the match template function works as expected.
You want to make sure that your template image is as close as possible to the actual size of the element you want to locate in the base image. In my case, this was around 150 x 150 or 200 x 200, which made the button easier to find.
I just want to add a shadow to the shapes that I am creating while using python-pptx.
I have read as many documents about using shadows in python-pptx as I can find but I can not figure out how to actually do it.
I tried shadow = shape.shadow to create a 'ShadowFormat' object but when I try to do shadow.visible I get the error AttributeError: 'ShadowFormat' object has no attribute 'visible'
If anyone could explain how this is done and give an example it would be much appreciated!
Extra info:
This is the page linking to the topic: https://python-pptx.readthedocs.io/en/latest/dev/analysis/shp-shadow.html however there is no example on how to create a shadow for a shape in powerpoint.
I have imported the following modules:
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE
from pptx.enum.action import PP_ACTION
from pptx.util import Cm
from pptx.enum.dml import MSO_THEME_COLOR_INDEX
from pptx.enum.text import MSO_AUTO_SIZE
from pptx.util import Pt
I am using python-pptx v0.6.18 and python v3.8
Edit
Example that creates the shape but no shadow appears:
#Import modules
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE
from pptx.util import Cm
from pptx.enum.dml import MSO_THEME_COLOR_INDEX
from pptx.util import Pt
# Open the PowerPoint file
prs = Presentation('filename.pptx')
# Create a slide
slidelayout = prs.slide_layouts[0]
slide = prs.slides.add_slide(slidelayout)
shapes = slide.shapes
# Add a shape
shape = shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, Cm(10), Cm(10), Cm(10), Cm(10))
# Keep the shadow.
# `inherit` is the only shadow setting python-pptx exposes on ShadowFormat.
# Setting it to False is what removed the shadow, so leave it True to keep
# whatever shadow the theme defines for the shape.
# Names such as visible, distance, angle, blur_radius, color, transparency
# and style are not part of ShadowFormat. Assigning them changes nothing in
# the saved file, so they are omitted here.
shadow = shape.shadow
shadow.inherit = True
# Save the PowerPoint file
prs.save('filename2.pptx')
Example that creates the error message:
#Import modules
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE
from pptx.util import Cm
from pptx.enum.dml import MSO_THEME_COLOR_INDEX
from pptx.util import Pt
# Open the existing PowerPoint file
prs = Presentation('filename.pptx')
# Add a new slide based on the first slide layout
slidelayout = prs.slide_layouts[0]
slide = prs.slides.add_slide(slidelayout)
shapes = slide.shapes
# Add a rounded rectangle, positioned and sized with Cm(10) values
shape = shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, Cm(10), Cm(10), Cm(10), Cm(10))
# Get the shape's shadow settings. Reading `visible` on the next line is the
# access reported to raise:
#   AttributeError: 'ShadowFormat' object has no attribute 'visible'
shadow = shape.shadow
shadow.visible
# Save the PowerPoint file under a new name (not reached if the line above raises)
prs.save('filename2.pptx')
The feature <ShadowFormat.visible - applies a reasonable standard shadow override.> is currently not implemented in python-pptx.
The command <shadow.inherit = False> is used to remove the default setting with the shadow.
By default, the shadow visibility is set to true. If you want to show the shadow, you can either:
set <shadow.inherit = True>
remove <shadow.inherit = False>
You can use Aspose.Slides for Python to manipulate the shapes. This is a paid library, but you can get a temporary license to evaluate it. The following code example shows you how to add a shape with a shadow to a presentation:
import aspose.slides as slides
# Start from an empty presentation; the `with` block cleans it up on exit.
with slides.Presentation() as presentation:
    # Add a rounded rectangle to the first slide.
    first_slide_shapes = presentation.slides[0].shapes
    shape = first_slide_shapes.add_auto_shape(
        slides.ShapeType.ROUND_CORNER_RECTANGLE, 10, 10, 20, 20
    )

    # Switch the outer shadow on, then configure it through a short alias.
    effects = shape.effect_format
    effects.enable_outer_shadow_effect()
    outer_shadow = effects.outer_shadow_effect
    accent5 = presentation.master_theme.color_scheme.accent5
    outer_shadow.distance = 10
    outer_shadow.direction = 45
    outer_shadow.blur_radius = 5
    outer_shadow.shadow_color.color = accent5.color

    # Write the result to disk as PPTX.
    presentation.save("example.pptx", slides.export.SaveFormat.PPTX)
The result:
Alternatively, you can use Aspose.Slides Cloud SDK for Python. This product provides a REST-based API for presentation processing. It is also a paid product, but you can make 150 free API calls per month for experimentation, learning, and any other purpose. The following code example creates the same shape with the shadow using Aspose.Slides Cloud:
import asposeslidescloud
from asposeslidescloud.apis.slides_api import SlidesApi
from asposeslidescloud.models.shape import Shape
from asposeslidescloud.models.effect_format import EffectFormat
from asposeslidescloud.models.outer_shadow_effect import OuterShadowEffect
slides_api = SlidesApi(None, "my_client_id", "my_client_secret")

# The presentation is assumed to already exist in storage.
file_name = "example.pptx"
slide_index = 1
color_scheme = slides_api.get_color_scheme(file_name, slide_index)

# Describe the outer shadow first.
outer_shadow = OuterShadowEffect()
outer_shadow.distance = 10
outer_shadow.direction = 45
outer_shadow.blur_radius = 5
outer_shadow.shadow_color = color_scheme.accent5

# Wrap the shadow in an effect format.
effect_format = EffectFormat()
effect_format.outer_shadow = outer_shadow

# Build the shape DTO and attach the effect format to it.
shape = Shape()
shape.shape_type = "RoundCornerRectangle"
shape.x, shape.y = 10, 10
shape.width, shape.height = 20, 20
shape.effect_format = effect_format

# Send the shape to the service.
slides_api.create_shape(file_name, slide_index, shape)
I work as a Support Developer at Aspose.
I have a data set for myself.
My code works perfectly when I am in front of the camera. But when I test again with an unknown person, the program still predicts that person as me. It should return -1 or null!
I run in python3 and latest version of cv2 from opencv.
->detector.py
import cv2,os
import numpy as np
from PIL import Image
import pickle,time
# Root directory
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

# LBPH recogniser loaded from the trained model, plus the face detector.
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "Classifiers/face.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)
path = 'dataSet'

# Training labels that belong to the enrolled person.
KNOWN_LABELS = (7, 8)
# predict() always returns the closest trained label, even for a stranger.
# The second value it returns is a distance (lower is better), so a match is
# only accepted below this cut-off. 120 is a starting point; tune it on
# your own data.
MAX_DISTANCE = 120

cam = cv2.VideoCapture(0)
print(cam.isOpened())
# Font used for the on-frame label.
font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while True:
        ret, im = cam.read()
        print('ret: ', ret)
        if not ret:
            # No frame delivered (camera missing or stream ended).
            break
        gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5,
                                             minSize=(100, 100), flags=cv2.CASCADE_SCALE_IMAGE)
        for (x, y, w, h) in faces:
            nbr_predicted, conf = recognizer.predict(gray[y:y + h, x:x + w])
            cv2.rectangle(im, (x - 50, y - 50), (x + w + 50, y + h + 50), (225, 0, 0), 2)
            print("nbr_predicted: ", nbr_predicted)
            if nbr_predicted in KNOWN_LABELS and conf < MAX_DISTANCE:
                nbr_predicted = 'Vishesh 1'
            else:
                nbr_predicted = "Unknown Person"
            # Draw the text
            cv2.putText(im, str(nbr_predicted) + "--" + str(conf), (x, y + h),
                        font, 0.8, (0, 255, 0), 2, cv2.LINE_AA)
        # Show every frame once, whether or not it contains faces.
        cv2.imshow('im', im)
        # Press "q" to leave the loop.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always give the camera and the preview window back.
    cam.release()
    cv2.destroyAllWindows()
Only the person in middle must be Vishesh. But even my bros are predicted as Vishesh. Where could this be possibly going wrong ??
recognizer.predict(gray[y:y+h,x:x+w]) returns the index of the closest person in the training set together with a confidence value for that prediction.
Since your photos are the only ones in the training set, nbr_predicted is always your ID, and the confidence value shows the "probability" that the image sent to the predict function is you.
hence update your code as:
# Accept the prediction only when it is the enrolled label and its
# confidence value is under the cut-off.
is_known = nbr_predicted == 7 and conf < 120
nbr_predicted = 'Vishesh 1' if is_known else "Unknown Person"
The confidence value usually depends on your data, but the lower it is, the better the match.
You should adjust the threshold to suit your data.