Extracting the detected objects from image by PILLOW library

Extracting the detected objects from image by PILLOW library - python

the code below is to detect the objects in images. The code can detect the objects, count the number of detected objects, drawing rectangles to label the objects. Is there any way to extract and save each detected objects? For example; if there are 20 objects detected in an image, I would like to extract and save these 20 objects seperately. I would be appreciated for any help..
from PIL import Image
class TheOutliner(object):
''' takes a dict of xy points and
draws a rectangle around them '''
def __init__(self):
self.outlineColor = 255, 0, 0
self.pic = None
self.picn = None
self.minX = 0
self.minY = 0
self.maxX = 0
self.maxY = 0
def doEverything(self, imgPath, dictPoints, theoutfile):
self.loadImage(imgPath)
self.loadBrightPoints(dictPoints)
self.drawBox()
self.saveImg(theoutfile)
def loadImage(self, imgPath):
self.pic = Image.open(imgPath)
self.picn = self.pic.load()
def loadBrightPoints(self, dictPoints):
'''iterate through all points and
gather max/min x/y '''
# an x from the pool (the max/min
# must be from dictPoints)
self.minX = dictPoints.keys()[0][0]
self.maxX = self.minX
self.minY = dictPoints.keys()[0][1]
self.maxY = self.minY
for point in dictPoints.keys():
if point[0] < self.minX:
self.minX = point[0]
elif point[0] > self.maxX:
self.maxX = point[0]
if point[1]< self.minY:
self.minY = point[1]
elif point[1] > self.maxY:
self.maxY = point[1]
def drawBox(self):
# drop box around bright points
for x in xrange(self.minX, self.maxX):
# top bar
self.picn[x, self.minY] = self.outlineColor
# bottom bar
self.picn[x, self.maxY] = self.outlineColor
for y in xrange(self.minY, self.maxY):
# left bar
self.picn[self.minX, y] = self.outlineColor
# right bar
self.picn[self.maxX, y] = self.outlineColor
def saveImg(self, theoutfile):
self.pic.save(theoutfile, "JPEG")
class ObjectDetector(object):
''' returns a list of dicts representing
all the objects in the image '''
def __init__(self):
self.detail = 4
self.objects = []
self.size = 1000
self.no = 255
self.close = 100
self.pic = None
self.picn = None
self.brightDict = {}
def loadImage(self, imgPath):
self.pic = Image.open(imgPath)
self.picn = self.pic.load()
self.picSize = self.pic.size
self.detail = (self.picSize[0] + self.picSize[1])/2000
self.size = (self.picSize[0] + self.picSize[1])/8
# each must be at least 1 -- and the larger
# the self.detail is the faster the analyzation will be
self.detail += 1
self.size += 1
def getSurroundingPoints(self, xy):
''' returns list of adjoining point '''
x = xy[0]
y = xy[1]
plist = (
(x-self.detail, y-self.detail), (x, y-self.detail),
(x+self.detail, y-self.detail),
(x-self.detail, y),(x+self.detail, y),
(x-self.detail, y+self.detail),(x, y+self.detail),
(x+self.detail,y+self.detail)
)
return (plist)
def getRGBFor(self, x, y):
try:
return self.picn[x,y]
except IndexError as e:
return 255,255,255
def readyToBeEvaluated(self, xy):
try:
r,g,b = self.picn[xy[0],xy[1]]
if r==255 and g==255 and b==255:
return False
except:
return False
return True
def markEvaluated(self, xy):
try:
self.picn[xy[0],xy[1]] = self.no, self.no, self.no
except:
pass
def collectAllObjectPoints(self):
for x in xrange(self.pic.size[0]):
if x % self.detail == 0:
for y in xrange(self.pic.size[1]):
if y % self.detail == 0:
r,g,b = self.picn[x,y]
if r == self.no and \
g == self.no and \
b == self.no:
# then no more
pass
else:
ol = {}
ol[x,y] = "go"
pp = []
pp.append((x,y))
stillLooking = True
while stillLooking:
if len(pp) > 0:
xe, ye = pp.pop()
# look for adjoining points
for p in
self.getSurroundingPoints((xe,ye)
if self.readyToBeEvaluated((p[0],
p[1])):
r2,g2,b2 = self.getRGBFor(p[0],
p[1])
if abs(r-r2) < self.close and \
abs(g-g2) < self.close and \
abs(b-b2) < self.close:
# then its close enough
ol[p[0],p[1]] = "go"
pp.append((p[0],p[1]))
self.markEvaluated((p[0],p[1]))
self.markEvaluated((xe,ye))
else:
# done expanding that point
stillLooking = False
if len(ol) > self.size:
self.objects.append(ol)
if __name__ == "__main__":
print "Start Process";
# assumes that the .jpg files are in
# working directory
theFile = "new2"
theOutFile = "new2.output"
import os
os.listdir('.')
for f in os.listdir('.'):
if f.find(".jpg") > 0:
theFile = f
print "working on " + theFile + "..."
theOutFile = theFile + ".out.jpg"
bbb = ObjectDetector()
bbb.loadImage(theFile)
print " analyzing.."
print " file dimensions: " + str(bbb.picSize)
print " this files object weight: " + str(bbb.size)
print " this files analyzation detail: " +
str(bbb.detail)
bbb.collectAllObjectPoints()
print " objects detected: " +str(len(bbb.objects))
drawer = TheOutliner()
print " loading and drawing rectangles.."
drawer.loadImage(theFile)
for o in bbb.objects:
drawer.loadBrightPoints(o)
drawer.drawBox()
print "saving image..."
drawer.saveImg(theOutFile)
print "Process complete"

You can use crop() method :
add new method after drawBox():
def saveBox(self,filename):
# Create Box
box = (self.minX, self.minY, self.maxX, self.maxY)
# Crop Image
self.pic.crop(box).save(filename)
change the main code :
drawer.loadImage(theFile)
idBox=0
for o in bbb.objects:
drawer.loadBrightPoints(o)
drawer.drawBox()
idBox=idBox+1
drawer.saveBox("box_"+str(idBox)+"_"+theOutFile)
print "saving image..."
drawer.saveImg(theOutFile)

Related

tkinter - Infinite Canvas "world" / "view" - keeping track of items in view

I feel like this is a little bit complicated or at least I'm confused on it, so I'll try to explain it by rendering the issue. Let me know if the issue isn't clear.
I get the output from my viewing_box through the __init__ method and it shows:
(0, 0, 378, 265)
Which is equivalent to a width of 378 and a height of 265.
When failing, I track the output:
1 false
1 false
here ([0.0, -60.0], [100.0, 40.0]) (0, 60, 378, 325)
The tracking is done in _scan_view with the code:
if not viewable:
current = self.itemcget(item,'tags')
if isinstance(current, tuple):
new = current-('viewable',)
else:
print('here',points, (x1,y1,x2,y2))
new = ''
self.inview_items.discard(item)
So the rectangle stays with width and height of 100, the coords however failing to be the expected ones. While view width and height stays the same and moves correctly in my current understanding. Expected:
if x1 <= point[0] <= x2 and y1 <= point[1] <= y2: and it feels like I've created two coordinate systems but I don't get it. Is someone looking on it and see it immediately?
Full Code:
import tkinter as tk
class InfiniteCanvas(tk.Canvas):
def __init__(self, master, **kwargs):
super().__init__(master, **kwargs)
self.inview_items = set() #in view
self.niview_items = set() #not in view
self._xshifted = 0 #view moved in x direction
self._yshifted = 0 #view moved in y direction
self._multi = 0
self.configure(confine=False,highlightthickness=0,bd=0)
self.bind('<MouseWheel>', self._vscroll)
self.bind('<Shift-MouseWheel>', self._hscroll)
root.bind('<Control-KeyPress>',lambda e:setattr(self,'_multi', 10))
root.bind('<Control-KeyRelease>',lambda e:setattr(self,'_multi', 0))
print(self.viewing_box())
return None
def viewing_box(self):
'returns x1,y1,x2,y2 of the currently visible area'
x1 = 0 - self._xshifted
y1 = 0 - self._yshifted
x2 = self.winfo_reqwidth()-self._xshifted
y2 = self.winfo_reqheight()-self._yshifted
return x1,y1,x2,y2
def _scan_view(self):
x1,y1,x2,y2 = self.viewing_box()
for item in self.find_withtag('viewable'):
#check if one felt over the edge
coords = self.coords(item)
#https://www.geeksforgeeks.org/python-split-tuple-into-groups-of-n/
points = tuple(
coords[x:x + 2] for x in range(0, len(coords), 2))
viewable = False
for point in points:
if x1 <= point[0] <= x2 and y1 <= point[1] <= y2:
#if any point is in viewing box
viewable = True
print(item, 'true')
else:
print(item, 'false' )
if not viewable:
current = self.itemcget(item,'tags')
if isinstance(current, tuple):
new = current-('viewable',)
else:
print('here',points, (x1,y1,x2,y2))
new = ''
self.inview_items.discard(item)
self.itemconfigure(item,tags=new)
for item in self.find_overlapping(x1,y1,x2,y2):
#check if item inside of viewing_box not in inview_items
if item not in self.inview_items:
self.inview_items.add(item)
current = self.itemcget(item,'tags')
if isinstance(current, tuple):
new = current+('viewable',)
elif isinstance(current, str):
if str:
new = (current, 'viewable')
else:
new = 'viewable'
self.itemconfigure(item,tags=new)
print(self.inview_items)
def _create(self, *args):
if (current:=args[-1].get('tags', False)):
args[-1]['tags'] = current+('viewable',)
else:
args[-1]['tags'] = ('viewable',)
ident = super()._create(*args)
self._scan_view()
return ident
def _hscroll(self,event):
offset = int(event.delta/120)
if self._multi:
offset = int(offset*self._multi)
canvas.move('all', offset,0)
self._xshifted += offset
self._scan_view()
def _vscroll(self,event):
offset = int(event.delta/120)
if self._multi:
offset = int(offset*self._multi)
canvas.move('all', 0,offset)
self._yshifted += offset
self._scan_view()
root = tk.Tk()
canvas = InfiniteCanvas(root)
canvas.pack(fill=tk.BOTH, expand=True)
size, offset, start = 100, 10, 0
canvas.create_rectangle(start,start, size,size, fill='green')
canvas.create_rectangle(
start+offset,start+offset, size+offset,size+offset, fill='darkgreen')
root.mainloop()
PS: Before thinking this is over-complicated and using just find_overlapping isn't working, since it seems the item needs to be at least 51% in the view to get tracked with tkinters algorithm.
You can find an improved version now on CodeReview!

I still don't know what I have done wrong but it works with scan_dragto.
import tkinter as tk
class InfiniteCanvas(tk.Canvas):
def __init__(self, master, **kwargs):
super().__init__(master, **kwargs)
self.inview_items = set() #in view
self.niview_items = set() #not in view
self._xshifted = 0 #view moved in x direction
self._yshifted = 0 #view moved in y direction
self._multi = 0
self.configure(confine=False,highlightthickness=0,bd=0)
self.bind('<MouseWheel>', self._vscroll)
self.bind('<Shift-MouseWheel>', self._hscroll)
root.bind('<Control-KeyPress>',lambda e:setattr(self,'_multi', 10))
root.bind('<Control-KeyRelease>',lambda e:setattr(self,'_multi', 0))
return None
def viewing_box(self):
'returns x1,y1,x2,y2 of the currently visible area'
x1 = 0 - self._xshifted
y1 = 0 - self._yshifted
x2 = self.winfo_reqwidth()-self._xshifted
y2 = self.winfo_reqheight()-self._yshifted
return x1,y1,x2,y2
def _scan_view(self):
x1,y1,x2,y2 = self.viewing_box()
for item in self.find_withtag('viewable'):
#check if one felt over the edge
coords = self.coords(item)
#https://www.geeksforgeeks.org/python-split-tuple-into-groups-of-n/
points = tuple(
coords[x:x + 2] for x in range(0, len(coords), 2))
viewable = False
for point in points:
if x1 <= point[0] <= x2 and y1 <= point[1] <= y2:
#if any point is in viewing box
viewable = True
if not viewable:
current = self.itemcget(item,'tags')
if isinstance(current, tuple):
new = current-('viewable',)
else:
print('here',points, (x1,y1,x2,y2))
new = ''
self.inview_items.discard(item)
self.itemconfigure(item,tags=new)
for item in self.find_overlapping(x1,y1,x2,y2):
#check if item inside of viewing_box not in inview_items
if item not in self.inview_items:
self.inview_items.add(item)
current = self.itemcget(item,'tags')
if isinstance(current, tuple):
new = current+('viewable',)
elif isinstance(current, str):
if str:
new = (current, 'viewable')
else:
new = 'viewable'
self.itemconfigure(item,tags=new)
print(self.inview_items)
def _create(self, *args):
if (current:=args[-1].get('tags', False)):
args[-1]['tags'] = current+('viewable',)
else:
args[-1]['tags'] = ('viewable',)
ident = super()._create(*args)
self._scan_view()
return ident
def _hscroll(self,event):
offset = int(event.delta/120)
if self._multi:
offset = int(offset*self._multi)
cx,cy = self.winfo_rootx(), self.winfo_rooty()
self.scan_mark(cx, cy)
self.scan_dragto(cx+offset, cy, gain=1)
self._xshifted += offset
self._scan_view()
def _vscroll(self,event):
offset = int(event.delta/120)
if self._multi:
offset = int(offset*self._multi)
cx,cy = self.winfo_rootx(), self.winfo_rooty()
self.scan_mark(cx, cy)
self.scan_dragto(cx, cy+offset, gain=1)
self._yshifted += offset
self._scan_view()
root = tk.Tk()
canvas = InfiniteCanvas(root)
canvas.pack(fill=tk.BOTH, expand=True)
size, offset, start = 100, 10, 0
canvas.create_rectangle(start,start, size,size, fill='green')
canvas.create_rectangle(
start+offset,start+offset, size+offset,size+offset, fill='darkgreen')
root.mainloop()

Python/OpenCV - Not detecting grid

The following script comes from http://projectproto.blogspot.co.uk/2014/07/opencv-python-2048-game-solver.html
import cv2
import numpy as np
import win32api, win32gui, win32ui, win32con, win32com.client
from PIL import Image, ImageFont, ImageDraw, ImageOps
# create training model based on the given TTF font file
# http://projectproto.blogspot.com/2014/07/opencv-python-digit-recognition.html
def createDigitsModel(fontfile, digitheight):
font = ImageFont.truetype(fontfile, digitheight)
samples = np.empty((0,digitheight*(digitheight/2)))
responses = []
for n in range(10):
pil_im = Image.new("RGB", (digitheight, digitheight*2))
ImageDraw.Draw(pil_im).text((0, 0), str(n), font=font)
pil_im = pil_im.crop(pil_im.getbbox())
pil_im = ImageOps.invert(pil_im)
#pil_im.save(str(n) + ".png")
# convert to cv image
cv_image = cv2.cvtColor(np.array( pil_im ), cv2.COLOR_RGBA2BGRA)
gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray,(5,5),0)
thresh = cv2.adaptiveThreshold(blur,255,1,1,11,2)
roi = cv2.resize(thresh,(digitheight,digitheight/2))
responses.append( n )
sample = roi.reshape((1,digitheight*(digitheight/2)))
samples = np.append(samples,sample,0)
samples = np.array(samples,np.float32)
responses = np.array(responses,np.float32)
model = cv2.KNearest()
model.train(samples,responses)
return model
class Board(object):
UP, DOWN, LEFT, RIGHT = 1, 2, 3, 4
FONT = "font/ClearSans-Bold.ttf"
def __init__(self, clientwindowtitle):
self.hwnd = self.getClientWindow(clientwindowtitle)
if not self.hwnd:
return
self.hwndDC = win32gui.GetWindowDC(self.hwnd)
self.mfcDC = win32ui.CreateDCFromHandle(self.hwndDC)
self.saveDC = self.mfcDC.CreateCompatibleDC()
self.cl, self.ct, right, bot = win32gui.GetClientRect(self.hwnd)
self.cw, self.ch = right-self.cl, bot-self.ct
self.cl += win32api.GetSystemMetrics(win32con.SM_CXSIZEFRAME)
self.ct += win32api.GetSystemMetrics(win32con.SM_CYSIZEFRAME)
self.ct += win32api.GetSystemMetrics(win32con.SM_CYCAPTION)
self.ch += win32api.GetSystemMetrics(win32con.SM_CYSIZEFRAME)*2
self.saveBitMap = win32ui.CreateBitmap()
self.saveBitMap.CreateCompatibleBitmap(self.mfcDC, self.cw, self.ch)
self.saveDC.SelectObject(self.saveBitMap)
self.tiles, self.tileheight, self.contour = self.findTiles(self.getClientFrame())
if not len(self.tiles):
return
self.digitheight = self.tileheight / 2
self.digitsmodel = createDigitsModel(self.FONT, self.digitheight)
self.update()
def getClientWindow(self, windowtitle):
toplist, winlist = [], []
def enum_cb(hwnd, results):
winlist.append((hwnd, win32gui.GetWindowText(hwnd)))
win32gui.EnumWindows(enum_cb, toplist)
window = [(hwnd, title) for hwnd, title in winlist if windowtitle.lower() in title.lower()]
if not len(window):
return 0
return window[0][0]
def getClientFrame(self):
self.saveDC.BitBlt((0, 0), (self.cw, self.ch),
self.mfcDC, (self.cl, self.ct), win32con.SRCCOPY)
bmpinfo = self.saveBitMap.GetInfo()
bmpstr = self.saveBitMap.GetBitmapBits(True)
pil_img = Image.frombuffer( 'RGB',
(bmpinfo['bmWidth'], bmpinfo['bmHeight']),
bmpstr, 'raw', 'BGRX', 0, 1)
array = np.array( pil_img )
cvimage = cv2.cvtColor(array, cv2.COLOR_RGBA2BGRA)
return cvimage
def findTiles(self, cvframe):
tiles, avgh = [], 0
gray = cv2.cvtColor(cvframe,cv2.COLOR_BGRA2GRAY)
thresh = cv2.adaptiveThreshold(gray,255,1,1,11,2)
contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
def findBoard(contours): # get largest square
ww, sqcnt = 10, None
for cnt in contours:
x,y,w,h = cv2.boundingRect(cnt)
if w>ww and abs(w-h)<w/10:
ww = w
sqcnt = cnt
return sqcnt
board = findBoard(contours)
if board==None:
print 'board not found!'
return tiles, avgh, board
bx,by,bw,bh = cv2.boundingRect(board)
#cv2.rectangle(cvframe,(bx,by),(bx+bw,by+bh),(0,255,0),2)
#cv2.imshow('board',cvframe)
#cv2.waitKey(0)
#cv2.destroyWindow( 'board' )
maxh = bh/4
minh = (maxh*4)/5
count = 0
for contour in contours:
x,y,w,h = cv2.boundingRect(contour)
if y>by and w>minh and w<maxh and h>minh and h<maxh:
avgh += h
count += 1
if not count:
print 'no tile found!'
return tiles, avgh, board
avgh = avgh / count
margin = (bh-avgh*4)/5
for row in range(4):
for col in range(4):
x0 = bx + avgh*col + margin*(col+1)
x1 = x0 + avgh
y0 = by + avgh*row + margin*(row+1)
y1 = y0 + avgh
tiles.append([x0, y0, x1, y1])
#cv2.rectangle(cvframe,(x0,y0),(x1,y1),(0,255,0),2)
#cv2.imshow('tiles',cvframe)
#cv2.waitKey(0)
#cv2.destroyWindow( 'tiles' )
return tiles, avgh, board
def getTileThreshold(self, tileimage):
gray = cv2.cvtColor(tileimage,cv2.COLOR_BGR2GRAY)
row, col = gray.shape
tmp = gray.copy().reshape(1, row*col)
counts = np.bincount(tmp[0])
sort = np.sort(counts)
modes, freqs = [], []
for i in range(len(sort)):
freq = sort[-1-i]
if freq < 4:
break
mode = np.where(counts==freq)[0][0]
modes.append(mode)
freqs.append(freq)
bg, fg = modes[0], modes[0]
for i in range(len(modes)):
fg = modes[i]
#if abs(bg-fg)>=48:
if abs(bg-fg)>32 and abs(fg-150)>4: # 150?!
break
#print bg, fg
if bg>fg: # needs dark background ?
tmp = 255 - tmp
bg, fg = 255-bg, 255-fg
tmp = tmp.reshape(row, col)
ret, thresh = cv2.threshold(tmp,(bg+fg)/2,255,cv2.THRESH_BINARY)
return thresh
def getTileNumbers(self, cvframe):
numbers = []
outframe = np.zeros(cvframe.shape,np.uint8)
def guessNumber(digits):
for i in range(1,16):
nn = 2**i
ss = str(nn)
dd = [int(c) for c in ss]
if set(digits) == set(dd):
return nn
return 0
for tile in self.tiles:
x0,y0,x1,y1 = tile
tileimage = cvframe[y0:y1,x0:x1]
cv2.rectangle(cvframe,(x0,y0),(x1,y1),(0,255,0),2)
cv2.rectangle(outframe,(x0,y0),(x1,y1),(0,255,0),1)
thresh = self.getTileThreshold(tileimage)
contours,hierarchy = cv2.findContours(thresh.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
dh = self.digitheight
digits = []
for cnt in contours:
x,y,w,h = cv2.boundingRect(cnt)
if h>w and h>(dh*1)/5 and h<(dh*6)/5:
cv2.rectangle(cvframe,(x0+x,y0+y),(x0+x+w,y0+y+h),(0,0,255),1)
roi = thresh[y:y+h,x:x+w]
roi = cv2.resize(roi,(dh,dh/2))
roi = roi.reshape((1,dh*(dh/2)))
roi = np.float32(roi)
retval, results, neigh_resp, dists = self.digitsmodel.find_nearest(roi, k=1)
digit = int((results[0][0]))
string = str(digit)
digits.append(digit)
cv2.putText(outframe,string,(x0+x,y0+y+h),0,float(h)/24,(0,255,0))
numbers.append(guessNumber(digits))
return numbers, outframe
def getWindowHandle(self):
return self.hwnd
def getBoardContour(self):
return self.contour
def update(self):
frame = self.getClientFrame()
self.tilenumbers, outframe = self.getTileNumbers(frame)
return self.tilenumbers, frame, outframe
def copyTileNumbers(self):
return self.tilenumbers[:]
def getCell(self, tiles, x, y):
return tiles[(y*4)+x]
def setCell(self, tiles, x, y, v):
tiles[(y*4)+x] = v
return tiles
def getCol(self, tiles, x):
return [self.getCell(tiles, x, i) for i in range(4)]
def setCol(self, tiles, x, col):
for i in range(4):
self.setCell(tiles, x, i, col[i])
return tiles
def getLine(self, tiles, y):
return [self.getCell(tiles, i, y) for i in range(4)]
def setLine(self, tiles, y, line):
for i in range(4):
self.setCell(tiles, i, y, line[i])
return tiles
def validMove(self, tilenumbers, direction):
if direction == self.UP or direction == self.DOWN:
for x in range(4):
col = self.getCol(tilenumbers, x)
for y in range(4):
if(y < 4-1 and col[y] == col[y+1] and col[y]!=0):
return True
if(direction == self.DOWN and y > 0 and col[y] == 0 and col[y-1]!=0):
return True
if(direction == self.UP and y < 4-1 and col[y] == 0 and col[y+1]!=0):
return True
if direction == self.LEFT or direction == self.RIGHT:
for y in range(4):
line = self.getLine(tilenumbers, y)
for x in range(4):
if(x < 4-1 and line[x] == line[x+1] and line[x]!=0):
return True
if(direction == self.RIGHT and x > 0 and line[x] == 0 and line[x-1]!=0):
return True
if(direction == self.LEFT and x < 4-1 and line[x] == 0 and line[x+1]!=0):
return True
return False
def moveTileNumbers(self, tilenumbers, direction):
def collapseline(line, direction):
if (direction==self.LEFT or direction==self.UP):
inc = 1
rg = xrange(0, 4-1, inc)
else:
inc = -1
rg = xrange(4-1, 0, inc)
pts = 0
for i in rg:
if line[i] == 0:
continue
if line[i] == line[i+inc]:
v = line[i]*2
line[i] = v
line[i+inc] = 0
pts += v
return line, pts
def moveline(line, directsion):
nl = [c for c in line if c != 0]
if directsion==self.UP or directsion==self.LEFT:
return nl + [0] * (4 - len(nl))
return [0] * (4 - len(nl)) + nl
score = 0
if direction==self.LEFT or direction==self.RIGHT:
for i in range(4):
origin = self.getLine(tilenumbers, i)
line = moveline(origin, direction)
collapsed, pts = collapseline(line, direction)
new = moveline(collapsed, direction)
tilenumbers = self.setLine(tilenumbers, i, new)
score += pts
elif direction==self.UP or direction==self.DOWN:
for i in range(4):
origin = self.getCol(tilenumbers, i)
line = moveline(origin, direction)
collapsed, pts = collapseline(line, direction)
new = moveline(collapsed, direction)
tilenumbers = self.setCol(tilenumbers, i, new)
score += pts
return score, tilenumbers
# AI based on "term2048-AI"
# https://github.com/Nicola17/term2048-AI
class AI(object):
def __init__(self, board):
self.board = board
def nextMove(self):
tilenumbers = self.board.copyTileNumbers()
m, s = self.nextMoveRecur(tilenumbers[:],3,3)
return m
def nextMoveRecur(self, tilenumbers, depth, maxDepth, base=0.9):
bestMove, bestScore = 0, -1
for m in range(1,5):
if(self.board.validMove(tilenumbers, m)):
score, newtiles = self.board.moveTileNumbers(tilenumbers[:], m)
score, critical = self.evaluate(newtiles)
newtiles = self.board.setCell(newtiles,critical[0],critical[1],2)
if depth != 0:
my_m,my_s = self.nextMoveRecur(newtiles[:],depth-1,maxDepth)
score += my_s*pow(base,maxDepth-depth+1)
if(score > bestScore):
bestMove = m
bestScore = score
return bestMove, bestScore
def evaluate(self, tilenumbers, commonRatio=0.25):
maxVal = 0.
criticalTile = (-1, -1)
for i in range(8):
linearWeightedVal = 0
invert = False if i<4 else True
weight = 1.
ctile = (-1,-1)
cond = i%4
for y in range(4):
for x in range(4):
if cond==0:
b_x = 4-1-x if invert else x
b_y = y
elif cond==1:
b_x = x
b_y = 4-1-y if invert else y
elif cond==2:
b_x = 4-1-x if invert else x
b_y = 4-1-y
elif cond==3:
b_x = 4-1-x
b_y = 4-1-y if invert else y
currVal=self.board.getCell(tilenumbers,b_x,b_y)
if(currVal == 0 and ctile == (-1,-1)):
ctile = (b_x,b_y)
linearWeightedVal += currVal*weight
weight *= commonRatio
invert = not invert
if linearWeightedVal > maxVal:
maxVal = linearWeightedVal
criticalTile = ctile
return maxVal, criticalTile
def solveBoard(self, moveinterval=500):
boardHWND = self.board.getWindowHandle()
if not boardHWND:
return False
bx, by, bw, bh = cv2.boundingRect(self.board.getBoardContour())
x0, x1, y0, y1 = bx, bx+bw, by, by+bh
win32gui.SetForegroundWindow(boardHWND)
shell = win32com.client.Dispatch('WScript.Shell')
print 'Set the focus to the Game Window, and the press this arrow key:'
keymove = ['UP', 'DOWN', 'LEFT', 'RIGHT']
delay = moveinterval / 3 # milliseconds delay to cancel board animation effect
prev_numbers = []
while True:
numbers, inframe, outframe = self.board.update()
if numbers != prev_numbers:
cv2.waitKey(delay)
numbers, inframe, outframe = self.board.update()
if numbers == prev_numbers: # recheck if has changed
continue
prev_numbers = numbers
move = ai.nextMove()
if move:
key = keymove[move-1]
shell.SendKeys('{%s}'%key)
print key
cv2.waitKey(delay)
cv2.imshow('CV copy',inframe[y0:y1,x0:x1])
cv2.imshow('CV out', outframe[y0:y1,x0:x1])
cv2.waitKey(delay)
cv2.destroyWindow( 'CV copy' )
cv2.destroyWindow( 'CV out' )
# http://gabrielecirulli.github.io/2048/
# http://ov3y.github.io/2048-AI/
board = Board("2048 - Google Chrome")
#board = Board("2048 - Mozilla Firefox")
ai = AI(board)
ai.solveBoard(360)
print 'stopped.'
I have Google Chrome open with the example URL http://ov3y.github.io/2048-AI/ open, running the script has the following error:
20.py:109: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
if board==None:
no tile found!
Set the focus to the Game Window, and the press this arrow key:
Then nothing, it just sits there. So the part I'm concerned with most is no tile found!. Un commenting the lines:
#cv2.rectangle(cvframe,(bx,by),(bx+bw,by+bh),(0,255,0),2)
#cv2.imshow('board',cvframe)
#cv2.waitKey(0)
#cv2.destroyWindow( 'board' )
Shows the following window on screen:
Can anyone explain why OpenCV is failing to detect the grid, or how to go about debugging this?

Most likely it is not a problem with detecting grid, but with capturing browser window - you are trying to find grid on an empty image which of course fails. First make sure that you have grabbed firefox/chrome/opera screen window correctly - in function getClientFrame(self) put this code:
cv2.imshow('browser window', cvimage)
cv2.waitKey(10000)
just before the final return cvimage. It should show you the browser window for 10 seconds. If it doesn't than it will 100% sure that problem is with capturing browser window, not with detecting grid. To check what's wrong with capturing browser window use win32api.GetLastError() function (you can check error codes here).
Of course there is a chance that i'm wrong and it's a problem with detecting grid - if so, please provide a sample image (just save the image displayed by the code i provided) so we can test it.
\\edit:
I've just noticed the second part of your post - so most likely i'm wrong, but you can test it anyway. It seems that you are capturing one chrome window and part of some other window - try to make your browser window fullscreen.
\\edit2:
After closer look at you image i realised strange thing - capture image has vertical lines and has width (without repeated part onthe right side) smaller than the original window(but height seems to be fine). Width seems to be 75% of original width so i guess that PIL treats every 4 bytes as one pixel, but it should use only 3 bytes per pixel. It's hard to test it, because on my system (win 8.1 64bit) it's working fine. Possible solutions (i can't test them, so you need to check which one will work.. sory :) ):
Try to change this line:
pil_img = Image.frombuffer( 'RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)
to something like this:
pil_img = Image.frombuffer( 'RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGR', 0, 1)
generally you need to change value of fifth parameter from BGRX to something else - most likely to 'BGR', full list of options is here. If it won't work try to play with different values of first and fifth parameters.
On the screenshot it looks like you have some quite old version of Windows or at least you are using old gui (which is great btw!). If - except for setting gui style to "old style" - you've set (or windows've done it for you) your color quality to something else than "Highest (32bit)" it may cause your problem as well. Try to set it to "Highest (32 bit)". To be clear - i'm talking about settings from this window:
(on the right side, near bottom and color palette).
If you have 2 (or more) screens, test you program while using only one. Also if you are using some alternative window manager (or some other weird extension like something for multiple desktops) turn it off and try again.

python script speed improvements

for my robot I am analyzing laser range data. I need to analyze a lot of samples per second. So speed is required.
I know python is not the right language based on this - but I don't want to switch for now as I am in the prototyping phase (will see if I ever get out of it :-) ).
At the moment I am stuck on squeezing more speed out of the analyzing code I have.
I pulled out the relevant code and created a small test. It would be brilliant if someone could give me a some hints on where to improve speed in this test script.
from math import degrees, radians, sin, cos, fabs
import time
class NewRobotMap(object):
def __init__(self, sizeX, sizeY, Resolution, RobotPosX, RobotPosY, RobotTheta, ServoPos, mapMaxOcc, mapMaxFree, OccValue, EmptyValue):
self.sizeX = sizeX
self.sizeY = sizeY
self.RobotPosX = int(RobotPosX)
self.RobotPosY = int(RobotPosY)
self.mapResolution = int(Resolution)
self.StartPosX = int(RobotPosX)
self.StartPosY = int(RobotPosY)
self.RobotTheta = float(RobotTheta)
self.EmptyValue = EmptyValue
self.ServoPos = ServoPos
self.mapMaxOcc = mapMaxOcc
self.mapMaxFree = mapMaxFree
self.mapOccValue = OccValue
self.RobotPosOldX = ""
self.RobotPosOldY = ""
def clear(self):
self.RobotMap = [[self.EmptyValue for i in xrange(self.sizeY)] for j in xrange(self.sizeX)]
def updateMap(self ,x ,y , Val):
oldval = self.RobotMap[x][y]
self.RobotMap[x][y]=self.RobotMap[x][y] + Val
if self.RobotMap[x][y] > self.mapMaxOcc:
self.RobotMap[x][y] = self.mapMaxOcc
elif self.RobotMap[x][y] < self.mapMaxFree:
self.RobotMap[x][y] = self.mapMaxFree
return oldval, self.RobotMap[x][y]
def setOcc(self,x,y):
self.RobotMap[x][y] = self.mapMaxOcc
def updateRobot(self,theta,x,y):
robotThetaold=self.RobotTheta
self.RobotTheta = float(theta)
self.RobotPosX = int(round(self.StartPosX + float(int(x)/self.mapResolution), 0))
self.RobotPosY = int(round(self.StartPosY - float(int(y)/self.mapResolution),0))
if x != self.RobotPosOldX or y != self.RobotPosOldX:
self.RobotPosOldX = x
self.RobotPosOldY = y
return True
else:
self.RobotPosOldX = x
self.RobotPosOldY = y
return False
def getRobotPos(self):
return self.RobotPosX, self.RobotPosY
def display(self):
s = [[str(e) for e in row] for row in self.RobotMap]
lens = [len(max(col, key=len)) for col in zip(*s)]
fmt = '\t'.join('{{:{}}}'.format(x) for x in lens)
table = [fmt.format(*row) for row in s]
print '\n'.join(table)
def updateServoPos(self, newServoPos):
self.ServoPos = newServoPos
templateData = {
'MapWidth' : 800,
'MapHeight': 600,
'StartPosX' : 500,
'StartPosY' : 300,
'StartTheta' : 0,
'Resolution' : 5,
'mapThresholdFree' : 126,
'mapThresholdOcc' : 130, #169
'EmptyValue' : 128,
'mapMaxOcc' : 137,
'mapMaxFree' : 119,
'ServoPos' : 0,
'CurrentPosX' : 0,
'CurrentPosY' : 0,
'CurrentTheta' : 0,
'SafeZone' : 10
}
templateData["MapHeight"] = templateData["MapHeight"] / templateData["Resolution"]
templateData["MapWidth"] = templateData["MapWidth"] / templateData["Resolution"]
templateData["StartPosX"] = templateData["StartPosX"] / templateData["Resolution"]
templateData["StartPosY"] = templateData["StartPosY"] / templateData["Resolution"]
def updateSonarCalcMapVal(val):
mapThresholdFree = templateData["mapThresholdFree"]
mapThresholdOcc = templateData["mapThresholdOcc"]
#oldval
if val[0] <= mapThresholdFree:
oldval = 0
elif mapThresholdFree < val[0] < mapThresholdOcc:
oldval = 1
elif val[0] >= mapThresholdOcc:
oldval = 2
# newval
if val[1] <= mapThresholdFree:
newval = 0
elif mapThresholdFree < val[1] < mapThresholdOcc:
newval = 1
elif val[1] >= mapThresholdOcc:
newval = 2
if oldval != newval:
return newval
else:
return 'n'
def dur( op=None, clock=[time.time()] ):
if op != None:
duration = time.time() - clock[0]
print '%s finished. Duration %.6f seconds.' % (op, duration)
clock[0] = time.time()
def updateIRWrite(RobotPos, coord, updateval):
XtoUpdate=RobotPos[0] + coord[0]
YtoUpdate=RobotPos[1] - coord[1]
val = map.updateMap(XtoUpdate, YtoUpdate , updateval)
newval=updateSonarCalcMapVal(val)
########### main Script #############
map=NewRobotMap(templateData["MapWidth"],templateData["MapHeight"], templateData["Resolution"], templateData["StartPosX"],templateData["StartPosY"], templateData["StartTheta"], templateData["ServoPos"],templateData["mapMaxOcc"],templateData["mapMaxFree"],templateData["mapThresholdOcc"],templateData["EmptyValue"])
map.clear()
dur()
for x in xrange(0,10001*40):
updateIRWrite((100,100), (10,10), 1)
dur("loops")
I tried a numpy array as self.RobotMap in the NewRobotMap class/object. But this was much slower.

Few tips
Minimize too deep redirections
Your code here:
def updateMap(self ,x ,y , Val):
oldval = self.RobotMap[x][y]
self.RobotMap[x][y]=self.RobotMap[x][y] + Val
if self.RobotMap[x][y] > self.mapMaxOcc:
self.RobotMap[x][y] = self.mapMaxOcc
elif self.RobotMap[x][y] < self.mapMaxFree:
self.RobotMap[x][y] = self.mapMaxFree
return oldval, self.RobotMap[x][y]
is all the time repeating self.RobotMap[x][y] what requires 4 levels of hops to get the value (self -> RobotMap -> [x] -> [y])
This can be optimized:
In place update
old:
self.RobotMap[x][y]=self.RobotMap[x][y] + Val
new (saving diving for existing value second time)
self.RobotMap[x][y] += Val
Use local variable instead of deeply nested structure
def updateMap(self ,x ,y , Val):
oldval = self.RobotMap[x][y]
newval = oldval + Val
if newval > self.mapMaxOcc:
newval = self.mapMaxOcc
elif newval < self.mapMaxFree:
newval = self.mapMaxFree
return oldval, newval
Note, that your old return oldval, self.RobotMap[x][y] is not only returning a value, but you have already modified the self.RobotMap[x][y] anyway (as it is mutable), so if you rely on that, you could be surprised.
Using global variables instead of tempateData dictionary
Changing dictionary into global variable speeded up the run a bit as it removed one level ov indirection. I know, it looks nasty, but this may happen with optimization.
Skip returning self.RobotMap[x][y]
Consider saving returning self.RobotMap[x][y] if this not necessary, or if you have already changed that value.
Quick clear
change original:
def clear(self):
self.RobotMap = [[self.EmptyValue for i in xrange(self.sizeY)] for j in xrange(self.sizeX)]
to:
def clear(self):
self.RobotMap = self.sizeY * [self.sizeY * [self.EmptyValue]]
My test show about twice as fast execution for x = 3, y = 5, larger sizez could be even better.
Modified code - from 0.790581 to 0.479875 seconds
from math import degrees, radians, sin, cos, fabs
import time
templ_MapWidth = 800
templ_MapHeight = 600
templ_StartPosX = 500
templ_StartPosY = 300
templ_StartTheta = 0
templ_Resolution = 5
templ_mapThresholdFree = 126
templ_mapThresholdOcc = 130
templ_EmptyValue = 128
templ_mapMaxOcc = 137
templ_mapMaxFree = 119
templ_ServoPos = 0
templ_CurrentPosX = 0
templ_CurrentPosY = 0
templ_CurrentTheta = 0
templ_SafeZone = 10
templ_MapHeight = templ_MapHeight / templ_Resolution
templ_MapWidth = templ_MapWidth / templ_Resolution
templ_StartPosX = templ_StartPosX / templ_Resolution
templ_StartPosY = templ_StartPosY / templ_Resolution
class NewRobotMap(object):
def __init__(self, sizeX, sizeY, Resolution, RobotPosX, RobotPosY, RobotTheta, ServoPos, mapMaxOcc, mapMaxFree, OccValue, EmptyValue):
self.sizeX = sizeX
self.sizeY = sizeY
self.RobotPosX = int(RobotPosX)
self.RobotPosY = int(RobotPosY)
self.mapResolution = int(Resolution)
self.StartPosX = int(RobotPosX)
self.StartPosY = int(RobotPosY)
self.RobotTheta = float(RobotTheta)
self.EmptyValue = EmptyValue
self.ServoPos = ServoPos
self.mapMaxOcc = mapMaxOcc
self.mapMaxFree = mapMaxFree
self.mapOccValue = OccValue
self.RobotPosOldX = ""
self.RobotPosOldY = ""
def clear(self):
self.RobotMap = self.sizeX * [self.sizeY * [self.EmptyValue]]
def updateMap(self, x, y, Val):
oldval = self.RobotMap[x][y]
newval = oldval + Val
if newval < self.mapMaxFree:
return oldval, self.mapMaxFree
if newval > self.mapMaxOcc:
return oldval, self.mapMaxOcc
return oldval, newval
def setOcc(self, x, y):
self.RobotMap[x][y] = self.mapMaxOcc
def updateRobot(self, theta, x, y):
robotThetaold = self.RobotTheta
self.RobotTheta = float(theta)
self.RobotPosX = int(round(self.StartPosX + float(int(x)/self.mapResolution), 0))
self.RobotPosY = int(round(self.StartPosY - float(int(y)/self.mapResolution), 0))
if x != self.RobotPosOldX or y != self.RobotPosOldX:
self.RobotPosOldX = x
self.RobotPosOldY = y
return True
else:
self.RobotPosOldX = x
self.RobotPosOldY = y
return False
def getRobotPos(self):
return self.RobotPosX, self.RobotPosY
def display(self):
s = [[str(e) for e in row] for row in self.RobotMap]
lens = [len(max(col, key=len)) for col in zip(*s)]
fmt = '\t'.join('{{:{}}}'.format(x) for x in lens)
table = [fmt.format(*row) for row in s]
print '\n'.join(table)
def updateServoPos(self, newServoPos):
self.ServoPos = newServoPos
def updateSonarCalcMapVal(org, new):
mapThresholdFree = templ_mapThresholdFree
mapThresholdOcc = templ_mapThresholdOcc
#oldval
if org <= mapThresholdFree:
oldval = 0
elif mapThresholdFree < org < mapThresholdOcc:
oldval = 1
elif org >= mapThresholdOcc:
oldval = 2
# newval
if new <= mapThresholdFree:
newval = 0
elif mapThresholdFree < new < mapThresholdOcc:
newval = 1
elif new >= mapThresholdOcc:
newval = 2
if oldval != newval:
return newval
else:
return 'n'
def dur(op=None, clock=[time.time()]):
if op != None:
duration = time.time() - clock[0]
print '%s finished. Duration %.6f seconds.' % (op, duration)
clock[0] = time.time()
def updateIRWrite(RobotPos, coord, updateval):
XtoUpdate = RobotPos[0] + coord[0]
YtoUpdate = RobotPos[1] - coord[1]
newval = updateSonarCalcMapVal(*mymap.updateMap(XtoUpdate, YtoUpdate, updateval))
########### main Script #############
mymap = NewRobotMap(templ_MapWidth, templ_MapHeight, templ_Resolution, templ_StartPosX, templ_StartPosY, templ_StartTheta, templ_ServoPos, templ_mapMaxOcc, templ_mapMaxFree, templ_mapThresholdOcc, templ_EmptyValue)
mymap.clear()
dur()
for x in xrange(0, 10001*40):
updateIRWrite((100, 100), (10, 10), 1)
dur("loops")
Conclusions
The code definitely needs review for doing correct work. E.g. there are methods, which are not used at all and other calls, which never use returned value.
But some optimization could be shown. Generally, following is good to follow:
Make your code running correctly first
Clarify what is acceptable speed, do not optimize, if not necessary
Measure, profile
Start optimizing in busiest loops, there are best chances to speed things up. In them, each line of code counts.

Can you install PyPy and run your script with it instead of CPython (the default) ? It should work as a drop-in replacement of CPython.
http://pypy.org/
It is based on (tracing?) JIT and famous for its high runtime performance.
http://speed.pypy.org/

Can't return an element in python at a certain position on a grid

i am a beginner to Python and i'm having some problem with a project.
My grid looks like this :
class World(object):
def __init__(self):
self.grid = []
xsize = 20
ysize = 20
self.maxX = xsize
self.maxY = ysize
for irow in range(self.maxY):
row = []
for icol in range(self.maxX):
row.append(None)
self.grid.append(row)
positions = []
for i in range(self.maxX):
for j in range(self.maxY):
positions.append((i,j))
numteam1 = 0
numteam2 = 0
numrobot = 0
randpos = random.sample(positions, numteam1 + numteam2)
team1 = randpos[0:numrobot-1]
team2 = randpos[numrobot:]
for point in team1:
x = point[0]
y = point[1]
self.grid[y][x] = AttackRobot(1, x, y, self)
self.grid[y][x] = MedicRobot(1, x, y, filename)
for point in team2:
x = point[0]
y = point[1]
self.grid[y][x] = AttackRobot(2,x,y,self)
self.grid[y][x] = MedicRobot(2,x,y,self, filename)
and then i have this method:
def test_position(self, x, y):
if x <0 or x >= 20:
return None
if y <0 or y >= 20:
return None
else:
return ( self.grid[y][x] = Robot)
this fonction is supposed to return element at (x,y) on the grid
Then i use this method in this method :
def print_board(w):
print "-" * 60
for y in range(19,-1,-1):
line = ""
for x in range(0,20):
r = w.test_position(x, y)
if r == None:
line += ".... "
else:
if isinstance(r, AttackRobot):
rtype = "A"
elif isinstance(r, MedicRobot):
rtype = "M"
else:
rtype = "!"
if r.get_team() == 1:
line += "%s%02i%s " % (rtype, r.get_health(), r.get_direction())
else:
line += "%s%02i%s " % (rtype.lower(), r.get_health(), r.get_direction())
print line
print "-" * 60
and i get an error.
Traceback (most recent call last):
File "T05.py", line 10, in <module>
print_board(world)
File "/Users/quentindumoulin/Desktop/test.py", line 19, in print_board
if r.get_team() == 1:
AttributeError: 'bool' object has no attribute 'get_team'

return ( self.grid[y][x] = Robot)
What is it? Maybe you want
return self.grid[y][x]
?

PIL saving only the first image

I'm trying to do some batch image processing, but I'm having trouble saving the images once they are created. Here is all of the code:
import Image
import os
import random
training_images = []
training_path = 'cropped'
background_images = []
background_path = 'background'
training_file = 'train'
def get_image_list(file_path):
return os.listdir(file_path)
def rotate_randomely(im):
number = random.randint(1, 6)
if number == 1:
return im.transpose(Image.FLIP_LEFT_RIGHT)
elif number == 2:
return im.transpose(Image.FLIP_TOP_BOTTOM)
elif number == 3:
return im.transpose(Image.ROTATE_90)
elif number == 4:
return im.transpose(Image.ROTATE_180)
elif number == 5:
return im.transpose(Image.ROTATE_270)
else:
return im
def get_random_point(maxX, maxY):
x = random.randint(0, maxX)
y = random.randint(0, maxY)
return x, y
def insert_image(from_image, onto_image):
from_image = resize_smaller(from_image, onto_image.size)
x, y = get_random_point(onto_image.size[0] - from_image.size[0], onto_image.size[1] - from_image.size[0])
onto_image.paste(from_image, (x, y))
width = from_image.size[0]
height = from_image.size[1]
return x, y, width, height
def resize_smaller(image, maxXY):
if image.size[0] > maxXY[0] or image.size[1] > maxXY[1]:
image = image.resize((image.size[0] / 2, image.size[1] / 2))
if image.size[0] > maxXY[0] or image.size[1] > maxXY[1]:
resize_smaller(image, maxXY)
else:
return image
training_images = get_image_list(training_path)
background_images = get_image_list(background_path)
print('training_images size', len(training_images))
print('background_images size', len(background_images))
for training_image in training_images:
index = 0
for background_image in background_images:
name = background_image
training_image = Image.open(training_path + '/' + training_image)
background_image = Image.open(background_path + '/' + background_image)
training_image = rotate_randomely(training_image)
x, y, width, height = insert_image(training_image, background_image)
background_image.save('images/' + str(index) + name)
index = index + 1
The output:
('training_images size', 7)
('background_images size', 1). So it's finding the images correctly, but when I look at the results there is only one image saved, and it only has a 0 pre-pended to the image name. Yet I know it went through each image so there should be seven of them.
I've been looking at this for a while, and I just don't see where I went wrong. Is there something weird about pil's save method that I'm not aware of?

put the index = 0 outside the upper for loop otherwise it will become 0 every iteration and save over the top of old files.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Extracting the detected objects from image by PILLOW library - python

Related

tkinter - Infinite Canvas "world" / "view" - keeping track of items in view

Python/OpenCV - Not detecting grid

python script speed improvements

Can't return an element in python at a certain position on a grid

PIL saving only the first image

Categories

Resources