I'm practicing spoken language recognition code from 'https://github.com/YerevaNN/Spoken-language-identification'.
Input 'csv' dataset is downloaded from 'https://gist.github.com/Harhro94/aa11fe6b454c614cdedea882fd00f8d7'
First task is to convert the inputs into spectrograms. I tried this code but showing error. You can use any audio file(wav file) for this example.
original code from github :'https://github.com/YerevaNN/Spoken-language-identification/blob/master/create_spectrograms.py
import numpy as np
from matplotlib import pyplot as plt
import scipy.io.wavfile as wav
from numpy.lib import stride_tricks
import PIL.Image as Image
import os
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
win = window(frameSize)
hopSize = int(frameSize - np.floor(overlapFac * frameSize))
samples = np.append(np.zeros(np.floor(frameSize / 2.0)), sig)
cols = np.ceil((len(samples) - frameSize) / float(hopSize)) + 1
samples = np.append(samples, np.zeros(frameSize))
frames = stride_tricks.as_strided(samples, shape=(cols, frameSize),
strides=(samples.strides[0] * hopSize,
samples.strides[0])).copy()
frames *= win
return np.fft.rfft(frames)
def logscale_spec(spec, sr=44100, factor=20, alpha=1.0, f0=0.9, fmax=1):
spec = spec[:, 0:256]
timebins, freqbins = np.shape(spec)
scale = np.linspace(0, 1, freqbins) # ** factor
scale = np.array(map(lambda x: x * alpha
if x <= f0 else (fmax - alpha * f0) / (fmax - f0) *
(x - f0) + alpha * f0, scale))
scale *= (freqbins - 1) / max(scale)
newspec = np.complex128(np.zeros([timebins, freqbins]))
allfreqs = np.abs(np.fft.fftfreq(freqbins * 2, 1. / sr)[:freqbins + 1])
freqs = [0.0 for i in range(freqbins)]
totw = [0.0 for i in range(freqbins)]
for i in range(0, freqbins):
if (i < 1 or i + 1 >= freqbins):
newspec[:, i] += spec[:, i]
freqs[i] += allfreqs[i]
totw[i] += 1.0
continue
else:
w_up = scale[i] - np.floor(scale[i])
w_down = 1 - w_up
j = int(np.floor(scale[i]))
newspec[:, j] += w_down * spec[:, i]
freqs[j] += w_down * allfreqs[i]
totw[j] += w_down
newspec[:, j + 1] += w_up * spec[:, i]
freqs[j + 1] += w_up * allfreqs[i]
totw[j + 1] += w_up
for i in range(len(freqs)):
if (totw[i] > 1e-6):
freqs[i] /= totw[i]
return newspec, freqs
def plotstft(audiopath, binsize=2 ** 10, plotpath=None, colormap="gray",
channel=0, name='sampleaudio.png', alpha=1, offset=0):
samplerate, samples = wav.read(audiopath)
samples = samples[:, channel]
s = stft(samples, binsize)
sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha)
sshow = sshow[2:, :]
ims = 20. * np.log10(np.abs(sshow) / 10e-6)
timebins, freqbins = np.shape(ims)
ims = np.transpose(ims)
ims = ims[0:256, :]
image = Image.fromarray(ims)
image = image.convert('L')
image.save(name)
file = open('trainingData.csv', 'r')
for iter, line in enumerate(file.readlines()[1:]):
filepath = line.split(',')[0]
filename = filepath[:-4]
wavfile = 'sampleaudio.wav'
os.system('mpg123 -w'+wavfile+'/C:/AnacondaProj/sampaudio.wav/'+ filepath)
plotstft(wavfile,channel=0,name='/C:/AnacondaProj/sampaudio.wav/'+
filename+'.png', alpha=1)
# os.remove(wavfile)
print("processed %d files" % (iter + 1))
Thankyou
Related
I am trying to convert some of my pictures to black and white. I have this so far
import image
def black_and_white(pic):
for y in range(pic.getHeight()):
for x in range(pic.getWidth()):
p = pic.getPixel(x,y)
r = p.getRed()
g = p.getGreen()
b = p.getBlue()
if x > 0.128:
x = .255 * r + .255 * g +.255 * b
else:
x = .0 * r + .0 * g +.0 * b
x = int(x) ## to convert it to an integer
newp = image.Pixel(x, x, x) ## to convert to a new pixel
pic.setPixel(x, y, newp)
return pic
def main():
bell = image.Image("luther.jpg")
width = bell.getWidth()
height = bell.getHeight()
win = image.ImageWin(width, height)
bell.draw(win)
gs_bell = grayscale(bell)
gs_bell.draw(win)
main() ## starts execution
If anyone could give me some advice, I would greatly appreciate it!
I am truly sorry for the lack of clarity. Here is the image I am getting.
enter image description here
Here's a complete working example, using PIL (because I don't know where your import image is coming from, you didn't say).
import sys
import argparse
import PIL.Image # https://pillow.readthedocs.io/en/3.1.x/reference/Image.html
import os
def black_and_white(pic):
for y in range(pic.size[1]):
for x in range(pic.size[0]):
r, g, b = pic.getpixel((x, y))
v = 0.2989 * r + 0.5870 * g + 0.1140 * b
if v > 128.0:
v = 255
else:
v = 0
v = int(v) ## to convert it to an integer
pic.putpixel((x, y), (v, v, v))
def main(options):
try:
image = PIL.Image.open(options.filename)
except:
print('ERROR: Could not open %s' % (options.filename))
else:
black_and_white(image)
basename = os.path.splitext(options.filename)[0]
image.save(basename + '_solution' + '.jpg', 'JPEG')
return 0
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'filename',
help='Image file.')
options = parser.parse_args()
sys.exit(main(options))
This works for me:
Your code has numerous bugs. You're using x as a pixel value, when it is actually a pixel coordinate.
I would change this:
if x > 0.128:
x = .255 * r + .255 * g +.255 * b
else:
x = .0 * r + .0 * g +.0 * b
x = int(x) ## to convert it to an integer
newp = image.Pixel(x, x, x) ## to convert to a new pixel
to this:
v = 0.2989 * r + 0.5870 * g + 0.1140 * b
if v > 128.0:
v = 255
else:
v = 0
v = int(v) ## to convert it to an integer
newp = image.Pixel(v, v, v) ## to convert to a new pixel
The RGB weighting values come from this article.
This is the code:
import numpy as np
import cv2
import os
big_img_path = 'bloodredboy.jpg'
resolution = 0.05
small_img_path = 'Minecraft'
def LoadImages():
global small_img_path
imagelist = list(os.walk(small_img_path))[0][2]
images = {}
for im in imagelist:
img = cv2.imread(r'C:\Users\VVA\Desktop\\'+small_img_path+'\\'+im)
r = 0
g = 0
b = 0
count = 0
for y in img:
for x in y:
r += x[0]
g += x[1]
b += x[2]
count += 1
r /= count
b /= count
g /= count
images[r'C:\Users\VVA\Videos\Desktop\\'+small_img_path+'\\'+im] = [r, g, b]
return images
def change_resolution(img, resolution):
res = []
height = len(img)
width = len(img[0])
yy = 0
for y in range(0, height, round(1/resolution)):
yy += 1
xx = 0
res.append([])
for x in range(0, width, round(1/resolution)):
res[yy-1].append(img[y][x])
xx += 1
return np.array(res)
def ask_block(rgb):
global images
record_block = None
record_value = 99999999999999
for image in images.items():
r = abs(rgb[0] - image[1][0])
g = abs(rgb[1] - image[1][1])
b = abs(rgb[2] - image[1][2])
if r+g+b < record_value:
record_block = image[0]
record_value = r+g+b
return record_block
def make_img_out_of_blocks(img):
count = 0
total = len(img) * len(img[0])
rows = []
yy = 0
for y in img:
yy += 1
xx = 0
row = []
for x in y:
block = ask_block(x)
block = cv2.imread(block)
try:
row = np.concatenate((row, block), axis=1)
except:
row = block
xx += 1
count += 1
rows.append(row)
print(f'{round(count/total*100, 2)}% Completed')
for row in rows:
try:
res = np.concatenate((res, row), axis=0)
except:
res = row
return res
print('Loading Images...')
images = LoadImages()
img = cv2.imread(big_img_path)
print('Done')
if resolution != 1:
print('Changing resolution...')
img = change_resolution(img, resolution)
print('Done')
print('Transforming to Blocks...')
img = make_img_out_of_blocks(img)
print('Done')
cv2.imwrite('output.jpg', img)
cv2.imshow('Result', img)
cv2.waitKey(0)
and this is the error:
I tried to change the file path file name folder images
but no progress.
As you can see i am using numpy and cv2
Loading Images...
Traceback (most recent call last):
File "C:\Users\VVA\Desktop\Image_out_of_Minecraft_Blocks.py", line 94, in
images = LoadImages()
File "C:\Users\VVA\Desktop\Image_out_of_Minecraft_Blocks.py", line 20, in LoadImages
for y in img:
TypeError: 'NoneType' object is not iterable
When i run this code
import numpy as np
import cv2
from sklearn.datasets import fetch_mldata
from skimage.measure import label, regionprops
from sklearn.neighbors import KNeighborsClassifier
def train(data, target):
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(data, target)
return knn
def move(image, x, y):
img = np.zeros((28, 28))
img[:(28-x), :(28-y)] = image[x:, y:]
return img
def fill(image):
if np.shape(image)!=(28, 28):
img = np.zeros((28,28))
x = 28 - np.shape(image)[0]
y = 28 - np.shape(image)[1]
img[:-x,:-y] = image
return img
else:
return image
def my_rgb2gray(img_rgb):
img_gray = 0.5*img_rgb[:, :, 0] + 0*img_rgb[:, :, 1] + 0.5*img_rgb[:, :, 2]
img_gray = img_gray.astype('uint8')
return img_gray
def my_rgb2gray2(img_rgb):
frame = img_rgb
grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
ret, frame_bw = cv2.threshold(grey, 170, 255, 0)
frame_bw = cv2.morphologyEx(frame_bw, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)))
return frame_bw
def count_images(framecal):
regions = label(framecal)
labels = regionprops(regions)
images = []
for i in range(0, len(labels)):
if labels[i].centroid[0] < result[0] and labels[i].centroid[1] < result[1]:
images.append(labels[i].image)
count = 0
for img in images:
obrada = fill(np.array(img.astype('uint8')))
count += model.predict(obrada.reshape(1, -1))
return count
def check2(indices, i):
check = False
for el in indices:
if (el == i):
check = True
break
return check
def findPoints(lines):
Xmin = 1000
Ymin = 1000
Ymax = 0
Xmax = 0
for i in range(len(lines)):
for x1, y1, x2, y2 in lines[i]:
if x1 < Xmin:
Xmin = x1
Ymin = y1
if x2 > Xmax:
Ymax = y2
Xmax = x2
return Xmin, Ymin, Xmax, Ymax
def hough(frame, gray, min_line_len, max_line_gap):
edges = cv2.Canny(gray, 50, 150, apertureSize=3)
cv2.imwrite('line.png', frame)
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 40, min_line_len, max_line_gap)
minx, miny, maxx, maxy = findPoints(lines)
cv2.line(frame, (minx, miny), (maxx, maxy), (233, 0, 0), 2)
return minx, miny, maxx, maxy
homepath = 'SoftVideoData/'
videopaths = ['video-0.avi',
'video-1.avi',
'video-2.avi',
'video-3.avi',
'video-4.avi',
'video-5.avi',
'video-6.avi',
'video-7.avi',
'video-8.avi',
'video-9.avi']
mnist = fetch_mldata('MNIST original')
data = mnist.data>0
data = data.astype('uint8')
target = mnist.target
fixed = np.empty_like(data)
for i in range(0, len(data)):
l = label(data[i].reshape(28, 28))
r = regionprops(l)
min_x = r[0].bbox[0]
min_y = r[0].bbox[1]
for j in range(1, len(r)):
if r[j].bbox[0] < min_x:
min_x = r[j].bbox[0]
if r[j].bbox[1] < min_y:
min_y = r[j].bbox[1]
img = move(data[i].reshape(28, 28), min_x, min_y)
fixed[i] = img.reshape(784, )
model = train(fixed, target)
for index in range(0,9):
total = 0
video = cv2.VideoCapture(homepath + videopaths[index])
flag, frame = video.read()
bw = my_rgb2gray(frame)
result = hough(frame, bw, 10, 50)
while 1:
flag1, frame1 = video.read()
last_count = total
if flag1 is True:
bwframe = my_rgb2gray2(frame1)
curr_count = count_images(bwframe)
if curr_count <= last_count:
last_count = curr_count
else:
total += curr_count - last_count
last_count = curr_count
print total
k = cv2.waitKey(15) & 0xff
if k == 27:
break
else:
break
with open('out.txt', 'a') as file:
file.write(homepath + videopaths[index] + '\t' + str(total))
i get this error:
Traceback (most recent call last):
File "C:\Users\Joe\Desktop\SOFT-master7o\SoftProject.py", line 147, in <module>
bw = my_rgb2gray(frame)
File "C:\Users\Joe\Desktop\SOFT-master7o\SoftProject.py", line 35, in my_rgb2gray
img_gray = 0.5*img_rgb[:, :, 0] + 0*img_rgb[:, :, 1] + 0.5*img_rgb[:, :, 2]
TypeError: 'NoneType' object has no attribute '__getitem__'
What's wrong? Thanks
When you call a name with the square brackets, Python calla 'getitem' under the hood.
So it means that img_rgb is not what you expect it to be. Instead of a numpy array it is None.
Check the portion of code where img_rgb is assigned to.
Answer to comment:
Check your inputs before you do operations on them.
I would use the VS inbuild debugger and set a breakpoint on this line:
bw = my_rgb2gray(frame)
and inspect each frame if it is None before entering the function.
How to handle it if its None? Depends - either skip that video-frame or, if all are None, something is amiss and you need to check why flag, frame = video.read() produces a frame that is None. Sometimes the documentation will help you out:
https://docs.opencv.org/2.4/modules/highgui/doc/reading_and_writing_images_and_video.html#videocapture
https://docs.opencv.org/2.4/modules/highgui/doc/reading_and_writing_images_and_video.html#videocapture-read
This is the function which generates a set of frames from an input video and stores them in a folder.
The full function:
def new_dewarp(self):
vidpath = self.iVidPath
def isInROI(x, y, R1, R2, Cx, Cy):
isInOuter = False
isInInner = False
xv = x-Cx
yv = y-Cy
rt = (xv*xv)+(yv*yv)
if(rt < R2*R2):
isInOuter = True
if(rt < R1*R1):
isInInner = True
return isInOuter and not isInInner
def buildMap(Ws, Hs, Wd, Hd, R1, R2, Cx, Cy):
map_x = np.zeros((Hd,Wd),np.float32)
map_y = np.zeros((Hd,Wd),np.float32)
rMap = np.linspace(R1, R1 + (R2 - R1), Hd)
thetaMap = np.linspace(0, 0 + float(Wd) * 2.0 * np.pi, Wd)
sinMap = np.sin(thetaMap)
cosMap = np.cos(thetaMap)
for y in xrange(0, int(Hd-1)):
map_x[y] = Cx + rMap[y] * sinMap
map_y[y] = Cy + rMap[y] * cosMap
return map_x, map_y
# do the unwarping
def unwarp(img, xmap, ymap):
output = cv2.remap(img.getNumpyCv2(), xmap, ymap, cv2.INTER_LINEAR)
result = Image(output, cv2image=True)
# return result
return result
#vidpath =
disp = Display((800, 600))
#disp = Display((1296,972))
vals = []
last = (0, 0)
# Load the video from the rpi
vc = VirtualCamera(vidpath, "video")
# Sometimes there is crud at the begining, buffer it out
for i in range(0, 10):
img = vc.getImage()
img.save(disp)
# Show the user a frame let them left click the center
# of the "donut" and the right inner and outer edge
# in that order. Press esc to exit the display
while not disp.isDone():
test = disp.leftButtonDownPosition()
if( test != last and test is not None):
last = test
print "[360fy]------- center = {0}\n".format(last)
vals.append(test)
# center of the "donut"
Cx = vals[0][0]
Cy = vals[0][1]
#print str(Cx) + " " + str(Cy)
# Inner donut radius
R1x = vals[1][0]
R1y = vals[1][1]
R1 = R1x-Cx
#print str(R1)
# outer donut radius
R2x = vals[2][0]
R2y = vals[2][1]
R2 = R2x-Cx
#print str(R2)
# our input and output image siZes
Wd = round(float(max(R1, R2)) * 2.0 * np.pi)
#Wd = 2.0*((R2+R1)/2)*np.pi
#Hd = (2.0*((R2+R1)/2)*np.pi) * (90/360)
Hd = (R2-R1)
Ws = img.width
Hs = img.height
# build the pixel map, this could be sped up
print ("BUILDING MAP!")
xmap,ymap = buildMap(Ws, Hs, Wd, Hd, R1, R2, Cx, Cy)
print ("MAP DONE!")
result = unwarp(img, xmap, ymap)
result.save(disp)
i = 0
while img is not None:
print "Frame Number: {0}".format(i)
result = unwarp(img, xmap, ymap)
result.save(disp)
# Save to file
fname = "vid_files/frames/FY{num:06d}.png".format(num=i)
result.save(fname)
img = vc.getImage()
i = i + 1
if img is None:
self.statusText.setText(str( "Status: Done"))
The section in above code which does the saving of frames:
while img is not None:
print "Frame Number: {0}".format(i)
result = unwarp(img, xmap, ymap)
result.save(disp)
# Save to file
fname = "vid_files/frames/FY{num:06d}.png".format(num=i)
result.save(fname)
img = vc.getImage()
i = i + 1
if img is None:
self.statusText.setText(str( "Status: Done"))
I want to know if there is anyway I can save the frames directly to a video( preferably mp4) with the frame rate and frame size of input video?
I can fetch the frame size and frame rate using ffprobe if needed.
Under the Windows I get this error. How to fix PIL?
This is error: TypeError: argument 1 must be ImagingCore, not ImagingCore
#!/usr/bin/python
## -*- coding: utf-8 -*-
from PIL import Image, ImageFont
import ImageDraw, StringIO, string
from random import *
from math import *
import os
SITE_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
class Captcha3d(object):
_hypot = 4
_xx, _yy = 35, 70
_CIMAGE = None
_CHARS = string.ascii_lowercase + string.ascii_uppercase + string.digits
_TEXT = ''
def __init__(self):
self._CIMAGE = Image.new("RGB", (self._yy * self._hypot, self._xx * self._hypot), (255,255,255))
self.generateCode()
self.render()
def imageColorAllocate(self, r,g,b):
hexchars = "0123456789ABCDEF"
hexcolor = hexchars[r / 16] + hexchars[r % 16] + hexchars[g / 16] + hexchars[g % 16] + hexchars[b / 16] + hexchars[b % 16]
return int(hexcolor, 16)
def generateCode(self):
chars = self._CHARS
self._TEXT = "".join(choice(chars) for x in range(randint(3, 3)))
def getText(self):
return self._TEXT
def getProection(self, x1,y1,z1):
x = x1 * self._hypot
y = z1 * self._hypot
z = -y1 * self._hypot
xx = 0.707106781187
xy = 0
xz = -0.707106781187
yx = 0.408248290464
yy = 0.816496580928
yz = 0.408248290464 # 1/sqrt(6)
cx = xx*x + xy*y + xz*z
cy = yx*x + yy*y + yz*z + 20*self._hypot
return [cx, cy]
def zFunction(self, x,y):
z = 2.6
if self._CIMAGE.getpixel((y/2,x/2)) == (0,0,0):
z = 0
if z != 0:
z += float(randint(0,60))/100
z += 1.4 * sin((x+self.startX)*pi/15) * sin((y+self.startY)*pi/15)
return z
def render(self):
fontSans = ImageFont.truetype(os.path.join(SITE_PATH, "data", "fonts", "FreeSans.ttf"), 14)
draw = ImageDraw.Draw(self._CIMAGE)
whiteColor = 'white'
draw.rectangle([0, 0, self._yy * self._hypot, self._xx * self._hypot], fill=whiteColor)
#textColor = 'black'
#imgtext = Image.open("i8n.png")
#self._CIMAGE.paste(imgtext, (0,0))
imgtext = Image.new("1", (self._yy * self._hypot, self._xx * self._hypot), (1))
drawtext = ImageDraw.Draw(imgtext)
drawtext.text((1,0), self._TEXT, font=fontSans, fill=0)
self._CIMAGE.paste(imgtext, (0,0))
#draw.text((2,0), self.text, font=fontSans, fill=textColor)
self.startX = randint(0,self._xx)
self.startY = randint(0,self._yy)
crd = {}
x = 0
while x < (self._xx+1):
y = 0
while y < (self._yy+1):
crd[str(x) + '&' + str(y)] = self.getProection(x,y,self.zFunction(x,y))
y += 1
x += 1
x = 0
while x < self._xx:
y = 0
while y < self._yy:
coord = []
coord.append((int(crd[str(x) + '&' + str(y)][0]),int(crd[str(x) + '&' + str(y)][1])))
coord.append((int(crd[str(x+1) + '&' + str(y)][0]),int(crd[str(x+1) + '&' + str(y)][1])))
coord.append((int(crd[str(x+1) + '&' + str(y+1)][0]),int(crd[str(x+1) + '&' + str(y+1)][1])))
coord.append((int(crd[str(x) + '&' + str(y+1)][0]),int(crd[str(x) + '&' + str(y+1)][1])))
c = int(self.zFunction(x,y)*32)
linesColor = (c,c,c)
draw.polygon(coord, fill=whiteColor, outline=linesColor)
#draw.polygon(coord, fill=whiteColor)
y += 1
x += 1
draw.rectangle([0, 0, self._xx, self._yy], fill=whiteColor)
#draw.text((2,0), self.text, font=fontSans, fill=textColor)
#imageString($this->image, 1, 3, 0, (microtime(true)-$this->time), $textColor);
del draw
#self._CIMAGE.save("image.png", "PNG")
return [self._CIMAGE, self._TEXT]
def main():
a = Captcha3d()
print a.getText()
if __name__ == '__main__':
main()
Also happens for me on OSX 10.6.8, Python 2.6.5. I think some class is getting dynamically imported twice.
Try changing
from PIL import Image, ImageFont
to
import Image, ImageFont
That worked for me.
In my case it solved the situation to also import ImageDraw from PIL
from PIL import ImageDraw
importing all PIL things directly from PIL should always work.
However, if you mix imports, as such,
from PIL import Image
import ImageDraw
This can lead to conflict between two un-identical PIL libraries.
This can happen if you have installed both PIL and Pillow
We should really always do,
from PIL import Image
from PIL import ImageDraw
etc.
I.e. be specific about which package to use.