I am using Pygame to render a sprite in a window, and I want to play a sinusoidal sound whose frequency depends on the y position of that sprite. I don't want discontinuities in the phase of the signal. What's the best way to achieve this?
I came up with this solution: change freq to newfreq, and adjust the phase so the waveform stays continuous at the switch: newphase = 2*np.pi*t*(freq-newfreq) + phase. This follows from requiring the old and new sine arguments to be equal at the switch time t: 2*np.pi*freq*t + phase == 2*np.pi*newfreq*t + newphase.
import pyaudio
import numpy as np
from time import time

CHANNELS = 2
RATE = 44100
TT = time()
freq = 100
newfreq = 100
phase = 0

def callback(in_data, frame_count, time_info, status):
    global TT, phase, freq, newfreq
    if newfreq != freq:
        # adjust the phase so the sine stays continuous across the switch
        phase = 2*np.pi*TT*(freq-newfreq) + phase
        freq = newfreq
    left = np.sin(phase + 2*np.pi*freq*(TT + np.arange(frame_count)/float(RATE)))
    data = np.zeros((left.shape[0]*2,), np.float32)
    data[::2] = left   # left channel
    data[1::2] = left  # right channel
    TT += frame_count/float(RATE)
    return (data, pyaudio.paContinue)

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                stream_callback=callback)
stream.start_stream()

start = time()
try:
    while True:
        now = time()
        if now - start > 1/24.:
            # update the frequency; this will depend on y in the future
            newfreq = 200 + np.sin(2*np.pi*1/20.*now)*100
            print(newfreq)
            start = now
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()
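A quick way to convince yourself the phase update is right (a minimal sketch, separate from the code above, with made-up switch values) is to check numerically that the old and new expressions agree at the switch time:

import numpy as np

# hypothetical switch: freq -> newfreq at time t, phase adjusted per the formula above
freq, newfreq, phase, t = 100.0, 150.0, 0.3, 1.2345
newphase = 2*np.pi*t*(freq - newfreq) + phase

old_value = np.sin(phase + 2*np.pi*freq*t)
new_value = np.sin(newphase + 2*np.pi*newfreq*t)
print(np.isclose(old_value, new_value))  # True: no jump at the switch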
Related
I'm trying to make a program that generates sounds from a wifi probe log, so that the number of devices (within a certain distance) determines the number of tones, and the RSSI of each device sets its frequency.
I'm trying to make it as close to real time as possible, but I can't figure out how to make the tones continuous and change frequency when a value changes.
'''
This program takes a log file from a
wifi probe and translates it into sound
'''
import time
import math                    # import needed modules
import pyaudio                 # sudo apt-get install python-pyaudio
import threading
from threading import Thread
from pydub import AudioSegment
from pydub.generators import Sine
from pydub.playback import play
import signal

def logData():
    '''
    Takes log file data and puts it into a dictionary,
    updating every 1 sec
    '''
    global dic
    global tone
    tone = []
    dic = {}
    while True:
        with open("/Users/CWT/Documents/VÆRKER/probemon.log") as f:
            for line in f:
                (key, val) = line.split()
                if int(val) >= -50:   # keep only strong (nearby) signals
                    dic[key] = val
        print(dic)
        time.sleep(1)

def sound():
    '''
    Generate sounds
    '''
    # Play final tone
    while True:
        try:
            tone1 = Sine(abs(int(list(dic.values())[0]))).to_audio_segment(3000)
            tone2 = Sine(abs(int(list(dic.values())[1]))).to_audio_segment(3000)
            tone3 = Sine(abs(int(list(dic.values())[2]))).to_audio_segment(3000)
        except IndexError:
            print('Index error')
        try:
            multitone1 = tone1
            multitone2 = tone1.overlay(tone2)
            multitone3 = tone3.overlay(multitone2)
        except NameError:
            print('Multitone error')
        try:
            if len(dic) <= 1:
                play(multitone1.fade_in(250).fade_out(250))
            elif len(dic) == 2:
                play(multitone2.fade_in(250).fade_out(250))
            elif len(dic) >= 3:
                play(multitone3.fade_in(250).fade_out(250))
        except NameError:
            print('Playback error')

if __name__ == '__main__':
    try:
        Thread(target=logData).start()
        time.sleep(1)
        Thread(target=sound).start()
    except KeyboardInterrupt:
        print('Interrupted')
I was able to craft a boilerplate solution that you can tailor to your needs.
Here's the central idea:
1) Read the last line of the log file using os.popen, in a continuous loop that repeats every second.
2) The absolute RSSI values are small and close together, so we multiply by a constant (100 here) to map them to clearly distinguishable frequencies. You can try different values.
3) Using pydub, we create sine tones and play them.
Code
from pydub.generators import Sine
from pydub import AudioSegment
from pydub.playback import play
import os
import time

sr = 44100      # sample rate
bd = 16         # bit depth
l = 50.0        # duration in milliseconds
last_line = ""  # to avoid playing the same line again
log_file = "probemon.log"

while True:
    line = os.popen('tail -n 1 {}'.format(log_file)).read()
    if last_line == line:
        pass
    else:
        key, val = line.split()
        f = abs(int(val)) * 100
        # create sine wave of given frequency
        sine_wave = Sine(f, sample_rate=sr, bit_depth=bd)
        # convert waveform to an audio_segment for playback and export
        sine_segment = sine_wave.to_audio_segment(duration=l)
        print("mac:{} , rssi:{}".format(key, val))
        # play audio segment
        play(sine_segment)
        last_line = line
    time.sleep(1)  # sleep 1 sec; sync this with the log file fill rate
I tested by filling the probemon.log file line by line from a different terminal, with a delay of 1 second. The loop will wait if there is no new data.
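If you don't have live probe data handy, a minimal sketch like this (my addition; the MAC addresses and RSSI range are made up) fills probemon.log the same way, one line per second:

import random
import time

# hypothetical log filler: appends "MAC RSSI" lines the way the probe would
macs = ["d8:8f:76:1a:cb:65", "04:4f:4c:77:72:8f", "20:39:56:af:51:49"]
with open("probemon.log", "a") as f:
    while True:
        f.write("{} {}\n".format(random.choice(macs), random.randint(-60, -35)))
        f.flush()  # make the line visible to tail immediately
        time.sleep(1)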
EDIT1
Audio "tone" have "frequency" , when you change Frequency, tone changes.
Based on our discussion, since we need tone to vary in real-time, we can't use pydub which is mostly good for offline manipulation.
pyaudio has a non-blocking method using callback, which allows manipulating stream data while its being played in realtime.
This solution plays audio based on last line of log continuously until log data changes.
This solution also eliminated popping / cracking sound that occurs when merging two tones.
Inspiration from here.
import pyaudio
import numpy as np
from time import time, sleep
import os

CHANNELS = 2
RATE = 44100
TT = time()
freq = 100
newfreq = 100
phase = 0
log_file = "probemon.log"

def callback(in_data, frame_count, time_info, status):
    global TT, phase, freq, newfreq
    if newfreq != freq:
        # keep the sine continuous across the frequency change
        phase = 2*np.pi*TT*(freq-newfreq) + phase
        freq = newfreq
    left = np.sin(phase + 2*np.pi*freq*(TT + np.arange(frame_count)/float(RATE)))
    data = np.zeros((left.shape[0]*2,), np.float32)
    data[0::2] = left  # left channel
    data[1::2] = left  # right channel
    TT += frame_count/float(RATE)
    return (data, pyaudio.paContinue)

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                stream_callback=callback)
stream.start_stream()

tmphold = ""
try:
    while True:
        line = os.popen('tail -n 1 {}'.format(log_file)).read()
        try:
            key, val = line.split()
        except ValueError:
            key, val = "default", 0.0
        f = abs(int(val))
        newfreq = f * 10  # update freq per log
        if newfreq != tmphold:
            tmphold = newfreq
            print("mac:{} , rssi:{} , freq:{} Hz".format(key, val, newfreq))
        sleep(1)  # poll the log once per second
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()
Result
mac:default , rssi:0.0 , freq:0 Hz
mac:d8:8f:76:1a:cb:65 , rssi:-43 , freq:430 Hz
mac:04:4f:4c:77:72:8f , rssi:-51 , freq:510 Hz
mac:20:39:56:af:51:49 , rssi:-39 , freq:390 Hz
mac:20:39:56:af:51:49 , rssi:-45 , freq:450 Hz
mac:5e:e2:1d:a3:d2:da , rssi:-47 , freq:470 Hz
mac:5e:e2:1d:a3:d2:da , rssi:-49 , freq:490 Hz
mac:12:84:16:9c:75:ee , rssi:-43 , freq:430 Hz
mac:da:a1:19:71:4d:0c , rssi:-55 , freq:550 Hz
mac:d8:8f:76:1a:cb:65 , rssi:-49 , freq:490 Hz
So I have been working on making an equalizer, and the problem I am facing is that the pyaudio stream plays much faster than the equalizer can compute the bass component of the audio file. I will briefly outline the implementation:
I have created two extra threads and used tkinter for the GUI. Thread 1 computes the bass component (the bass() function) of the sound in chunks of 50 ms of data.
Thread 2 plots that by creating a rectangle in tkinter whose top-left coordinate varies.
flag2 keeps the main thread running, while flag synchronizes the bass() and plot() functions. The last part of the code ensures that the display doesn't run faster than the song itself (however, the exact opposite is the concern right now).
I am attaching the code here:
import numpy as np
from scipy.io import wavfile
from numpy import fft as fft
import time
import tkinter as tk
import threading
import pyaudio
import wave

CHUNK = 1024
wf = wave.open("test3.wav", 'rb')
p = pyaudio.PyAudio()

###
def callback(in_data, frame_count, time_info, status):
    data = wf.readframes(frame_count)
    return (data, pyaudio.paContinue)

stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                stream_callback=callback)
####

rate, audData = wavfile.read("test3.wav")
print("Rate " + str(rate))
print("Length of wav file (in s) = " + str(audData.shape[0]/rate))
ch1 = audData[:]
tim = 0.050         # window length in seconds
pt = int(tim*rate)  # samples per window
flag2 = True
flag = False
cnt = 0
value = 0

def bass():
    global pt, cnt, audData, value, flag2, flag
    cnt += 1
    fourier = fft.fft(ch1[((cnt-1)*pt):(cnt*pt)])
    fourier = abs(fourier) / float(pt)
    fourier = fourier[0:25]  # keep only the lowest bins (bass)
    fourier = fourier**2
    if (cnt+1)*pt > len(audData):
        flag2 = False
    value = (np.sum(fourier))/pt
    flag = True
    return

def plot():
    global value, flag
    root = tk.Tk()
    canvas = tk.Canvas(root, width=200, height=500)
    canvas.pack()
    while True:
        if flag:
            canvas.delete("all")
            flag = False
            greenbox = canvas.create_rectangle(50, 500-(value/80), 150, 500, fill="green")
            print(value/80)  # to check whether it exceeds 500
        root.update_idletasks()
        root.update()
    return

def sound():
    global data, stream, wf, CHUNK
    stream.start_stream()
    while stream.is_active():
        time.sleep(0.1)
    stream.stop_stream()
    stream.close()
    wf.close()
    p.terminate()

bass()
t1 = threading.Thread(target=plot, name='t_1')
t2 = threading.Thread(target=sound, name='t_2')
t1.start()
t2.start()

while flag2:
    a = time.time()
    bass()
    b = time.time()
    while (b-a) < tim:
        time.sleep(0.015)
        b = time.time()
To overcome this processing-speed problem, I tried to process only one of every three chunks:

cnt += 1
fourier = fft.fft(ch1[((3*cnt-3)*pt):((3*cnt-2)*pt)])
fourier = abs(fourier) / float(pt)
fourier = fourier[0:25]
fourier = fourier**2
if (3*cnt+1)*pt > len(audData):
    flag2 = False
#######
while (b-a) < 3*tim:
    time.sleep(0.015)
    b = time.time()

But even this is not up to the mark; the lag becomes visible after a few seconds. Any ideas on how to improve this?
Instead of efficiency, a more realistic solution might be delay matching. If you can determine the latency of your FFT and display (etc.) processes, then you can either delay the sound output (using a FIFO of some number of audio samples), or have the visualization process look ahead in the playback file by the equivalent number of samples.
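As a rough sketch of the first option (my addition; the delay of four callback buffers is an assumed figure, not a measured latency), you can delay the audio inside the pyaudio callback by pushing frames through a FIFO:

import collections
import time
import wave
import pyaudio

wf = wave.open("test3.wav", 'rb')
p = pyaudio.PyAudio()

DELAY_CHUNKS = 4  # assumed: ~4 callback buffers of extra latency to match the display

# FIFO of audio chunks; audio leaving the queue lags audio entering it
fifo = collections.deque()

def callback(in_data, frame_count, time_info, status):
    fifo.append(wf.readframes(frame_count))
    if len(fifo) <= DELAY_CHUNKS:
        # not enough queued yet: emit silence while the FIFO fills
        width = wf.getsampwidth() * wf.getnchannels()
        return (b'\x00' * (frame_count * width), pyaudio.paContinue)
    return (fifo.popleft(), pyaudio.paContinue)

stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                stream_callback=callback)
stream.start_stream()
while stream.is_active():
    time.sleep(0.1)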
I want to write a very basic application that passes audio from the microphone to the speakers. This is very simple with pyaudio, as described at https://people.csail.mit.edu/hubert/pyaudio/ .
import time
import pyaudio

def passthrough():
    WIDTH = 2
    CHANNELS = 1
    RATE = 44100
    p = pyaudio.PyAudio()

    def callback(in_data, frame_count, time_info, status):
        return (in_data, pyaudio.paContinue)

    stream = p.open(format=p.get_format_from_width(WIDTH),
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    output=True,
                    stream_callback=callback)
    stream.start_stream()
    while stream.is_active():
        time.sleep(0.1)
    stream.stop_stream()
    stream.close()
    p.terminate()
But now I am trying to mix a wave file into this stream when an event occurs, and that's where I am stuck right now. Playing a wave file seems to be easy, too.
import time
import wave
import pyaudio

def play_wave(wav_file):
    wf = wave.open(wav_file, 'rb')
    sample_width = wf.getsampwidth()
    channels = wf.getnchannels()
    rate = wf.getframerate()
    second = sample_width * channels * rate

    def callback(in_data, frame_count, time_info, status):
        data = wf.readframes(frame_count)
        return (data, pyaudio.paContinue)

    p = pyaudio.PyAudio()
    stream = p.open(format=p.get_format_from_width(sample_width),
                    channels=channels,
                    rate=int(rate),
                    output=True,
                    stream_callback=callback)
    stream.start_stream()
    while stream.is_active():
        time.sleep(0.1)
    stream.stop_stream()
    stream.close()
    wf.close()
    p.terminate()
At this time, I have two problems:
1. How do I mix the wave output into the continuous stream?
2. How can I trigger 1. on an event basis?
Hope someone can light up the dark basement I am in right now.
EDIT: Assume the wave file has the same number of channels and the same rate as the stream, so no conversion is necessary.
After moving the passthrough() function into a thread, it works as desired. When I tried this yesterday, I had just messed up the thread start (I called passthrough() from __init__ instead of from the run() method).
So here is the complete, working code.
import pyaudio
import wave
import threading
import time

class AudioPass(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        self.passthrough()

    def passthrough(self):
        WIDTH = 2
        CHANNELS = 1
        RATE = 44100
        p = pyaudio.PyAudio()

        def callback(in_data, frame_count, time_info, status):
            return (in_data, pyaudio.paContinue)

        stream = p.open(format=p.get_format_from_width(WIDTH),
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=True,
                        stream_callback=callback)
        stream.start_stream()
        while stream.is_active():
            time.sleep(0.1)
        stream.stop_stream()
        stream.close()
        p.terminate()

def play_wave(wav_file):
    wf = wave.open(wav_file, 'rb')
    sample_width = wf.getsampwidth()
    channels = wf.getnchannels()
    rate = wf.getframerate()
    second = sample_width * channels * rate

    def callback(in_data, frame_count, time_info, status):
        data = wf.readframes(frame_count)
        return (data, pyaudio.paContinue)

    p = pyaudio.PyAudio()
    stream = p.open(format=p.get_format_from_width(sample_width),
                    channels=channels,
                    rate=int(rate),
                    output=True,
                    stream_callback=callback)
    stream.start_stream()
    while stream.is_active():
        time.sleep(0.1)
    stream.stop_stream()
    stream.close()
    wf.close()
    p.terminate()

thread = AudioPass()
thread.start()
play_wave('C:/bell.wav')
Later I will also try another way a colleague suggested today, and if it works well, I will add it here as an alternative. Using the threaded way is nice because I can use different rates for the stream and the wav file.
A colleague provided the solution below. It is a very raw approach, but it works and is good for understanding how this pyaudio stuff works.
import time
import pyaudio
import numpy

WIDTH = 2
CHANNELS = 1
RATE = 44100
p = pyaudio.PyAudio()

SINE_WAVE_FREQUENCY = 440.0  # in Hz
SINE_WAVE_DURATION = 5.0     # in seconds
SINE_WAVE_VOLUME = 0.5
SINE_WAVE = (numpy.sin(2 * numpy.pi * numpy.arange(RATE * SINE_WAVE_DURATION) * SINE_WAVE_FREQUENCY / RATE)).astype(numpy.float32) * SINE_WAVE_VOLUME

def loopback(in_data, frame_count, time_info, status):
    return (in_data, pyaudio.paContinue)

stream = p.open(format=p.get_format_from_width(WIDTH), channels=CHANNELS, rate=RATE, input=True, output=True, stream_callback=loopback)
stream.start_stream()

def playsine():
    sinestream = p.open(format=pyaudio.paFloat32, channels=1, rate=RATE, output=True)
    sinestream.write(SINE_WAVE)
    sinestream.stop_stream()
    sinestream.close()

while True:
    input("Press enter to play a sine wave")
    playsine()
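Both of these approaches play two independent streams rather than mixing samples. For true sample-level mixing in a single stream (question 1 above), a minimal sketch (my addition, assuming the wave file matches the stream format: 16-bit mono at 44100 Hz, as the question's EDIT stipulates) reads the wave frames inside the passthrough callback and sums them with the microphone input:

import time
import wave
import numpy
import pyaudio

wf = wave.open('bell.wav', 'rb')  # assumed: 16-bit mono, 44100 Hz, matching the stream
p = pyaudio.PyAudio()

def mix_callback(in_data, frame_count, time_info, status):
    mic = numpy.frombuffer(in_data, dtype=numpy.int16)
    wav = numpy.frombuffer(wf.readframes(frame_count), dtype=numpy.int16)
    mixed = mic.copy()
    # sum the overlapping part; an int32 intermediate avoids int16 overflow
    n = len(wav)
    mixed[:n] = numpy.clip(mic[:n].astype(numpy.int32) + wav, -32768, 32767).astype(numpy.int16)
    return (mixed.tobytes(), pyaudio.paContinue)

stream = p.open(format=pyaudio.paInt16, channels=1, rate=44100,
                input=True, output=True, stream_callback=mix_callback)
stream.start_stream()
while stream.is_active():
    time.sleep(0.1)

To trigger it on an event (question 2), you could open the wave file from the event handler and have the callback add its frames only while any remain.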
I'm trying to create an infinitely long pure sine tone in Python (so I can later add real-time on/off events), and while I can get a tone playing, it is choppy and clipped. My assumption is that it's either because the next chunk doesn't start at the same point in the wave's cycle where the last chunk ended, or because there is a delay in calculating the next chunk, and I have no idea which it is.
Is either of those things occurring, or have I made some other error? Moreover, is there a better approach that doesn't sacrifice the ability to alter the incoming data in real time?
import time
import numpy
import pyaudio
import math

CHUNK = 4096
RATE = 44100

def sine(current_time, frequency=440):
    length = CHUNK
    factor = float(frequency) * (math.pi * 2) / RATE
    this_chunk = numpy.arange(length) + current_time
    return numpy.sin(this_chunk * factor)

def get_chunk():
    data = sine(time.time())
    return data * 0.1

def callback(in_data, frame_count, time_info, status):
    chunk = get_chunk() * 0.25
    data = chunk.astype(numpy.float32).tobytes()
    return (data, pyaudio.paContinue)

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
                channels=2,
                rate=RATE,
                output=True,
                stream_callback=callback)
stream.start_stream()
while stream.is_active():
    time.sleep(0.1)
stream.stop_stream()
stream.close()
Looks like you need some optimization; see
https://wiki.python.org/moin/PythonSpeed/PerformanceTips
Try:
import time
import numpy
import pyaudio
import math

CHUNK = 4096
RATE = 44100
MP = math.pi
NA = numpy.arange
NS = numpy.sin
TT = time.time
NF32 = numpy.float32
p = pyaudio.PyAudio()
PO = p.open
PC = pyaudio.paContinue
PF = pyaudio.paFloat32
TS = time.sleep

def sine(current_time, frequency=440):
    length = CHUNK
    factor = float(frequency) * (MP * 2) / RATE
    this_chunk = NA(length) + current_time
    return NS(this_chunk * factor) * 0.025

def callback(in_data, frame_count, time_info, status):
    chunk = sine(TT())
    data = chunk.astype(NF32).tobytes()
    return (data, PC)

stream = PO(format=PF,
            channels=2,
            rate=RATE,
            output=True,
            stream_callback=callback)
stream.start_stream()
while stream.is_active():
    TS(0.1)
stream.stop_stream()
stream.close()
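For what it's worth, micro-optimizations like these don't address the likely root cause: sine() is seeded with time.time() on every callback, so consecutive chunks don't line up in phase (and the mono data fed to a 2-channel stream compounds the problem). A sketch of one common fix (my addition, not from the answer above) keeps a running sample counter so each chunk continues exactly where the previous one ended:

import time
import numpy
import pyaudio

RATE = 44100
FREQ = 440.0
samples_done = 0  # running sample index: keeps consecutive chunks contiguous in phase

def callback(in_data, frame_count, time_info, status):
    global samples_done
    t = (samples_done + numpy.arange(frame_count)) / RATE
    chunk = 0.1 * numpy.sin(2 * numpy.pi * FREQ * t)
    samples_done += frame_count
    return (chunk.astype(numpy.float32).tobytes(), pyaudio.paContinue)

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32, channels=1, rate=RATE,
                output=True, stream_callback=callback)  # mono stream matches the mono data
stream.start_stream()
while stream.is_active():
    time.sleep(0.1)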
I have gotten both OpenCV and PyAudio working; however, I am not sure how I would sync them together. I am unable to get a frame rate from OpenCV, and measuring the call time for a frame changes from moment to moment. PyAudio, by contrast, works by grabbing at a fixed sample rate. How would I sync them to run at the same rate? I assume there is some standard way codecs do this (I've tried Google; all I got was information on lip syncing :/).
OpenCV Frame rate
from __future__ import division
import time
import math
import cv2, cv

vc = cv2.VideoCapture(0)

# get the frame
while True:
    before_read = time.time()
    rval, frame = vc.read()
    after_read = time.time()
    if frame is not None:
        print(len(frame))
        print(math.ceil(1.0 / (after_read - before_read)))
        cv2.imshow("preview", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        print("None...")
        cv2.waitKey(1)

# display the frame
while True:
    cv2.imshow("preview", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
Grabbing and saving audio
from sys import byteorder
from array import array
from struct import pack
import pyaudio
import wave

THRESHOLD = 500
CHUNK_SIZE = 1024
FORMAT = pyaudio.paInt16
RATE = 44100

def is_silent(snd_data):
    "Returns 'True' if below the 'silent' threshold"
    print("\n\n\n\n\n\n\n\n")
    print(max(snd_data))
    print("\n\n\n\n\n\n\n\n")
    return max(snd_data) < THRESHOLD

def normalize(snd_data):
    "Average the volume out"
    MAXIMUM = 16384
    times = float(MAXIMUM)/max(abs(i) for i in snd_data)
    r = array('h')
    for i in snd_data:
        r.append(int(i*times))
    return r

def trim(snd_data):
    "Trim the blank spots at the start and end"
    def _trim(snd_data):
        snd_started = False
        r = array('h')
        for i in snd_data:
            if not snd_started and abs(i) > THRESHOLD:
                snd_started = True
                r.append(i)
            elif snd_started:
                r.append(i)
        return r

    # Trim to the left
    snd_data = _trim(snd_data)
    # Trim to the right
    snd_data.reverse()
    snd_data = _trim(snd_data)
    snd_data.reverse()
    return snd_data

def add_silence(snd_data, seconds):
    "Add silence to the start and end of 'snd_data' of length 'seconds' (float)"
    r = array('h', [0 for i in range(int(seconds*RATE))])
    r.extend(snd_data)
    r.extend([0 for i in range(int(seconds*RATE))])
    return r

def record():
    """
    Record a word or words from the microphone and
    return the data as an array of signed shorts.

    Normalizes the audio, trims silence from the
    start and end, and pads with 0.5 seconds of
    blank sound to make sure VLC et al can play
    it without getting chopped off.
    """
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=1, rate=RATE,
                    input=True, output=True,
                    frames_per_buffer=CHUNK_SIZE)

    num_silent = 0
    snd_started = False
    r = array('h')

    while 1:
        # little endian, signed short
        snd_data = array('h', stream.read(1024))
        if byteorder == 'big':
            snd_data.byteswap()
        print("\n\n\n\n\n\n")
        print(len(snd_data))
        print(snd_data)
        r.extend(snd_data)

        silent = is_silent(snd_data)
        if silent and snd_started:
            num_silent += 1
        elif not silent and not snd_started:
            snd_started = True
        if snd_started and num_silent > 1:
            break

    sample_width = p.get_sample_size(FORMAT)
    stream.stop_stream()
    stream.close()
    p.terminate()

    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r

def record_to_file(path):
    "Records from the microphone and outputs the resulting data to 'path'"
    sample_width, data = record()
    data = pack('<' + ('h'*len(data)), *data)

    wf = wave.open(path, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

if __name__ == '__main__':
    print("please speak a word into the microphone")
    record_to_file('demo.wav')
    print("done - result written to demo.wav")
I think you'd be better off using GStreamer or ffmpeg, or, if you're on Windows, DirectShow. These libs can handle both audio and video, and should have some kind of multiplexer to let you mix video and audio properly.
But if you really want to do this using OpenCV, you should be able to use VideoCapture to get the frame rate; have you tried this?
fps = cv.GetCaptureProperty(vc, cv.CV_CAP_PROP_FPS)
Another way would be to estimate fps as the number of frames divided by the duration:
nFrames = cv.GetCaptureProperty(vc, cv.CV_CAP_PROP_FRAME_COUNT)
cv.SetCaptureProperty(vc, cv.CV_CAP_PROP_POS_AVI_RATIO, 1)
duration = cv.GetCaptureProperty(vc, cv.CV_CAP_PROP_POS_MSEC)
fps = 1000 * nFrames / duration
I'm not sure I understand what you were trying to do here:
before_read = time.time()
rval, frame = vc.read()
after_read = time.time()
It seems to me that doing after_read - before_read only measures how long it took OpenCV to load the next frame; it doesn't measure the fps. OpenCV is not trying to do playback; it only loads frames, and it will do so as fast as it can, with no way to configure that. I think that putting a waitKey(int(1000/fps)) after displaying each frame will achieve what you're looking for (note that waitKey takes milliseconds, so 1/fps would round to 0).
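As a concrete sketch of that pacing idea (my addition, using the modern cv2 property API rather than the legacy cv calls above; the filename and the 30 fps fallback are assumptions):

import cv2

vc = cv2.VideoCapture("video.avi")       # hypothetical file; capture devices often report fps as 0
fps = vc.get(cv2.CAP_PROP_FPS) or 30.0   # fall back to an assumed 30 fps
delay_ms = int(1000 / fps)

while True:
    rval, frame = vc.read()
    if not rval:
        break
    cv2.imshow("preview", frame)
    # wait roughly one frame period so playback approximates the source rate
    if cv2.waitKey(delay_ms) & 0xFF == ord('q'):
        break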
You could have two counters, one for audio and one for video.
The video counter increases by +(1/fps) when showing an image, and the audio counter by +sec, where sec is the seconds of audio you write to the stream each time. Then in the audio part of the code you can do something like:

while audiosec - videosec >= 0.05:  # audio is ahead
    time.sleep(0.05)

And in the video part:

while videosec - audiosec >= 0.2:  # video is ahead
    time.sleep(0.2)

You can play with the numbers.
This is how I achieved some sort of synchronization in my own video player project, using pyaudio (and recently ffmpeg instead of cv2).
Personally, I used threading for this.
import concurrent.futures
import pyaudio
import cv2

class Aud_Vid():
    def __init__(self, arg):
        self.video = cv2.VideoCapture(0)
        self.CHUNK = 1470
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 2
        self.RATE = 44100
        self.audio = pyaudio.PyAudio()
        self.instream = self.audio.open(format=self.FORMAT, channels=self.CHANNELS, rate=self.RATE, input=True, frames_per_buffer=self.CHUNK)
        self.outstream = self.audio.open(format=self.FORMAT, channels=self.CHANNELS, rate=self.RATE, output=True, frames_per_buffer=self.CHUNK)

    def sync(self):
        # grab one video frame and one audio chunk in parallel
        with concurrent.futures.ThreadPoolExecutor() as executor:
            tv = executor.submit(self.video.read)
            ta = executor.submit(self.instream.read, 1470)
            vid = tv.result()
            aud = ta.result()
            return (vid[1].tobytes(), aud)
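A hedged usage sketch (my addition; the constructor's arg parameter is unused, so a placeholder is passed): each sync() call yields one frame plus the 1470 audio samples that go with it, which at 44100 Hz is 1/30 of a second, i.e. one frame period at ~30 fps:

av = Aud_Vid(None)           # arg is unused in __init__, so pass a placeholder
for _ in range(300):         # roughly 10 seconds at ~30 fps
    frame_bytes, audio_chunk = av.sync()
    av.outstream.write(audio_chunk)  # play the captured audio chunk back out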