Real time continuous sounds with Pydub

Real time continuous sounds with Pydub - python

I'm trying to make a program that generates sounds from a wifi probe log, so that each device within a certain distance generates a tone, with the RSSI determining the frequency.
I'm trying to make it as close to real time as possible, but I can't figure out how to make the tones continuous and change the frequency when the value changes.
'''
This program takes a log file from a
wifi probe and translates it into sound
'''
import time
import math #import needed modules
import pyaudio #sudo apt-get install python-pyaudio
import threading
from threading import Thread
from pydub import AudioSegment
from pydub.generators import Sine
from pydub.playback import play
import signal
def logData(log_path="/Users/CWT/Documents/VÆRKER/probemon.log",
            min_rssi=-50, poll_interval=1):
    '''
    Poll the wifi probe log and keep the shared ``dic`` table current.

    Every ``poll_interval`` seconds the whole log is re-read. Each line is
    expected to hold two whitespace-separated fields: a device key and an
    RSSI value. Devices whose RSSI is at least ``min_rssi`` are stored in
    the global ``dic`` as ``{key: rssi_string}``.

    The RSSI test is numeric. The earlier test compared strings
    (``val <= str(-50)``), which only happened to select readings of -50
    and stronger for two-digit values and wrongly accepted values such as
    "-100".

    Lines that do not have exactly two fields, or whose second field is
    not an integer, are skipped instead of crashing the thread.

    Runs forever; meant to be used as a thread target.
    '''
    global dic
    global tone
    tone = []
    dic = {}
    while True:
        with open(log_path) as f:
            for line in f:
                fields = line.split()
                if len(fields) != 2:
                    continue
                key, val = fields
                try:
                    rssi = int(val)
                except ValueError:
                    continue
                if rssi >= min_rssi:
                    # Keep the original string; consumers convert it.
                    dic[key] = val
        print(dic)
        time.sleep(poll_interval)
def sound():
    '''
    Generate sounds

    Loops forever. Each pass takes up to three RSSI values from the
    global ``dic``, builds one sine tone per value at ``abs(rssi)`` Hz
    (``to_audio_segment(3000)``), overlays the tones and plays the mix
    with a 250 ms fade in and fade out.

    Only tones built during the current pass are mixed, so a device that
    has disappeared from ``dic`` is no longer replayed from a stale local.
    '''
    while True:
        # Snapshot the values: dic is updated from another thread.
        readings = list(dic.values())[:3]
        mix = None
        for reading in readings:
            try:
                frequency = abs(int(reading))
            except ValueError:
                # Skip one unusable value rather than losing the whole mix.
                print('Skipping non-numeric rssi: {!r}'.format(reading))
                continue
            tone = Sine(frequency).to_audio_segment(3000)
            mix = tone if mix is None else mix.overlay(tone)
        if mix is None:
            # Nothing to play yet; wait briefly instead of spinning.
            time.sleep(0.1)
            continue
        try:
            play(mix.fade_in(250).fade_out(250))
        except Exception as exc:
            # Best effort: report the failure and try again on the next pass.
            print('Playback error: {}'.format(exc))
if __name__ == '__main__':
    # Both workers loop forever. They are marked as daemon threads and the
    # main thread waits on them, so Ctrl+C reaches the except clause below
    # and actually ends the program.
    try:
        log_thread = Thread(target = logData)
        log_thread.daemon = True
        log_thread.start()
        time.sleep(1)  # let logData create and fill dic before sound() reads it
        sound_thread = Thread(target = sound)
        sound_thread.daemon = True
        sound_thread.start()
        while sound_thread.is_alive():
            # Join with a timeout so the main thread stays interruptible.
            sound_thread.join(0.5)
    except KeyboardInterrupt:
        print('Interrupted')

I was able to craft a boilerplate solution that you can tailor to your needs.
Here's the central idea:
1) Read the last line of the log file using os.popen, in a continuous loop that repeats every second.
2) The RSSI values are small, and so are the differences between them. We multiply the value by a constant (100 here) to create a noticeable difference. You can try different values.
3) Using pydub, we create sine tones and play them.
Code
from pydub.generators import Sine
from pydub import AudioSegment
from pydub.playback import play
import os
import time
sr = 44100  # sample rate
bd = 16  # bit depth
l = 50.0  # duration in millisec
last_line = ""  # to avoid same line played again
log_file = "probemon.log"
while True:
    # Read the newest log line; close the pipe so handles do not pile up.
    pipe = os.popen('tail -n 1 {}'.format(log_file))
    try:
        line = pipe.read()
    finally:
        pipe.close()
    if line != last_line:
        last_line = line
        fields = line.split()
        try:
            # Expect "<mac> <rssi>"; anything else raises ValueError.
            key, val = fields
            f = abs(int(val)) * 100
        except ValueError:
            # Empty log or malformed line: wait for the next one.
            f = None
        if f is not None:
            # create sine wave of given freq
            sine_wave = Sine(f, sample_rate=sr, bit_depth=bd)
            # Convert waveform to audio_segment for playback and export
            sine_segment = sine_wave.to_audio_segment(duration=l)
            print("mac:{} , rssi:{}".format(key, val))
            # Play audio segment
            play(sine_segment)
    time.sleep(1)  # sleep 1 sec, synch this with log file fill
I tested by filling up probemon.log file line by line from a different terminal with a delay of 1 second. The loop will wait if there is no new data.
EDIT1
An audio "tone" has a "frequency"; when you change the frequency, the tone changes.
Based on our discussion, since we need the tone to vary in real time, we can't use pydub, which is mostly good for offline manipulation.
pyaudio has a non-blocking mode using a callback, which allows manipulating the stream data while it is being played in real time.
This solution plays audio based on the last line of the log continuously until the log data changes.
This solution also eliminates the popping / crackling sound that occurs when merging two tones.
Inspiration from here.
import pyaudio
import numpy as np
from time import time,sleep
import os
# Stream parameters and the oscillator state shared with callback().
CHANNELS = 2
RATE = 44100
TT = time()      # running time base, advanced by callback()
freq = 100       # frequency currently being rendered
newfreq = 100    # requested frequency, written by the main loop
phase = 0        # phase offset applied inside the sine
log_file = "probemon.log"


def callback(in_data, frame_count, time_info, status):
    """Render the next ``frame_count`` frames of the sine tone.

    When ``newfreq`` differs from ``freq`` the phase offset is adjusted so
    that the sine argument at time ``TT`` is the same before and after the
    switch, then ``freq`` takes the new value. The mono signal is written
    to both interleaved channels as float32 and ``TT`` advances by the
    duration of the rendered frames.

    Returns ``(data, pyaudio.paContinue)``.
    """
    global TT, phase, freq, newfreq
    if freq != newfreq:
        phase += 2*np.pi*TT*(freq-newfreq)
        freq = newfreq
    sample_times = TT + np.arange(frame_count)/float(RATE)
    left = np.sin(phase + 2*np.pi*freq*sample_times)
    # Duplicate every sample: left, right, left, right, ...
    data = np.repeat(left.astype(np.float32), 2)
    TT += frame_count/float(RATE)
    return (data, pyaudio.paContinue)
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                stream_callback=callback)
stream.start_stream()
tmphold = ""  # last frequency that was printed
try:
    while True:
        line = os.popen('tail -n 1 {}'.format(log_file)).read()
        try:
            # Expect "<mac> <rssi>"; both the unpacking and int() raise
            # ValueError on an empty or malformed line.
            key, val = line.split()
            f = abs(int(val))
        except ValueError:
            key, val, f = "default", 0.0, 0
        newfreq = f * 10  # update freq per log
        if newfreq != tmphold:
            tmphold = newfreq
            print("mac:{} , rssi:{} , freq:{} Hz".format(key, val, newfreq))
        # Pause between polls so the loop does not spawn `tail` non-stop.
        sleep(0.1)
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()
Result
mac:default , rssi:0.0 , freq:0 Hz
mac:d8:8f:76:1a:cb:65 , rssi:-43 , freq:430 Hz
mac:04:4f:4c:77:72:8f , rssi:-51 , freq:510 Hz
mac:20:39:56:af:51:49 , rssi:-39 , freq:390 Hz
mac:20:39:56:af:51:49 , rssi:-45 , freq:450 Hz
mac:5e:e2:1d:a3:d2:da , rssi:-47 , freq:470 Hz
mac:5e:e2:1d:a3:d2:da , rssi:-49 , freq:490 Hz
mac:12:84:16:9c:75:ee , rssi:-43 , freq:430 Hz
mac:da:a1:19:71:4d:0c , rssi:-55 , freq:550 Hz
mac:d8:8f:76:1a:cb:65 , rssi:-49 , freq:490 Hz

Related

Trying to record audio with python-sounddevice but result is just static

I'm trying to record some audio by using the sounddevice library in python. This is to create a sound-activated recording for a scanner.
I cannot use the record function as it requires a time specified which is not suitable for my program as each transmission may be of different length, so I added a callback to the stream and when the audio is above a certain level will trigger the recording by creating a new file and writing the buffer data to the wave file.
I have implemented the trigger successfully, however when I attempt to write to the file and open it, it is not what the transmission was, instead it is just corrupt static.
Here's my code:
import time as timer
import wave
import numpy as np
import sounddevice as sd
RecordActivate = False  # True while a transmission is being written
StartTime = 0           # timer.time() at the start of the current transmission


def _as_wav_pcm(samples):
    """Return ``samples`` in a form the wave module can store as PCM.

    Floating point blocks are clipped to [-1, 1] and scaled to 16-bit
    integers; integer blocks are returned unchanged.
    """
    if np.issubdtype(samples.dtype, np.floating):
        return (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")
    return samples


def print_sound(indata, outdata, frames, time, status):
    """Stream callback implementing level-triggered recording to scan.wav.

    The level is ``int(np.linalg.norm(indata) * 10)``. While idle, a level
    above 16 opens ``scan.wav`` and starts recording; while recording, a
    level below 16 stops and closes the file, otherwise the block is
    appended.

    The WAV header is derived from the data actually written (sample
    width and channel count). Writing raw float samples under a 1-byte
    mono header is what produced static on playback.

    NOTE(review): the frame rate stays hard-coded at 44100 as before;
    confirm it matches the stream's sample rate.
    """
    global RecordActivate
    global StartTime
    global f
    volume_norm = np.linalg.norm(indata) * 10
    VolumeLevel = int(volume_norm)
    pcm = _as_wav_pcm(indata)
    if RecordActivate == False:
        print("itls false" + str(VolumeLevel))
        if VolumeLevel > 16:
            RecordActivate = True
            print("Begin")
            StartTime = timer.time()
            ThingTime = timer.strftime(
                "%Y-%m-%d %H:%M:%S", timer.localtime(timer.time())
            )
            print("Transmission detected at " + ThingTime)
            f = wave.open("scan.wav", "w")
            f.setnchannels(pcm.shape[1] if pcm.ndim > 1 else 1)
            f.setsampwidth(pcm.dtype.itemsize)
            f.setframerate(44100)
    if RecordActivate == True:
        if VolumeLevel < 16:
            print("Transmission ceased.")
            RecordActivate = False
            # Closing finalizes the header so the file is playable.
            f.close()
        else:
            f.writeframes(pcm.tobytes())
            print("recording..")
# Keep the stream open; print_sound is invoked by the stream for each block.
with sd.Stream(callback=print_sound):
    while True:
        # Sleep instead of spinning so the main thread does not burn a core.
        timer.sleep(0.1)

How to make this equalizer more efficient?

So I have been working on making an equalizer and the problem I am facing is that the pyaudio stream is streaming much faster than the speed with which the eq. is finding the bass component of the audio file. I will briefly outline the implementation:
I have created two extra threads and have used tkinter for the gui. Thread 1 computes the bass component (fn bass() ) of the sound in chunks of 50ms data.
Thread 2 plots that by actually creating a rectangle in tkinter with varying top left coordinates.
flag2 keeps the main thread running, while flag synchronizes the bass() and plot() functions. The last part of the code is to ensure that the display doesn't go faster than the song itself( however the exact opposite is the concern right now).
I am attaching the code here:
import numpy as np
from scipy.io import wavfile
from numpy import fft as fft
import time
import tkinter as tk
import threading
import pyaudio
import wave
CHUNK = 1024
wf = wave.open("test3.wav", 'rb')
p = pyaudio.PyAudio()


###
def callback(in_data, frame_count, time_info, status):
    """PyAudio output callback: return the next ``frame_count`` frames of wf."""
    data = wf.readframes(frame_count)
    return (data, pyaudio.paContinue)


# start=False: PyAudio otherwise starts the stream as soon as it is opened,
# so the file would begin playing before the analysis below is set up.
# Playback begins when start_stream() is called.
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                start=False,
                stream_callback=callback)
####
# Load the same file as a sample array for the analysis.
rate, audData = wavfile.read("test3.wav")
print("Rate " + str(rate))
print("Length of wav file(in s) = " + str(audData.shape[0]/rate))
ch1 = audData[:]

# Analysis window: tim seconds, i.e. pt samples.
tim = 0.050
pt = int(tim*rate)

# State shared by bass(), plot() and the main loop.
flag2, flag = True, False   # flag2: keep looping; flag: a new value is ready
cnt = value = 0             # cnt: windows processed; value: latest bass level
def bass():
    """Compute the bass level of the next ``pt``-sample window of ch1.

    Advances ``cnt``, takes the FFT of the window, keeps the first 25
    magnitude bins (scaled by ``pt``) squared, and stores their sum
    divided by ``pt`` in ``value``. ``flag`` is set to signal a fresh
    value; ``flag2`` is cleared once another full window would run past
    the end of the data.
    """
    global cnt, value, flag, flag2
    cnt += 1
    window = ch1[(cnt - 1) * pt:cnt * pt]
    low_bins = (abs(fft.fft(window)) / float(pt))[0:25] ** 2
    if (cnt + 1) * pt > len(audData[:]):
        flag2 = False
    value = np.sum(low_bins) / pt
    flag = True
def plot():
    """Draw the current ``value`` as a green bar in a 200x500 Tk canvas.

    Loops forever. Whenever ``flag`` is set, the canvas is cleared, the
    flag is reset and a bar whose top edge is ``500 - value/80`` is drawn.
    """
    global flag
    window = tk.Tk()
    canvas = tk.Canvas(window, width=200, height=500)
    canvas.pack()
    while True:
        if flag:
            canvas.delete("all")
            flag = False
            canvas.create_rectangle(50, 500 - (value / 80), 150, 500,
                                    fill="green")
            print(value / 80)  # to check whether it exceeds 500
        # Let Tk process pending redraws and events.
        window.update_idletasks()
        window.update()
def sound():
    """Play the file through the callback stream, then release resources.

    Starts ``stream``, polls every 0.1 s until it is no longer active,
    then stops and closes the stream, closes ``wf`` and terminates ``p``.
    """
    stream.start_stream()
    while stream.is_active():
        time.sleep(0.1)
    # Tear down in the same order as before.
    for release in (stream.stop_stream, stream.close, wf.close, p.terminate):
        release()
# Prime the display with the first window before playback starts.
bass()
t1 = threading.Thread(target=plot, name='t_1')
t2 = threading.Thread(target=sound, name='t_2')
t1.start()
t2.start()
playback_start = time.time()
while flag2:
    bass()
    # Pace against an absolute schedule. After this call bass() has
    # produced `cnt` windows, and the next one is due (cnt - 1) * tim
    # seconds after playback started. Waiting a relative `tim` with
    # 15 ms sleep steps overshoots a little every window, and that error
    # accumulates into visible lag.
    deadline = playback_start + (cnt - 1) * tim
    b = time.time()
    while b < deadline:
        time.sleep(min(0.015, deadline - b))
        b = time.time()
To overcome this processing speed problem, I tried to process 1 in every 3 chunks:
cnt +=1
fourier=fft.fft(ch1[((3*cnt-3)*pt):((3*cnt-2)*pt)])
fourier = abs(fourier) / float(pt)
fourier = fourier[0:25]
fourier = fourier**2
if (3*cnt+1)*pt > len(audData[:]) :
flag2 = False
#######
while (b-a) < 3*tim :
time.sleep(0.015)
b=time.time()
But even this is not up to the mark. The lag is visible after a few seconds. Any ideas on how to improve this?

Instead of efficiency, a more realistic solution might be delay matching. If you can determine the latency of your FFT and display (etc.) processes, then you can either delay the sound output (using a FIFO of some number of audio samples), or have the visualization process look ahead in the playback file by the equivalent number of samples.

How to change continuously the frequency of a sinusoidal sound?

I am using Pygame to render a Sprite in a window and I want to play a sinusoidal sound which frequency depends on the y position of that Sprite. I don't want discontinuities in the phase of the signal. What's the best way to achieve this?

I came up with this solution.
Change the freq to newfreq and then change the phase like this: newphase = 2*np.pi*t*(freq-newfreq)+phase
import pyaudio
import numpy as np
from time import time
# Output format and oscillator state shared with callback().
CHANNELS = 2
RATE = 44100
TT = time()     # time base; callback() advances it per rendered buffer
freq = 100      # frequency in use
newfreq = 100   # frequency requested by the main loop
phase = 0       # phase offset inside the sine


def callback(in_data, frame_count, time_info, status):
    """Produce ``frame_count`` stereo float32 frames of the sine tone.

    A pending frequency change (``newfreq != freq``) is applied first,
    with the phase offset corrected so the sine argument at ``TT`` is
    unchanged by the switch. Returns ``(data, pyaudio.paContinue)``.
    """
    global TT, phase, freq, newfreq
    if freq != newfreq:
        phase += 2*np.pi*TT*(freq-newfreq)
        freq = newfreq
    offsets = np.arange(frame_count)/float(RATE)
    left = np.sin(phase + 2*np.pi*freq*(TT + offsets))
    data = np.zeros((left.shape[0]*2,), np.float32)
    for channel in (0, 1):
        # Interleave the same signal into both channels.
        data[channel::2] = left
    TT += frame_count/float(RATE)
    return (data, pyaudio.paContinue)
# Open a float32 output stream that pulls its samples from callback().
p = pyaudio.PyAudio()
stream = p.open(
    format=pyaudio.paFloat32,
    channels=CHANNELS,
    rate=RATE,
    output=True,
    stream_callback=callback,
)
stream.start_stream()
start = time()
try:
    while True:
        now = time()
        if now - start > 1/24.:
            # Update the frequency; this will depend on y in the future.
            newfreq = 200+np.sin(2*np.pi*1/20.*now)*100
            print(newfreq)
            start = now
finally:
    # Always release the audio device, even on Ctrl+C.
    stream.stop_stream()
    stream.close()
    p.terminate()

Live recognition with Python and Pocketsphinx

I have recently been working with pocket sphinx in python. I have successfully got the
example below to work recognising a recorded wav.
#!/usr/bin/env python
import sys,os
def decodeSpeech(hmmd,lmdir,dictp,wavfile):
    """
    Decodes a speech file

    hmmd, lmdir and dictp are passed to ``ps.Decoder`` as ``hmm``, ``lm``
    and ``dict``. ``wavfile`` is the path of the file to decode; its first
    44 bytes are skipped before the rest is handed to ``decode_raw``.

    Returns the first element of ``get_hyp()``.

    Raises ImportError (after printing a hint) when pocketsphinx or
    sphinxbase cannot be imported. Previously execution carried on and
    failed later with a NameError.
    """
    try:
        import pocketsphinx as ps
        import sphinxbase
    except ImportError:
        print("""Pocket sphinx and sphixbase is not installed
in your system. Please install it with package manager.
""")
        raise
    speechRec = ps.Decoder(hmm = hmmd, lm = lmdir, dict = dictp)
    # open() works on Python 2 and 3; `with` closes the handle afterwards.
    with open(wavfile, 'rb') as wavFile:
        wavFile.seek(44)  # presumably skips the WAV header - TODO confirm
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
if __name__ == "__main__":
    # Model, language model, dictionary and input recording locations.
    model_root = "/home/jaganadhg/Desktop/Docs_New/kgisl"
    hmdir = model_root + "/model/hmm/wsj1"
    lmd = model_root + "/model/lm/wsj/wlist5o.3e-7.vp.tg.lm.DMP"
    dictd = model_root + "/model/lm/wsj/wlist5o.dic"
    wavfile = model_root + "/sa1.wav"
    recognised = decodeSpeech(hmdir,lmd,dictd,wavfile)
    # Print the result framed by two separator lines.
    print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
    print(recognised)
    print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
The problem is how can I do real time speech recognition from a microphone? In
a while loop with a if statement so that if a set word is recognised from the microphone
a function can be called?

The code for realtime recognition looks like this:
# Configure the decoder with the acoustic model, language model and dictionary.
config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'en-us/en-us'))
config.set_string('-lm', path.join(MODELDIR, 'en-us/en-us.lm.bin'))
config.set_string('-dict', path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
config.set_string('-logfn', '/dev/null')
decoder = Decoder(config)
import pyaudio
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
stream.start_stream()
in_speech_bf = False  # speech state seen on the previous buffer
decoder.start_utt()
while True:
    buf = stream.read(1024)
    if not buf:
        break
    decoder.process_raw(buf, False, False)
    if decoder.get_in_speech() != in_speech_bf:
        in_speech_bf = decoder.get_in_speech()
        if not in_speech_bf:
            # Speech just ended: close the utterance and report it.
            decoder.end_utt()
            hypothesis = decoder.hyp()
            # hyp() can return None when nothing was recognised; reading
            # .hypstr from it would raise AttributeError.
            if hypothesis is not None:
                print('Result: ' + hypothesis.hypstr)
            decoder.start_utt()
decoder.end_utt()
You can also use gstreamer python bindings in pocketsphinx, check livedemo.py

Try this. Pocketsphinx is now a GStreamer plugin.

This is code I found on the internet and modified slightly. It does listen, but the recognition is very poor and slow.
Can you help me improve it? It is built on Ubuntu 16.04 LTS.
I do not know much about programming.
Looking forward to your help.
# -*- encoding: utf-8 -*-
#!/usr/bin/env python
from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *
import os
import pyaudio
import wave
import audioop
from collections import deque
import time
import math;import Mic
"""
Written by Sophie Li, 2016
http://blog.justsophie.com/python-speech-to-text-with-pocketsphinx/
"""
class SpeechDetector:
    """Record phrases from the microphone and decode them with pocketsphinx.

    run() listens in a loop: audio louder than THRESHOLD starts a phrase,
    SILENCE_LIMIT seconds without such audio ends it, and the phrase is
    saved to a temporary WAV file and decoded. run() returns once a
    decoded phrase contains both "no" and "one".
    """

    def __init__(self):
        # Microphone stream config.
        self.CHUNK = 1024  # amount passed to stream.read() per chunk
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        # Silence limit in seconds. The max amount of seconds where only
        # silence is recorded. When this time passes the recording
        # finishes and the file is decoded.
        self.SILENCE_LIMIT = 1
        # Previous audio (in seconds) to prepend. When noise is detected,
        # this much previously recorded audio is prepended, which helps
        # to avoid chopping the beginning of the phrase.
        self.PREV_AUDIO = 0.5
        self.THRESHOLD = 4500
        self.num_phrases = -1
        # These will need to be modified according to where the pocketsphinx folder is
        MODELDIR = "/home/l/Desktop/pocketsphinx/model/en-us"
        # Create a decoder with certain model. The joined paths are the
        # same strings as before; the old calls passed absolute paths as
        # the second argument, which made os.path.join ignore MODELDIR.
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(MODELDIR, 'en-us/'))
        config.set_string('-lm', os.path.join(MODELDIR, 'en-us.lm.bin'))
        config.set_string('-dict', os.path.join(MODELDIR, 'cmudict-en-us.dict'))
        config.set_string('-keyphrase', 'no one')
        config.set_float('-kws_threshold', 1e+20)
        # Create the decoder object for streaming data.
        self.decoder = Decoder(config)

    def setup_mic(self, num_samples=50):
        """ Gets average audio intensity of your mic sound. You can use it to get
            average intensities while you're talking and/or silent. The average
            is the avg of the .2 of the largest intensities recorded.

            Sets self.THRESHOLD to 3500 when that average is below 3000,
            otherwise to the average plus 100.
        """
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=self.FORMAT,
                            channels=self.CHANNELS,
                            rate=self.RATE,
                            input=True,
                            frames_per_buffer=self.CHUNK)
            try:
                # NOTE(review): audioop.avg is called with width 4 although
                # FORMAT is paInt16; left as is because the thresholds
                # depend on this scale - confirm.
                values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
                          for _ in range(num_samples)]
            finally:
                stream.close()
        finally:
            p.terminate()
        values = sorted(values, reverse=True)
        # At least one sample, so small num_samples cannot divide by zero.
        top_n = max(1, int(num_samples * 0.2))
        r = sum(values[:top_n]) / top_n
        if r < 3000:
            self.THRESHOLD = 3500
        else:
            self.THRESHOLD = r + 100

    def save_speech(self, data, p):
        """
        Saves mic data to temporary WAV file. Returns filename of saved
        file

        data is a list of raw audio chunks; p is the PyAudio instance
        used to look up the sample size for self.FORMAT.
        """
        filename = 'output_' + str(int(time.time())) + '.wav'
        # Chunks are byte strings; b''.join works on Python 2 and 3.
        frames = b''.join(data)
        wf = wave.open(filename, 'wb')
        try:
            wf.setnchannels(self.CHANNELS)
            wf.setsampwidth(p.get_sample_size(self.FORMAT))
            wf.setframerate(self.RATE)
            wf.writeframes(frames)
        finally:
            wf.close()
        return filename

    def decode_phrase(self, wav_file):
        """Feed wav_file to the decoder in 1024-byte pieces; return the words."""
        self.decoder.start_utt()
        with open(wav_file, "rb") as stream:
            while True:
                buf = stream.read(1024)
                if not buf:
                    break
                self.decoder.process_raw(buf, False, False)
        self.decoder.end_utt()
        return [seg.word for seg in self.decoder.seg()]

    def run(self):
        """
        Listens to Microphone, extracts phrases from it and calls pocketsphinx
        to decode the sound

        Returns after a phrase containing both "no" and "one" has been
        decoded. The microphone stream is closed on every exit path.
        """
        self.setup_mic()
        # Open stream
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)
        try:
            audio2send = []
            # Chunks per second. deque needs an integer maxlen, so the
            # window sizes are truncated to whole chunks.
            rel = self.RATE // self.CHUNK
            silence_chunks = int(self.SILENCE_LIMIT * rel)
            prev_chunks = int(self.PREV_AUDIO * rel)
            slid_win = deque(maxlen=silence_chunks)
            # Prepend audio from PREV_AUDIO seconds before noise was detected
            prev_audio = deque(maxlen=prev_chunks)
            started = False
            while True:
                cur_data = stream.read(self.CHUNK)
                slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
                if any(x > self.THRESHOLD for x in slid_win):
                    if not started:
                        print("Bắt đầu ghi âm")
                        started = True
                    audio2send.append(cur_data)
                elif started:
                    print("Hoàn thành ghi âm")
                    filename = self.save_speech(list(prev_audio) + audio2send, p)
                    try:
                        r = self.decode_phrase(filename)
                    finally:
                        # Removes temp audio file
                        os.remove(filename)
                    print("RESULT:  %s" % (r,))
                    # Hot word "no one": stop once both words were heard.
                    if "one" in r and "no" in r:
                        Mic.playaudiofromAudio().play("/home/l/Desktop/PROJECT/Audio/beep_hi.wav")
                        return
                    # Reset all
                    started = False
                    slid_win = deque(maxlen=silence_chunks)
                    prev_audio = deque(maxlen=prev_chunks)
                    audio2send = []
                    print("Chế độ nghe ...")
                else:
                    prev_audio.append(cur_data)
        finally:
            # This cleanup used to sit after the infinite loop and was
            # never reached.
            print("* Hoàn thành nghe")
            stream.close()
            p.terminate()

How to play an audiofile with pyaudio?

I do not understand the example material for pyaudio. It seems they had written an entire small program and it threw me off.
How do I just play a single audio file?
Format is not an issue, I just want to know the bare minimum code I need to play an audio file.

Maybe this small wrapper around their example (warning: hastily written) will help you understand what their code does.
import pyaudio
import wave
import sys
class AudioFile:
    """Play a WAV file through a blocking PyAudio output stream.

    Create it with a file name (or file object), call play() to play the
    whole file, then close() to release the stream, PyAudio and the file.
    """
    chunk = 1024  # frames read from the file per write

    def __init__(self, file):
        """ Init audio stream """
        self.wf = wave.open(file, 'rb')
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format = self.p.get_format_from_width(self.wf.getsampwidth()),
            channels = self.wf.getnchannels(),
            rate = self.wf.getframerate(),
            output = True
        )

    def play(self):
        """ Play entire file """
        data = self.wf.readframes(self.chunk)
        # readframes returns an empty byte string at end of file.
        while data:
            self.stream.write(data)
            data = self.wf.readframes(self.chunk)

    def close(self):
        """ Graceful shutdown """
        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()
        # The wave reader was previously left open.
        self.wf.close()
# Usage example for pyaudio
a = AudioFile("1.wav")
try:
    a.play()
finally:
    # Release the audio device even if playback fails or is interrupted.
    a.close()

The example seems pretty clear to me. You simply save the example as playwav.py call:
python playwav.py my_fav_wav.wav
The wave example with some extra comments:
import pyaudio
import wave
import sys
# Number of frames to read from the file per write.
chunk = 1024

# Validation: without a wave file argument, show usage and exit.
if len(sys.argv) < 2:
    print("Plays a wave file.\n\nUsage: %s filename.wav" % sys.argv[0])
    sys.exit(-1)

# ************************************************************************
#       This is the start of the "minimum needed to read a wave"
# ************************************************************************

# Open the file for reading.
wf = wave.open(sys.argv[1], 'rb')

# Create an audio object.
p = pyaudio.PyAudio()

# Open an output stream whose format matches the wave file.
stream = p.open(
    format=p.get_format_from_width(wf.getsampwidth()),
    channels=wf.getnchannels(),
    rate=wf.getframerate(),
    output=True,
)

# Read chunk-sized pieces until the file is exhausted; writing to the
# stream is what *actually* plays the sound.
for data in iter(lambda: wf.readframes(chunk), b''):
    stream.write(data)

# Cleanup stuff.
wf.close()
stream.close()
p.terminate()

This way requires ffmpeg for pydub, but can play not only wave files:
import pyaudio
import sys
from pydub import AudioSegment
if len(sys.argv) <= 1:
    print('No File Name!')
    sys.exit(1)
# AudioSegment slices are indexed in milliseconds, so each write below
# covers `chunk` milliseconds of audio, not `chunk` frames.
chunk = 1024
# Re-join the arguments so an unquoted file name containing spaces works.
fn = ' '.join(sys.argv[1:])
pd = AudioSegment.from_file(fn)
p = pyaudio.PyAudio()
stream = p.open(format =
                p.get_format_from_width(pd.sample_width),
                channels = pd.channels,
                rate = pd.frame_rate,
                output = True)
try:
    i = 0
    # raw_data is the public accessor for the segment's sample bytes.
    data = pd[:chunk].raw_data
    while data:
        stream.write(data)
        i += chunk
        data = pd[i:i + chunk].raw_data
finally:
    # Release the audio device even if playback is interrupted.
    stream.stop_stream()
    stream.close()
    p.terminate()
sys.exit(0)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Real time continuous sounds with Pydub - python

Related

Trying to record audio with python-sounddevice but result is just static

How to make this equalizer more efficient?

How to change continuously the frequency of a sinusoidal sound?

Live recognition with Python and Pocketsphinx

How to play an audiofile with pyaudio?

Categories

Resources