How can I do real-time speech-to-text with DeepSpeech and a microphone?
I tried running this script I found on GitHub, but when I run it and don't say anything for a while, it starts printing random text.
import pyaudio
import deepspeech
import numpy as np
from queue import SimpleQueue

BUFFERS_PER_SECOND = 10
SAMPLE_WIDTH = 2
BEAM_WIDTH = 512

# switch between the TensorFlow and TensorFlow Lite models
# MODEL_PATH = 'deepspeech-0.8.1-models.tflite'
MODEL_PATH = r'models\DeepSpeech\deepspeech-0.9.3-models.pbmm'
SCORER_PATH = r'models\DeepSpeech\deepspeech-0.9.3-models.scorer'

buffer_queue = SimpleQueue()

def audio_callback(in_data, frame_count, time_info, status_flags):
    buffer_queue.put(np.frombuffer(in_data, dtype='int16'))
    return (None, pyaudio.paContinue)

def find_device(pyaudio, device_name):
    '''Find a specific input device, or return the default input device.'''
    default = pyaudio.get_default_input_device_info()
    for i in range(pyaudio.get_device_count()):
        name = pyaudio.get_device_info_by_index(i)['name']
        if name == device_name:
            return (i, name)
    return (default['index'], default['name'])

def main():
    model = deepspeech.Model(MODEL_PATH)
    model.setBeamWidth(BEAM_WIDTH)
    model.enableExternalScorer(SCORER_PATH)
    stream = model.createStream()

    audio = pyaudio.PyAudio()
    index, name = find_device(audio, 'pulse')
    print(f'selected device {name}')
    buffer_size = model.sampleRate() // BUFFERS_PER_SECOND
    audio_stream = audio.open(rate=model.sampleRate(),
                              channels=1,
                              format=audio.get_format_from_width(
                                  SAMPLE_WIDTH, unsigned=False),
                              input_device_index=index,
                              input=True,
                              frames_per_buffer=buffer_size,
                              stream_callback=audio_callback)

    num_iterations = BUFFERS_PER_SECOND * 2
    i = 0
    while audio_stream.is_active():
        stream.feedAudioContent(buffer_queue.get())
        if i % num_iterations == 0:
            text = stream.intermediateDecode()
            if text.find('stop') >= 0:
                break
            print(text)
        i += 1
    print(stream.finishStream())
    audio_stream.close()

if __name__ == '__main__':
    main()
I know there are other options, but every option I found was either a free trial or paid from the start. So if someone could help me get DeepSpeech working, or if somebody knows a free alternative, I would really appreciate it.
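For the random text during silence: DeepSpeech's streaming decoder will decode whatever audio it is fed, including background noise, so long stretches of silence tend to produce hallucinated words. A common mitigation is to gate the microphone buffers with a voice activity detector before feeding them to the model. Below is a minimal, untested sketch using the webrtcvad package; the frame length and aggressiveness level are assumptions you may need to tune:

import webrtcvad

vad = webrtcvad.Vad(2)                    # aggressiveness 0-3; 2 is a middle ground
SAMPLE_RATE = 16000                       # DeepSpeech models expect 16 kHz audio
FRAME_SAMPLES = SAMPLE_RATE * 20 // 1000  # webrtcvad accepts 10, 20, or 30 ms frames

def buffer_has_speech(buffer_int16):
    '''Return True if any 20 ms frame in this int16 numpy buffer contains speech.'''
    raw = buffer_int16.tobytes()
    frame_bytes = FRAME_SAMPLES * 2       # int16 -> 2 bytes per sample
    for offset in range(0, len(raw) - frame_bytes + 1, frame_bytes):
        if vad.is_speech(raw[offset:offset + frame_bytes], SAMPLE_RATE):
            return True
    return False

# In the main loop, skip silent buffers instead of feeding them to the model:
#     buf = buffer_queue.get()
#     if buffer_has_speech(buf):
#         stream.feedAudioContent(buf)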
Related
I am trying to stream the output of an Azure text-to-speech instance to my speaker with PyAudio, using Microsoft's sample code.
I tried to write to PyAudio's stream inside Azure's callback function def write, but it gives me this error:

my_stream.write(audio_buffer)
  File "/opt/homebrew/lib/python3.10/site-packages/pyaudio.py", line 589, in write
    pa.write_stream(self._stream, frames, num_frames,
TypeError: argument 2 must be read-only bytes-like object, not memoryview
How do I handle Azure's output so that the PyAudio stream accepts it as audio data?
Full code:

import azure.cognitiveservices.speech as speechsdk
import os, sys, pyaudio

pa = pyaudio.PyAudio()

my_text = "My emotional experiences are varied, but mostly involve trying to find a balance between understanding others’ feelings and managing my own. I also explore the intersection of emotion and technology through affective computing and related research."

voc_data = {
    'channels': 1 if sys.platform == 'darwin' else 2,
    'rate': 44100,
    'width': pa.get_sample_size(pyaudio.paInt16),
    'format': pyaudio.paInt16,
    'frames': []
}

my_stream = pa.open(format=voc_data['format'],
                    channels=voc_data['channels'],
                    rate=voc_data['rate'],
                    output=True)

speech_key = os.getenv('SPEECH_KEY')
service_region = os.getenv('SPEECH_REGION')

def speech_synthesis_to_push_audio_output_stream():
    """Performs speech synthesis and pushes the audio output to a stream."""
    class PushAudioOutputStreamSampleCallback(speechsdk.audio.PushAudioOutputStreamCallback):
        """
        Example class that implements the PushAudioOutputStreamCallback, which is used to show
        how to push output audio to a stream
        """
        def __init__(self) -> None:
            super().__init__()
            self._audio_data = bytes(0)
            self._closed = False

        def write(self, audio_buffer: memoryview) -> int:
            """
            The callback function which is invoked when the synthesizer has an output audio chunk
            to write out
            """
            self._audio_data += audio_buffer
            my_stream.write(audio_buffer)
            print("{} bytes received.".format(audio_buffer.nbytes))
            return audio_buffer.nbytes

        def close(self) -> None:
            """
            The callback function which is invoked when the synthesizer is about to close the
            stream.
            """
            self._closed = True
            print("Push audio output stream closed.")

        def get_audio_data(self) -> bytes:
            return self._audio_data

        def get_audio_size(self) -> int:
            return len(self._audio_data)

    # Creates an instance of a speech config with the specified subscription key and service region.
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Creates a customized instance of PushAudioOutputStreamCallback
    stream_callback = PushAudioOutputStreamSampleCallback()
    # Creates an audio output stream from the callback
    push_stream = speechsdk.audio.PushAudioOutputStream(stream_callback)
    # Creates a speech synthesizer using the push stream as audio output.
    stream_config = speechsdk.audio.AudioOutputConfig(stream=push_stream)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=stream_config)

    # Receives a text from console input and synthesizes it to stream output.
    while True:
        # print("Enter some text that you want to synthesize, Ctrl-Z to exit")
        # try:
        #     text = input()
        # except EOFError:
        #     break
        result = speech_synthesizer.speak_text_async(my_text).get()
        # Check result
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            print("Speech synthesized for text [{}], and the audio was written to output stream.".format(my_text))
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech synthesis canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))
        # Destroys result, which is necessary for destroying the speech synthesizer
        del result

    # Destroys the synthesizer in order to close the output stream.
    del speech_synthesizer
    print("Totally {} bytes received.".format(stream_callback.get_audio_size()))

speech_synthesis_to_push_audio_output_stream()
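The TypeError itself comes from PyAudio rejecting the memoryview that the Speech SDK hands to the callback; converting it to bytes before writing should satisfy Stream.write. A minimal sketch of the likely fix inside the write callback:

def write(self, audio_buffer: memoryview) -> int:
    chunk = audio_buffer.tobytes()   # copy the memoryview into an immutable bytes object
    self._audio_data += chunk
    my_stream.write(chunk)           # PyAudio's Stream.write accepts bytes
    print("{} bytes received.".format(audio_buffer.nbytes))
    return audio_buffer.nbytes

Also check that the rate the PyAudio stream was opened with matches the synthesizer's output format, or playback will sound too fast or too slow.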
Here is a workaround: instead of using a stream, use a file. The audio is stored in the file, and then we simply read the file back and play it with PyAudio.
# Dependencies
import os
import azure.cognitiveservices.speech as speechsdk
import pyaudio
import wave

speech_config = speechsdk.SpeechConfig(subscription="<Key>", region="<Region>")

# Audio Config
audio_config = speechsdk.audio.AudioOutputConfig(filename="background.wav")
speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'

speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

print("Enter the Text:- ")
text = input()
speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
print("Conversion is Complete")

filename = 'background.wav'  # same as in the audio config
chunk = 1024

file = wave.open(filename, 'rb')
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(file.getsampwidth()),
                channels=file.getnchannels(),
                rate=file.getframerate(),
                output=True)

data = file.readframes(chunk)
print("Starting Audio")
while data:  # readframes returns bytes, so test truthiness rather than comparing to ''
    stream.write(data)
    data = file.readframes(chunk)

stream.stop_stream()
stream.close()
p.terminate()
This approach takes more disk space and more time, since the audio is first written to a file and then read back.
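An in-memory alternative skips the temporary file entirely: leave audio_config unset and play result.audio_data with PyAudio. This is a sketch under the assumption that the azure-cognitiveservices-speech SDK is used; the RIFF output format is pinned explicitly so the playback rate is known:

import azure.cognitiveservices.speech as speechsdk
import pyaudio

speech_config = speechsdk.SpeechConfig(subscription="<Key>", region="<Region>")
speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'
# Pin the output format so we know the sample rate to play it back at
speech_config.set_speech_synthesis_output_format(
    speechsdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm)

# audio_config=None keeps the synthesized audio in memory on the result object
synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)
result = synthesizer.speak_text_async("Hello from memory").get()

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, output=True)
stream.write(result.audio_data[44:])  # skip the 44-byte RIFF header before playing raw PCM
stream.stop_stream()
stream.close()
p.terminate()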
I am using Windows and Python. The objective is simple: running main.py starts speech recognition, and once something is recognized, the text is returned to main.py. The speech recognition program recognizes without any issues; the problem is with multithreading and getting the result back to main.py.
Here is the main.py:
import threading
from speechEngine.recognize import *
from SpeechSynthesis.speech import *
from core.AI import *

spch = "default"
newthread = threading.Thread(target=speechrec())
newthread.start()
while True:
    if spch == "default":
        print("at default")
        continue
    else:
        print(spch)
        result = process(spch)
        speak(result)
        spch = "default"
And here is the speech recognition module, which is called as a new thread:
import argparse
import os
import queue
from typing import Text
import sounddevice as sd
import vosk
import sys
import json
# from vosk import SetLogLevel
# SetLogLevel(-1)

def speechrec():
    q = queue.Queue()
    # a lot of argument-parsing lines have been deleted to improve readability
    try:
        if args.model is None:
            args.model = "model"
        if args.samplerate is None:
            device_info = sd.query_devices(args.device, 'input')
            args.samplerate = int(device_info['default_samplerate'])
        model = vosk.Model(args.model)
        with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000, device=args.device, dtype='int16', channels=1, callback=callback):
            rec = vosk.KaldiRecognizer(model, args.samplerate)
            while True:
                data = q.get()
                if rec.AcceptWaveform(data):
                    vc = rec.FinalResult()  # raw JSON output of what the user said
                    vc = json.loads(vc)
                    vc = vc['text']         # extract the recognized text
                    if vc != '':
                        global spch
                        spch = vc
    except KeyboardInterrupt:
        parser.exit(0)
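Two things in the code above likely cause the problem. First, threading.Thread(target=speechrec()) calls speechrec immediately in the main thread (the parentheses invoke it) and passes its return value as the target; it should be target=speechrec. Second, global spch inside the recognition module refers to that module's own global, not the spch in main.py, so main.py never sees the update. A queue is the usual way to hand results between threads; here is a minimal sketch, assuming you can edit both files:

import threading
import queue

results = queue.Queue()  # shared between main.py and the recognizer

# In speechrec(), replace "global spch; spch = vc" with:
#     results.put(vc)

worker = threading.Thread(target=speechrec, daemon=True)  # note: no parentheses
worker.start()

while True:
    spch = results.get()   # blocks until the recognizer produces text
    print(spch)
    result = process(spch)
    speak(result)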
I followed a tutorial on YouTube on how to do text-to-speech with Python, and I am getting the following error.
import re
import wave
import pyaudio
import _thread
import time

class TextToSpeech:
    CHUNK = 1024

    def __init__(self, words_pron_dict:str = 'cmudict-0.7b.txt'):
        self._l = {}  # Error right here ^
        self._load_words(words_pron_dict)

    def _load_words(self, words_pron_dict:str):
        with open(words_pron_dict, 'r') as file:
            for line in file:
                if not line.startswith(';;;'):
                    key, val = line.split(' ', 2)
                    self._l[key] = re.findall(r"[A-Z]+", val)

    def get_pronunciation(self, str_input):
        list_pron = []
        for word in re.findall(r"[\w']+", str_input.upper()):
            if word in self._l:
                list_pron += self._l[word]
        print(list_pron)
        delay = 0
        for pron in list_pron:
            _thread.start_new_thread(TextToSpeech._play_audio, (pron, delay,))
            delay += 0.145

    def _play_audio(sound, delay):
        try:
            time.sleep(delay)
            wf = wave.open("sounds/" + sound + ".wav", 'rb')
            p = pyaudio.PyAudio()
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            data = wf.readframes(TextToSpeech.CHUNK)
            while data:
                stream.write(data)
                data = wf.readframes(TextToSpeech.CHUNK)
            stream.stop_stream()
            stream.close()
            p.terminate()
            return
        except:
            pass

if __name__ == '__main__':
    tts = TextToSpeech()
    while True:
        tts.get_pronunciation(input('Enter a word or phrase: '))
The error is "Invalid Sytanx" right where the colon is right before "str" at the top. I'm not sure what I am doing wrong. I am using IDLE for the editor, this script requires pyaudio, which I have installed, and it also requires the document "cmudict-0.7b.text" which I also have.
I've tried copying the name of the file directly to the code, adding parenthesis changing the ' to a " where the txt file name is, to no prevail. I would appreciate it if someone could help me on this and give me some insight on what I'm doing wrong.
I'm using Python 2.7.
Thanks.
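The cause is almost certainly the Python version: function annotations (the ": str" in the parameter list) were introduced in Python 3.0, so Python 2.7's parser reports invalid syntax at exactly that colon. Either run the script with Python 3 (the rest of the script is written for Python 3 as well, so that is the simpler fix), or strip the annotations for 2.7:

# Python 2.7-compatible signature: drop the ": str" annotations
def __init__(self, words_pron_dict='cmudict-0.7b.txt'):
    self._l = {}
    self._load_words(words_pron_dict)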
I'm trying to make a program that generates sounds from a Wi-Fi probe log, so that the number of devices (within a certain distance) determines the number of tones, and each device's RSSI sets its frequency.
I'm trying to make it as close to real time as possible, but I can't figure out how to make the tones continuous and change frequency when a value changes.
'''
This program takes a log file from a
wifi probe and translates it into sound
'''
import time
import math      # import needed modules
import pyaudio   # sudo apt-get install python-pyaudio
import threading
from threading import Thread
from pydub import AudioSegment
from pydub.generators import Sine
from pydub.playback import play
import signal

def logData():
    '''
    Takes log file data and puts it into a database;
    updates every 1 sec
    '''
    global dic
    global tone
    tone = []
    dic = {}
    while True:
        with open("/Users/CWT/Documents/VÆRKER/probemon.log") as f:
            for line in f:
                (key, val) = line.split()
                if val <= str(-50):
                    dic[(key)] = val
        print(dic)
        time.sleep(1)

def sound():
    '''
    Generate sounds
    '''
    # Play final tone
    while True:
        with open("/Users/CWT/Documents/VÆRKER/probemon.log") as i:
            try:
                tone1 = Sine(abs(int(list(dic.values())[0]))).to_audio_segment(3000)
                tone2 = Sine(abs(int(list(dic.values())[1]))).to_audio_segment(3000)
                tone3 = Sine(abs(int(list(dic.values())[2]))).to_audio_segment(3000)
            except:
                print('Index error')
            try:
                multitone1 = tone1
                multitone2 = tone1.overlay(tone2)
                multitone3 = tone3.overlay(multitone2)
            except:
                print('Multitone error')
            try:
                if len(dic) <= 1:
                    play(multitone1.fade_in(250).fade_out(250))
                elif len(dic) == 2:
                    play(multitone2.fade_in(250).fade_out(250))
                elif len(dic) >= 3:
                    play(multitone3.fade_in(250).fade_out(250))
            except:
                print('Playback error')

if __name__ == '__main__':
    try:
        Thread(target=logData).start()
        time.sleep(1)
        Thread(target=sound).start()
    except KeyboardInterrupt:
        print('Interrupted')
I was able to craft a boilerplate solution that you can tailor to your needs.
Here's the central idea:
1) Read the last line of the log file using os.popen, in a continuous loop that repeats every second.
2) The RSSI values are very small, and the differences between them are also small. We multiply by a constant (100 here) to create a noticeable difference; you can try different values.
3) Using pydub, we create sine tones and play them.
Code
from pydub.generators import Sine
from pydub import AudioSegment
from pydub.playback import play
import os
import time

sr = 44100      # sample rate
bd = 16         # bit depth
l = 50.0        # tone duration in milliseconds
last_line = ""  # to avoid playing the same line again
log_file = "probemon.log"

while True:
    line = os.popen('tail -n 1 {}'.format(log_file)).read()
    if last_line == line:
        pass
    else:
        key, val = line.split()
        f = abs(int(val)) * 100
        # create sine wave of the given frequency
        sine_wave = Sine(f, sample_rate=sr, bit_depth=bd)
        # convert waveform to an AudioSegment for playback and export
        sine_segment = sine_wave.to_audio_segment(duration=l)
        print "mac:{} , rssi:{}".format(key, val)
        # play the audio segment
        play(sine_segment)
        last_line = line
    time.sleep(1)  # sleep 1 sec; sync this with the log file fill rate
I tested by filling up the probemon.log file line by line from a different terminal, with a delay of 1 second. The loop will wait if there is no new data.
EDIT1
An audio "tone" has a "frequency"; when you change the frequency, the tone changes.
Based on our discussion, since we need the tone to vary in real time, we can't use pydub, which is mostly good for offline manipulation.
pyaudio has a non-blocking mode using a callback, which allows manipulating the stream data while it is being played in real time.
This solution plays audio based on the last line of the log continuously, until the log data changes.
It also eliminates the popping/cracking sound that occurs when merging two tones.
Inspiration from here.
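The pop is avoided by keeping the sine wave's phase continuous across a frequency change, as the callback below does: when the frequency switches from f_old to f_new at stream time t, the phase is rebased as phase' = phase + 2*pi*t*(f_old - f_new), so that sin(phase' + 2*pi*f_new*t) equals sin(phase + 2*pi*f_old*t) at the switch instant and the waveform has no discontinuity.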
import pyaudio
import numpy as np
from time import time, sleep
import os

CHANNELS = 2
RATE = 44100
TT = time()
freq = 100
newfreq = 100
phase = 0
log_file = "probemon.log"

def callback(in_data, frame_count, time_info, status):
    global TT, phase, freq, newfreq
    if newfreq != freq:
        # rebase the phase so the waveform stays continuous across the frequency change
        phase = 2*np.pi*TT*(freq-newfreq) + phase
        freq = newfreq
    left = np.sin(phase + 2*np.pi*freq*(TT + np.arange(frame_count)/float(RATE)))
    data = np.zeros((left.shape[0]*2,), np.float32)
    data[0::2] = left  # left channel
    data[1::2] = left  # right channel
    TT += frame_count/float(RATE)
    return (data, pyaudio.paContinue)

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                stream_callback=callback)
stream.start_stream()

tmphold = ""
try:
    while True:
        line = os.popen('tail -n 1 {}'.format(log_file)).read()
        try:
            key, val = line.split()
        except:
            key, val = "default", 0.0
        f = abs(int(val))
        newfreq = f * 10  # update the frequency per log entry
        if newfreq != tmphold:
            tmphold = newfreq
            print "mac:{} , rssi:{} , freq:{} Hz".format(key, val, newfreq)
finally:
    stream.stop_stream()
    stream.close()
    p.terminate()
Result
mac:default , rssi:0.0 , freq:0 Hz
mac:d8:8f:76:1a:cb:65 , rssi:-43 , freq:430 Hz
mac:04:4f:4c:77:72:8f , rssi:-51 , freq:510 Hz
mac:20:39:56:af:51:49 , rssi:-39 , freq:390 Hz
mac:20:39:56:af:51:49 , rssi:-45 , freq:450 Hz
mac:5e:e2:1d:a3:d2:da , rssi:-47 , freq:470 Hz
mac:5e:e2:1d:a3:d2:da , rssi:-49 , freq:490 Hz
mac:12:84:16:9c:75:ee , rssi:-43 , freq:430 Hz
mac:da:a1:19:71:4d:0c , rssi:-55 , freq:550 Hz
mac:d8:8f:76:1a:cb:65 , rssi:-49 , freq:490 Hz
I have recently been working with PocketSphinx in Python. I have successfully got the example below to work, recognising a recorded wav file.
#!/usr/bin/env python
import sys, os

def decodeSpeech(hmmd, lmdir, dictp, wavfile):
    """
    Decodes a speech file
    """
    try:
        import pocketsphinx as ps
        import sphinxbase
    except:
        print """Pocketsphinx and sphinxbase are not installed
        on your system. Please install them with your package manager.
        """
    speechRec = ps.Decoder(hmm=hmmd, lm=lmdir, dict=dictp)
    wavFile = file(wavfile, 'rb')
    wavFile.seek(44)  # skip the 44-byte WAV header
    speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]

if __name__ == "__main__":
    hmdir = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/hmm/wsj1"
    lmd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.3e-7.vp.tg.lm.DMP"
    dictd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.dic"
    wavfile = "/home/jaganadhg/Desktop/Docs_New/kgisl/sa1.wav"
    recognised = decodeSpeech(hmdir, lmd, dictd, wavfile)
    print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
    print recognised
    print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
The problem is: how can I do real-time speech recognition from a microphone? Ideally in a while loop with an if statement, so that if a set word is recognised from the microphone, a function can be called.
The code for real-time recognition looks like this:

import pyaudio
from os import path
from pocketsphinx.pocketsphinx import Decoder

MODELDIR = "pocketsphinx/model"  # adjust to wherever your pocketsphinx models live

config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'en-us/en-us'))
config.set_string('-lm', path.join(MODELDIR, 'en-us/en-us.lm.bin'))
config.set_string('-dict', path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
config.set_string('-logfn', '/dev/null')
decoder = Decoder(config)

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
stream.start_stream()

in_speech_bf = False
decoder.start_utt()
while True:
    buf = stream.read(1024)
    if buf:
        decoder.process_raw(buf, False, False)
        if decoder.get_in_speech() != in_speech_bf:
            in_speech_bf = decoder.get_in_speech()
            if not in_speech_bf:
                decoder.end_utt()
                print 'Result:', decoder.hyp().hypstr
                decoder.start_utt()
    else:
        break
decoder.end_utt()
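To call a function when a set word is recognised, as the question asks, you can check the hypothesis string after each utterance. A minimal sketch, where the hot word 'hello' and on_hotword() are placeholders:

# inside the loop above, right after the utterance ends:
hypothesis = decoder.hyp().hypstr
if 'hello' in hypothesis.split():
    on_hotword()  # placeholder: call whatever function you need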
You can also use the GStreamer Python bindings with pocketsphinx; check livedemo.py.
Try this: Pocketsphinx is now a GStreamer plugin.
This is code I found on the internet, and I've modified a few things, but it listens for words very badly and slowly.
Can you help me improve it? It runs on Ubuntu 16.04 LTS.
I do not know much about programming.
Looking forward to your help.
# -*- encoding: utf-8 -*-
#!/usr/bin/env python
from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *
import os
import pyaudio
import wave
import audioop
from collections import deque
import time
import math
import Mic

"""
Written by Sophie Li, 2016
http://blog.justsophie.com/python-speech-to-text-with-pocketsphinx/
"""

class SpeechDetector:
    def __init__(self):
        # Microphone stream config.
        self.CHUNK = 1024  # CHUNKS of bytes to read each time from mic
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000

        self.SILENCE_LIMIT = 1  # Silence limit in seconds. The max amount of seconds where
                                # only silence is recorded. When this time passes the
                                # recording finishes and the file is decoded
        self.PREV_AUDIO = 0.5   # Previous audio (in seconds) to prepend. When noise
                                # is detected, how much of previously recorded audio is
                                # prepended. This helps to prevent chopping the beginning
                                # of the phrase.
        self.THRESHOLD = 4500
        self.num_phrases = -1

        # These will need to be modified according to where the pocketsphinx folder is
        MODELDIR = "/home/l/Desktop/pocketsphinx/model/en-us"

        # Create a decoder with a certain model
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us/'))
        config.set_string('-lm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us.lm.bin'))
        config.set_string('-dict', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/cmudict-en-us.dict'))
        config.set_string('-keyphrase', 'no one')
        config.set_float('-kws_threshold', 1e+20)

        # Creates the decoder object for streaming data.
        self.decoder = Decoder(config)

    def setup_mic(self, num_samples=50):
        """ Gets average audio intensity of your mic sound. You can use it to get
        average intensities while you're talking and/or silent. The average
        is the avg of the .2 of the largest intensities recorded.
        """
        # print "Getting intensity values from mic."
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)

        values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
                  for x in range(num_samples)]
        values = sorted(values, reverse=True)
        r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
        # print " Finished "
        # print " Average audio intensity is ", r
        stream.close()
        p.terminate()

        if r < 3000:
            self.THRESHOLD = 3500
        else:
            self.THRESHOLD = r + 100

    def save_speech(self, data, p):
        """
        Saves mic data to a temporary WAV file. Returns the filename of the
        saved file.
        """
        filename = 'output_' + str(int(time.time()))
        # writes data to WAV file
        data = ''.join(data)
        wf = wave.open(filename + '.wav', 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
        wf.setframerate(16000)  # TODO make this value a function parameter?
        wf.writeframes(data)
        wf.close()
        return filename + '.wav'

    def decode_phrase(self, wav_file):
        self.decoder.start_utt()
        stream = open(wav_file, "rb")
        while True:
            buf = stream.read(1024)
            if buf:
                self.decoder.process_raw(buf, False, False)
            else:
                break
        self.decoder.end_utt()
        words = []
        [words.append(seg.word) for seg in self.decoder.seg()]
        return words

    def run(self):
        """
        Listens to the microphone, extracts phrases from it and calls pocketsphinx
        to decode the sound
        """
        self.setup_mic()

        # Open stream
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)

        audio2send = []
        cur_data = ''  # current chunk of audio data
        rel = self.RATE / self.CHUNK
        slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
        # Prepend audio from 0.5 seconds before noise was detected
        prev_audio = deque(maxlen=int(self.PREV_AUDIO * rel))  # deque maxlen must be an int
        started = False

        while True:
            cur_data = stream.read(self.CHUNK)
            slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))

            if sum([x > self.THRESHOLD for x in slid_win]) > 0:
                if started == False:
                    print "Started recording"
                    started = True
                audio2send.append(cur_data)
            elif started:
                print "Finished recording"
                filename = self.save_speech(list(prev_audio) + audio2send, p)
                r = self.decode_phrase(filename)
                print "RESULT: ", r
                # my hot word is "no one"; if r contains both "no" and "one", end the program
                if r.count("one") > 0 and r.count("no") > 0:
                    Mic.playaudiofromAudio().play("/home/l/Desktop/PROJECT/Audio/beep_hi.wav")
                    os.remove(filename)
                    return
                # Removes temp audio file
                os.remove(filename)
                # Reset all
                started = False
                slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
                prev_audio = deque(maxlen=int(0.5 * rel))
                audio2send = []
                print "Listening ..."
            else:
                prev_audio.append(cur_data)

        print "* Done listening"
        stream.close()
        p.terminate()
p.terminate()