Azure Text-To-Speech to PyAudio Stream - python

I am trying to stream the output of an Azure text-to-speech instance to my speaker with PyAudio, using Microsoft's sample code.
I tried to write to PyAudio's stream inside the write callback of Azure's push output stream, but it gives me this error:
my_stream.write(audio_buffer)
  File "/opt/homebrew/lib/python3.10/site-packages/pyaudio.py", line 589, in write
    pa.write_stream(self._stream, frames, num_frames,
TypeError: argument 2 must be read-only bytes-like object, not memoryview
How do I handle Azure's output so that the PyAudio stream accepts it as audio data?
Full code:
import azure.cognitiveservices.speech as speechsdk
import os, sys, pyaudio

pa = pyaudio.PyAudio()

my_text = "My emotional experiences are varied, but mostly involve trying to find a balance between understanding others’ feelings and managing my own. I also explore the intersection of emotion and technology through affective computing and related research."

voc_data = {
    'channels': 1 if sys.platform == 'darwin' else 2,
    'rate': 44100,
    'width': pa.get_sample_size(pyaudio.paInt16),
    'format': pyaudio.paInt16,
    'frames': []
}

my_stream = pa.open(format=voc_data['format'],
                    channels=voc_data['channels'],
                    rate=voc_data['rate'],
                    output=True)

speech_key = os.getenv('SPEECH_KEY')
service_region = os.getenv('SPEECH_REGION')

def speech_synthesis_to_push_audio_output_stream():
    """Performs speech synthesis and pushes audio output to a stream."""
    class PushAudioOutputStreamSampleCallback(speechsdk.audio.PushAudioOutputStreamCallback):
        """
        Example class that implements the PushAudioOutputStreamCallback, which is used to show
        how to push output audio to a stream.
        """
        def __init__(self) -> None:
            super().__init__()
            self._audio_data = bytes(0)
            self._closed = False

        def write(self, audio_buffer: memoryview) -> int:
            """
            The callback function which is invoked when the synthesizer has an output audio chunk
            to write out.
            """
            self._audio_data += audio_buffer
            my_stream.write(audio_buffer)
            print("{} bytes received.".format(audio_buffer.nbytes))
            return audio_buffer.nbytes

        def close(self) -> None:
            """
            The callback function which is invoked when the synthesizer is about to close the
            stream.
            """
            self._closed = True
            print("Push audio output stream closed.")

        def get_audio_data(self) -> bytes:
            return self._audio_data

        def get_audio_size(self) -> int:
            return len(self._audio_data)

    # Creates an instance of a speech config with the specified subscription key and service region.
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Creates a customized instance of PushAudioOutputStreamCallback.
    stream_callback = PushAudioOutputStreamSampleCallback()
    # Creates an audio output stream from the callback.
    push_stream = speechsdk.audio.PushAudioOutputStream(stream_callback)
    # Creates a speech synthesizer using the push stream as audio output.
    stream_config = speechsdk.audio.AudioOutputConfig(stream=push_stream)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=stream_config)

    # Receives a text from console input and synthesizes it to stream output.
    while True:
        # print("Enter some text that you want to synthesize, Ctrl-Z to exit")
        # try:
        #     text = input()
        # except EOFError:
        #     break
        result = speech_synthesizer.speak_text_async(my_text).get()
        # Check result
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            print("Speech synthesized for text [{}], and the audio was written to output stream.".format(my_text))
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech synthesis canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))
        # Destroys result, which is necessary for destroying the speech synthesizer.
        del result

    # Destroys the synthesizer in order to close the output stream.
    del speech_synthesizer
    print("Totally {} bytes received.".format(stream_callback.get_audio_size()))

speech_synthesis_to_push_audio_output_stream()

Here I have a workaround: instead of using a stream, use a file. The audio is stored in the file, and then we simply read the file and play it with PyAudio.
# Dependencies
import os
import azure.cognitiveservices.speech as speechsdk
import pyaudio
import wave

speech_config = speechsdk.SpeechConfig(subscription="<Key>", region="<Region>")

# Audio Config
audio_config = speechsdk.audio.AudioOutputConfig(filename="background.wav")
speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

print("Enter the Text:- ")
text = input()
speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
print("Conversion is Complete")

filename = 'background.wav'  # Same as in the audio config
chunk = 1024
file = wave.open(filename, 'rb')
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(file.getsampwidth()),
                channels=file.getnchannels(),
                rate=file.getframerate(),
                output=True)

data = file.readframes(chunk)
print("Starting Audio")
while data:  # readframes() returns bytes, so compare against b'' / emptiness, not ''
    stream.write(data)
    data = file.readframes(chunk)

stream.stop_stream()
stream.close()
p.terminate()
This approach takes more disk space and more time, since the audio is written to a file and then read back.
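That said, the direct streaming from the question can likely be made to work: PyAudio's stream.write() wants a read-only bytes-like object, while the Speech SDK hands the callback a memoryview, so converting the buffer before writing should be enough. A sketch of the write callback with that single change (untested, based on the question's code):

    def write(self, audio_buffer: memoryview) -> int:
        self._audio_data += audio_buffer
        my_stream.write(audio_buffer.tobytes())  # convert the memoryview to bytes for PyAudio
        print("{} bytes received.".format(audio_buffer.nbytes))
        return audio_buffer.nbytes

Also check that the PyAudio stream's sample rate and channel count match the SDK's synthesis output format, which is not necessarily 44100 Hz.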

Related

Capture Sound from Microphone and Speakers

We are using Amazon Transcribe for speech-to-text, but we need to capture sound from both the microphone and the speakers. Do you think this can be done with sounddevice, or should we use something else?
amazon-transcribe-streaming-sdk
Mic Function
async def mic_stream():
    # This function wraps the raw input stream from the microphone, forwarding
    # the blocks to an asyncio.Queue.
    loop = asyncio.get_event_loop()
    input_queue = asyncio.Queue()

    def callback(indata, outdata, frame_count, time_info, status):
        '''
        if status:
            print(status)
        '''
        # indata[:] = outdata
        loop.call_soon_threadsafe(input_queue.put_nowait, (bytes(indata), status))

    # Be sure to use the correct parameters for the audio stream that matches
    # the audio formats described for the source language you'll be using:
    # https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html
    stream = sounddevice.RawStream(
        # device=3,
        channels=1,
        samplerate=16000,
        callback=callback,
        blocksize=1024 * 2,
        dtype="int16",
    )
    # Initiate the audio stream and asynchronously yield the audio chunks
    # as they become available.
    with stream:
        while True:
            indata, status = await input_queue.get()
            yield indata, status
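Capturing what the speakers are playing usually requires a loopback capture device rather than a plain input stream. On Linux with PulseAudio, the output's "monitor" source shows up as a regular input device, so sounddevice can likely record it; on Windows, WASAPI loopback needs extra support (e.g. the soundcard or PyAudioWPatch packages). A sketch for finding a monitor device (device names are system-specific assumptions):

import sounddevice

# List all devices and look for a PulseAudio "monitor" source, which mirrors
# whatever is being sent to the speakers (the exact name varies per system).
for idx, dev in enumerate(sounddevice.query_devices()):
    if 'monitor' in dev['name'].lower() and dev['max_input_channels'] > 0:
        print("loopback candidate:", idx, dev['name'])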

How can I make my audio loop with pyaudio?

First of all, I'm pretty new to this library and I don't really understand everything. With the help of the internet I managed to get this code snippet working. This code basically plays an audio file (.wav, to be specific). The problem is that it only plays once; I want the audio file to loop until I set the is_looping variable to False.
import pyaudio
import wave

class AudioFile:
    chunk = 1024

    def __init__(self, file_dir):
        """ Init audio stream """
        self.wf = wave.open(file_dir, 'rb')
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=self.p.get_format_from_width(self.wf.getsampwidth()),
            channels=self.wf.getnchannels(),
            rate=self.wf.getframerate(),
            output=True
        )

    def play(self):
        """ Play entire file """
        data = self.wf.readframes(self.chunk)
        while data != '':
            self.stream.write(data)
            data = self.wf.readframes(self.chunk)

    def close(self):
        """ Graceful shutdown """
        self.stream.close()
        self.p.terminate()

is_looping = True
audio = AudioFile("___.wav")
audio.play()
audio.close()
I tried doing something like this, but it still didn't work:
is_looping = True
audio = AudioFile("___.wav")
while is_looping:
    audio.play()
audio.close()
I couldn't find a way to loop the audio using my code, but I found code on the internet that does exactly what I wanted it to do. Here's the link: https://gist.github.com/THeK3nger/3624478
And here is the code from that link:
import os
import wave
import threading
import sys

# PyAudio Library
import pyaudio

class WavePlayerLoop(threading.Thread):
    CHUNK = 1024

    def __init__(self, filepath, loop=True):
        """
        Initialize `WavePlayerLoop` class.
        PARAM:
            -- filepath (String) : File Path to wave file.
            -- loop (boolean)    : True if you want loop playback.
                                   False otherwise.
        """
        super(WavePlayerLoop, self).__init__()
        self.filepath = os.path.abspath(filepath)
        self.loop = loop

    def run(self):
        # Open Wave File and start play!
        wf = wave.open(self.filepath, 'rb')
        player = pyaudio.PyAudio()

        # Open Output Stream (based on PyAudio tutorial)
        stream = player.open(format=player.get_format_from_width(wf.getsampwidth()),
                             channels=wf.getnchannels(),
                             rate=wf.getframerate(),
                             output=True)

        # PLAYBACK LOOP
        data = wf.readframes(self.CHUNK)
        while self.loop:
            stream.write(data)
            data = wf.readframes(self.CHUNK)
            if data == b'':  # If file is over then rewind.
                wf.rewind()
                data = wf.readframes(self.CHUNK)

        stream.close()
        player.terminate()

    def play(self):
        """
        Just another name for self.start()
        """
        self.start()

    def stop(self):
        """
        Stop playback.
        """
        self.loop = False
You just need to add something like this outside the class and it should work:
player = WavePlayerLoop("sounds/1.wav")
player.play()
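For reference, the original attempt likely failed for two reasons: wave.readframes() returns bytes, so the data != '' comparison never becomes False, and after the first pass the file position is left at the end, so later play() calls have nothing to read. A minimal fix to the question's AudioFile.play() (sketch):

    def play(self):
        """ Play the entire file from the beginning """
        self.wf.rewind()  # reset the wave file to the start before each pass
        data = self.wf.readframes(self.chunk)
        while data != b'':  # readframes() returns bytes, so compare with b''
            self.stream.write(data)
            data = self.wf.readframes(self.chunk)

Even with that change, nothing in the posted snippet ever sets is_looping to False, which is why the threaded WavePlayerLoop above, with its stop() method, is the more practical route.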

How to pass a live audio URL to the Google Speech to Text API

I have a URL to a live audio recording that I'm trying to transcribe using the Google Speech-to-Text API. I am using example code from the Cloud Speech-to-Text API. However, the problem is that when I pass the live URL I do not receive any output. Below is the relevant portion of my code. Any help would be greatly appreciated!
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
import io
import os
import time
import requests
import numpy as np
from urllib.request import urlopen
from datetime import datetime
from datetime import timedelta

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "app_creds.json"

def get_stream():
    stream = urlopen('streamurl')
    duration = 60
    begin = datetime.now()
    duration = timedelta(seconds=duration)
    while datetime.now() - begin < duration:
        data = stream.read(8000)
        return data

def transcribe_streaming():
    """Streams transcription of the given audio file."""
    client = speech.SpeechClient()
    content = get_stream()

    # In practice, stream should be a generator yielding chunks of audio data.
    stream = [content]
    requests = (types.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in stream)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')
    streaming_config = types.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator.
    responses = client.streaming_recognize(streaming_config, requests)

    for response in responses:
        # Once the transcription has settled, the first result will contain the
        # is_final result. The other results will be for subsequent portions of
        # the audio.
        for result in response.results:
            print('Finished: {}'.format(result.is_final))
            print('Stability: {}'.format(result.stability))
            alternatives = result.alternatives
            # The alternatives are ordered from most likely to least.
            for alternative in alternatives:
                print('Confidence: {}'.format(alternative.confidence))
                print(u'Transcript: {}'.format(alternative.transcript))
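One likely problem before the API is even involved: get_stream() returns inside the while loop, so only the first 8000 bytes of audio are ever sent, which can easily produce no transcription output. A generator that keeps yielding chunks for the desired duration is probably what was intended; a sketch based on the question's code (the function name is an illustrative assumption):

def get_stream_chunks(url='streamurl', duration_seconds=60, chunk_bytes=8000):
    """Yield raw audio chunks from the live stream for up to duration_seconds."""
    stream = urlopen(url)
    begin = datetime.now()
    limit = timedelta(seconds=duration_seconds)
    while datetime.now() - begin < limit:
        data = stream.read(chunk_bytes)
        if not data:
            break
        yield data

# then build the requests directly from the generator:
# requests = (types.StreamingRecognizeRequest(audio_content=chunk)
#             for chunk in get_stream_chunks())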
When sending audio to the Google Speech service, make sure that the service object setup matches the audio encoding. In your particular case

config = types.RecognitionConfig(
    encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code='en-US')

corresponds to single-channel, 16 kHz, linear 16-bit PCM encoding. See the list of other supported encodings if you need to transcribe audio in different formats.
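For example, if the live stream served FLAC audio at 44.1 kHz instead, the config would need to say so (illustrative values):

config = types.RecognitionConfig(
    encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
    sample_rate_hertz=44100,
    language_code='en-US')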
Here is part of some code I used a while back; I don't know if it may help:

def live_recognize_loop(self):
    client = self.client

    def is_running():
        return self.recording

    while self.recording:
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator(is_running)
            requests = (types.StreamingRecognizeRequest(audio_content=content) for content in audio_generator)
            responses = client.streaming_recognize(client.custom_streaming_config, requests)
            responses_iterator = iter(responses)

            while self.recording:
                try:
                    response = next(responses_iterator)
                except StopIteration:
                    break
                except OutOfRange:
                    # Exception 400 - Exceeded maximum allowed stream duration of 65 seconds.
                    self.user_display(self.intermediateFrame.GetMessageText())
                    break  # Start over
                except ServiceUnavailable as e:
                    # Exception 503 - Getting metadata from plugin failed
                    self.log("{0} - NOT RECOGNIZED - {1}\n".format(self.getDate(), e))
                    break
                except ResourceExhausted as e:
                    break
                except GoogleAPICallError as e:
                    break

                if response.results:
                    result = response.results[0]
                    if result.alternatives:
                        transcript = result.alternatives[0].transcript
                        self.intermediateFrame.SetMessageText(transcript)
                        if not result.is_final:
                            self.intermediateFrame.Display()
                            # print(transcript)
                        else:
                            self.user_display(transcript)
                            self.intermediateFrame.Display(False)
                            self.intermediateFrame.SetMessageText("")
                            # print("\t\t FINAL: %s" % transcript)
                            break  # Start over
MicrophoneStream class
from __future__ import division

import pyaudio
from six.moves import queue

class MicrophoneStream(object):
    """Opens a recording stream as a generator yielding the audio chunks."""

    def __init__(self, rate, chunk):
        self._rate = rate
        self._chunk = chunk
        # Create a thread-safe buffer of audio data
        self._buff = queue.Queue()
        self.closed = True

    def __enter__(self):
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16,
            # The API currently only supports 1-channel (mono) audio
            channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            # Run the audio stream asynchronously to fill the buffer object.
            # This is necessary so that the input device's buffer doesn't
            # overflow while the calling thread makes network requests, etc.
            stream_callback=self._fill_buffer,
        )
        self.closed = False
        return self

    def __exit__(self, type, value, traceback):
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        # Signal the generator to terminate so that the client's
        # streaming_recognize method will not block the process termination.
        self._buff.put(None)
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream, into the buffer."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self, is_running=None):
        while not self.closed:
            # Use a blocking get() to ensure there's at least one chunk of
            # data, and stop iteration if the chunk is None, indicating the
            # end of the audio stream.
            chunk = self._buff.get()
            if callable(is_running) and not is_running():
                return
            if chunk is None:
                return
            data = [chunk]

            # Now consume whatever other data's still buffered.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break

            yield b''.join(data)
Try using:
import urllib
urllib.urlretrieve("http://www.example.com/songs/mp3.mp3", "mp3.mp3")
(for Python 3+ use import urllib.request and urllib.request.urlretrieve)

Live recognition with Python and Pocketsphinx

I have recently been working with pocketsphinx in Python. I have successfully got the example below to work, recognizing a recorded WAV file.
#!/usr/bin/env python
import sys, os

def decodeSpeech(hmmd, lmdir, dictp, wavfile):
    """
    Decodes a speech file
    """
    try:
        import pocketsphinx as ps
        import sphinxbase
    except:
        print """Pocketsphinx and sphinxbase are not installed
        on your system. Please install them with your package manager.
        """
    speechRec = ps.Decoder(hmm=hmmd, lm=lmdir, dict=dictp)
    wavFile = file(wavfile, 'rb')
    wavFile.seek(44)
    speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]

if __name__ == "__main__":
    hmdir = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/hmm/wsj1"
    lmd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.3e-7.vp.tg.lm.DMP"
    dictd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.dic"
    wavfile = "/home/jaganadhg/Desktop/Docs_New/kgisl/sa1.wav"
    recognised = decodeSpeech(hmdir, lmd, dictd, wavfile)
    print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
    print recognised
    print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
The problem is: how can I do real-time speech recognition from a microphone, in a while loop with an if statement, so that if a set word is recognised from the microphone a function can be called?
The code for real-time recognition looks like this (imports and the model path added for completeness; adjust MODELDIR to where your model files live):

from os import path
from pocketsphinx.pocketsphinx import Decoder

MODELDIR = 'pocketsphinx/model'  # assumption: standard pocketsphinx model layout

config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'en-us/en-us'))
config.set_string('-lm', path.join(MODELDIR, 'en-us/en-us.lm.bin'))
config.set_string('-dict', path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
config.set_string('-logfn', '/dev/null')
decoder = Decoder(config)

import pyaudio

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
stream.start_stream()
in_speech_bf = False
decoder.start_utt()
while True:
    buf = stream.read(1024)
    if buf:
        decoder.process_raw(buf, False, False)
        if decoder.get_in_speech() != in_speech_bf:
            in_speech_bf = decoder.get_in_speech()
            if not in_speech_bf:
                decoder.end_utt()
                print('Result:', decoder.hyp().hypstr)
                decoder.start_utt()
    else:
        break
decoder.end_utt()
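To have a set word trigger a function, one option is to check the hypothesis string at the point where the result is printed above; the word and the callback name here are illustrative assumptions:

hyp = decoder.hyp()
if hyp is not None and 'forward' in hyp.hypstr.split():
    on_keyword()  # hypothetical function to call when the word is heard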
You can also use the GStreamer Python bindings in pocketsphinx; check livedemo.py.
Try this. Pocketsphinx is now a GStreamer plugin.
This is code I found on the internet, and I've modified a few things, but it listens for the words very badly and slowly. Can you help me modify it for the better? It runs on Ubuntu 16.04 LTS. I do not know much about programming, so I'm looking forward to your help.
# -*- encoding: utf-8 -*-
#!/usr/bin/env python
from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *

import os
import pyaudio
import wave
import audioop
from collections import deque
import time
import math
import Mic

"""
Written by Sophie Li, 2016
http://blog.justsophie.com/python-speech-to-text-with-pocketsphinx/
"""

class SpeechDetector:
    def __init__(self):
        # Microphone stream config.
        self.CHUNK = 1024  # CHUNKS of bytes to read each time from mic
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000

        self.SILENCE_LIMIT = 1  # Silence limit in seconds. The max amount of seconds where
                                # only silence is recorded. When this time passes the
                                # recording finishes and the file is decoded.
        self.PREV_AUDIO = 0.5   # Previous audio (in seconds) to prepend. When noise
                                # is detected, how much of previously recorded audio is
                                # prepended. This helps to prevent chopping the beginning
                                # of the phrase.
        self.THRESHOLD = 4500
        self.num_phrases = -1

        # These will need to be modified according to where the pocketsphinx folder is
        MODELDIR = "/home/l/Desktop/pocketsphinx/model/en-us"

        # Create a decoder with certain model
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us/'))
        config.set_string('-lm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us.lm.bin'))
        config.set_string('-dict', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/cmudict-en-us.dict'))
        config.set_string('-keyphrase', 'no one')
        config.set_float('-kws_threshold', 1e+20)

        # Creates a decoder object for streaming data.
        self.decoder = Decoder(config)

    def setup_mic(self, num_samples=50):
        """ Gets average audio intensity of your mic sound. You can use it to get
            average intensities while you're talking and/or silent. The average
            is the avg of the .2 of the largest intensities recorded.
        """
        # print("Getting intensity values from mic.")
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)

        values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
                  for x in range(num_samples)]
        values = sorted(values, reverse=True)
        r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
        # print(" Finished ")
        # print(" Average audio intensity is ", r)
        stream.close()
        p.terminate()

        if r < 3000:
            self.THRESHOLD = 3500
        else:
            self.THRESHOLD = r + 100

    def save_speech(self, data, p):
        """
        Saves mic data to a temporary WAV file. Returns the filename of the
        saved file.
        """
        filename = 'output_' + str(int(time.time()))
        # writes data to WAV file
        data = b''.join(data)  # chunks are bytes, so join with b''
        wf = wave.open(filename + '.wav', 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
        wf.setframerate(16000)  # TODO make this value a function parameter?
        wf.writeframes(data)
        wf.close()
        return filename + '.wav'

    def decode_phrase(self, wav_file):
        self.decoder.start_utt()
        stream = open(wav_file, "rb")
        while True:
            buf = stream.read(1024)
            if buf:
                self.decoder.process_raw(buf, False, False)
            else:
                break
        self.decoder.end_utt()
        words = []
        [words.append(seg.word) for seg in self.decoder.seg()]
        return words

    def run(self):
        """
        Listens to the microphone, extracts phrases from it and calls pocketsphinx
        to decode the sound
        """
        self.setup_mic()

        # Open stream
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)
        audio2send = []
        cur_data = b''  # current chunk of audio data
        rel = self.RATE // self.CHUNK  # chunks per second (int, so it can be a deque maxlen)
        slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)

        # Prepend audio from 0.5 seconds before noise was detected
        prev_audio = deque(maxlen=int(self.PREV_AUDIO * rel))
        started = False

        while True:
            cur_data = stream.read(self.CHUNK)
            slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))

            if sum([x > self.THRESHOLD for x in slid_win]) > 0:
                if started == False:
                    print("Starting recording")
                    started = True
                audio2send.append(cur_data)
            elif started:
                print("Finished recording")
                filename = self.save_speech(list(prev_audio) + audio2send, p)
                r = self.decode_phrase(filename)
                print("RESULT: ", r)
                # My hotword is "no one": if r contains both "no" and "one", end the program.
                if r.count("one") > 0 and r.count("no") > 0:
                    Mic.playaudiofromAudio().play("/home/l/Desktop/PROJECT/Audio/beep_hi.wav")
                    os.remove(filename)
                    return

                # Removes temp audio file
                os.remove(filename)
                # Reset all
                started = False
                slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
                prev_audio = deque(maxlen=int(0.5 * rel))
                audio2send = []
                print("Listening mode ...")
            else:
                prev_audio.append(cur_data)

        print("* Done listening")
        stream.close()
        p.terminate()
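The posted code only defines the class and never runs it; a minimal usage sketch (assuming the model paths and the Mic helper module above exist):

if __name__ == "__main__":
    detector = SpeechDetector()
    detector.run()  # calibrates the threshold, then listens until "no one" is heard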

How to play an audiofile with pyaudio?

I do not understand the example material for pyaudio. It seems they had written an entire small program and it threw me off.
How do I just play a single audio file?
Format is not an issue, I just want to know the bare minimum code I need to play an audio file.
Maybe this small wrapper around their example (warning: hastily thrown together) will help you understand the meaning of the code they wrote.
import pyaudio
import wave
import sys

class AudioFile:
    chunk = 1024

    def __init__(self, file):
        """ Init audio stream """
        self.wf = wave.open(file, 'rb')
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=self.p.get_format_from_width(self.wf.getsampwidth()),
            channels=self.wf.getnchannels(),
            rate=self.wf.getframerate(),
            output=True
        )

    def play(self):
        """ Play entire file """
        data = self.wf.readframes(self.chunk)
        while data != b'':
            self.stream.write(data)
            data = self.wf.readframes(self.chunk)

    def close(self):
        """ Graceful shutdown """
        self.stream.close()
        self.p.terminate()

# Usage example for pyaudio
a = AudioFile("1.wav")
a.play()
a.close()
The example seems pretty clear to me. You simply save the example as playwav.py and call:
python playwav.py my_fav_wav.wav
The wave example with some extra comments:
import pyaudio
import wave
import sys

# length of data to read.
chunk = 1024

# validation. If a wave file hasn't been specified, exit.
if len(sys.argv) < 2:
    print("Plays a wave file.\n\nUsage: %s filename.wav" % sys.argv[0])
    sys.exit(-1)

'''
************************************************************************
This is the start of the "minimum needed to read a wave"
************************************************************************
'''
# open the file for reading.
wf = wave.open(sys.argv[1], 'rb')

# create an audio object
p = pyaudio.PyAudio()

# open stream based on the wave object which has been input.
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True)

# read data (based on the chunk size)
data = wf.readframes(chunk)

# play stream (looping from beginning of file to the end)
while data:
    # writing to the stream is what *actually* plays the sound.
    stream.write(data)
    data = wf.readframes(chunk)

# cleanup stuff.
wf.close()
stream.close()
p.terminate()
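For completeness, PyAudio also has a non-blocking callback mode: PyAudio pulls frames from a callback on its own thread instead of the while loop above writing them. A sketch, assuming wf and p are freshly opened as in the example above (before its cleanup step):

import time

# Invoked by PyAudio whenever it needs more frames to play.
def callback(in_data, frame_count, time_info, status):
    data = wf.readframes(frame_count)
    return (data, pyaudio.paContinue)

stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                stream_callback=callback)

stream.start_stream()
while stream.is_active():  # wait until the file has played out
    time.sleep(0.1)
stream.stop_stream()
stream.close()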
This approach requires ffmpeg for pydub, but it can play more than just wave files:
import pyaudio
import sys
from pydub import AudioSegment

if len(sys.argv) <= 1:
    print('No File Name!')
    sys.exit(1)

chunk = 1024
fn = ' '.join(sys.argv[1:])
pd = AudioSegment.from_file(fn)

p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(pd.sample_width),
                channels=pd.channels,
                rate=pd.frame_rate,
                output=True)

# Note: pydub slices AudioSegments by milliseconds, so each iteration
# plays a 1024 ms piece of audio rather than 1024 frames.
i = 0
data = pd[:chunk]._data
while data:
    stream.write(data)
    i += chunk
    data = pd[i:i + chunk]._data

stream.close()
p.terminate()
sys.exit(0)
