I followed a tutorial on Youtube on how to do TextToSpeech with python, and I am getting the following error
import re
import wave
import pyaudio
import _thread
import time
class TextToSpeech:
    """Speak text by chaining pre-recorded phoneme clips from ``sounds/``.

    Words are mapped to phonemes through a pronunciation dictionary file in
    the CMU Pronouncing Dictionary layout (``WORD  PH1 PH2 ...`` per line,
    comment lines starting with ``;;;``).

    NOTE: the parameter annotations below are Python 3 syntax. Running this
    file with Python 2.7 fails with "invalid syntax" at the colon in
    ``words_pron_dict: str``.
    """

    CHUNK = 1024  # frames handed to the output stream per write

    def __init__(self, words_pron_dict: str = 'cmudict-0.7b.txt'):
        """Load the pronunciation dictionary from *words_pron_dict*."""
        self._l = {}
        self._load_words(words_pron_dict)

    def _load_words(self, words_pron_dict: str):
        """Fill ``self._l`` with ``word -> [phoneme, ...]`` entries."""
        with open(words_pron_dict, 'r') as file:
            for line in file:
                if line.startswith(';;;'):
                    continue
                # Split on the first run of whitespace so the word/phonemes
                # separator may be one space, two spaces or a tab.
                parts = line.split(maxsplit=1)
                if len(parts) != 2:
                    continue  # blank line or a word without phonemes
                key, val = parts
                # Keeping only the letters drops the stress digits (AH0 -> AH).
                self._l[key] = re.findall(r"[A-Z]+", val)

    def get_pronunciation(self, str_input):
        """Play *str_input* phoneme by phoneme and return the phoneme list.

        Words missing from the dictionary are skipped silently.
        """
        list_pron = []
        for word in re.findall(r"[\w']+", str_input.upper()):
            list_pron += self._l.get(word, [])
        # Each clip runs in its own thread; the growing delay staggers the
        # start times so the clips are heard in order.
        delay = 0.0
        for pron in list_pron:
            _thread.start_new_thread(TextToSpeech._play_audio, (pron, delay))
            delay += 0.145
        return list_pron

    @staticmethod
    def _play_audio(sound, delay):
        """Wait *delay* seconds, then play ``sounds/<sound>.wav``."""
        time.sleep(delay)
        with wave.open("sounds/" + sound + ".wav", 'rb') as wf:
            p = pyaudio.PyAudio()
            try:
                stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                                channels=wf.getnchannels(),
                                rate=wf.getframerate(),
                                output=True)
                try:
                    data = wf.readframes(TextToSpeech.CHUNK)
                    while data:
                        # Writing to the stream is what produces the sound.
                        stream.write(data)
                        data = wf.readframes(TextToSpeech.CHUNK)
                finally:
                    stream.stop_stream()
                    stream.close()
            finally:
                p.terminate()
if __name__ == '__main__':
    tts = TextToSpeech()
    # Keep prompting until input ends (Ctrl-D / Ctrl-Z) or the user presses
    # Ctrl-C, instead of dying with a traceback.
    while True:
        try:
            phrase = input('Enter a word or phrase: ')
        except (EOFError, KeyboardInterrupt):
            break
        tts.get_pronunciation(phrase)
The error is "Invalid Syntax", right where the colon is just before "str" at the top. I'm not sure what I am doing wrong. I am using IDLE as the editor. This script requires pyaudio, which I have installed, and it also requires the file "cmudict-0.7b.txt", which I also have.
I've tried copying the name of the file directly into the code, adding parentheses, and changing the ' to a " around the txt file name, all to no avail. I would appreciate it if someone could help me with this and give me some insight into what I'm doing wrong.
I'm using Python 2.7.
I am trying to stream the output of an Azure text-to-speech instance to my speaker with PyAudio using Microsoft's sample code
I tried to write to PyAudio's stream inside Azure's callback function def write, but it gives me this error:
File "/opt/homebrew/lib/python3.10/site-packages/pyaudio.py", line 589
, in write pa.write_stream(self._stream, frames, num_frames,
TypeError: argument 2 must be read-only bytes-like object, not memoryview`
How do I handle Azure's output so that the PyAudio stream accepts it as audio data?
Full code:
`import azure.cognitiveservices.speech as speechsdk
import os, sys, pyaudio
pa = pyaudio.PyAudio()
my_text = "My emotional experiences are varied, but mostly involve trying to find a balance between understanding others’ feelings and managing my own. I also explore the intersection of emotion and technology through affective computing and related research."
# Playback parameters for the PyAudio output stream.
# NOTE(review): channels/rate must match the audio format the synthesizer
# actually emits, otherwise playback is distorted — confirm against the
# SpeechConfig output format.
voc_data = {
    'channels': 1 if sys.platform == 'darwin' else 2,
    'rate': 44100,
    'width': pa.get_sample_size(pyaudio.paInt16),
    'format': pyaudio.paInt16,
    'frames': [],
}
my_stream = pa.open(format=voc_data['format'],
                    channels=voc_data['channels'],
                    rate=voc_data['rate'],
                    output=True)
speech_key = os.getenv('SPEECH_KEY')
service_region = os.getenv('SPEECH_REGION')
def speech_synthesis_to_push_audio_output_stream(output_stream=None):
    """Synthesize ``my_text`` and push the resulting audio through a callback.

    Args:
        output_stream: Optional object with a ``write(bytes)`` method, for
            example an opened PyAudio output stream. Every synthesized chunk
            is forwarded to it as it arrives. When omitted, the audio is only
            accumulated in memory.
    """

    class PushAudioOutputStreamSampleCallback(speechsdk.audio.PushAudioOutputStreamCallback):
        """Callback that collects pushed audio and optionally plays it.

        Example class that implements the PushAudioOutputStreamCallback, which
        is used to show how to push output audio to a stream.
        """

        def __init__(self) -> None:
            super().__init__()
            # bytearray: appending chunk after chunk stays cheap.
            self._audio_data = bytearray()
            self._closed = False

        def write(self, audio_buffer: memoryview) -> int:
            """Handle one audio chunk delivered by the synthesizer.

            Returns the number of bytes consumed.
            """
            # PyAudio's Stream.write raises TypeError for a memoryview, so
            # copy the chunk into an immutable bytes object first.
            chunk = audio_buffer.tobytes()
            self._audio_data += chunk
            if output_stream is not None:
                output_stream.write(chunk)
            print("{} bytes received.".format(audio_buffer.nbytes))
            return audio_buffer.nbytes

        def close(self) -> None:
            """Called when the synthesizer is about to close the stream."""
            self._closed = True
            print("Push audio output stream closed.")

        def get_audio_data(self) -> bytes:
            """Return all audio received so far."""
            return bytes(self._audio_data)

        def get_audio_size(self) -> int:
            """Return the number of audio bytes received so far."""
            return len(self._audio_data)

    # Creates an instance of a speech config with specified subscription key and service region.
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Creates customized instance of PushAudioOutputStreamCallback
    stream_callback = PushAudioOutputStreamSampleCallback()
    # Creates audio output stream from the callback
    push_stream = speechsdk.audio.PushAudioOutputStream(stream_callback)
    # Creates a speech synthesizer using push stream as audio output.
    stream_config = speechsdk.audio.AudioOutputConfig(stream=push_stream)
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=stream_config)

    # Synthesize the fixed text once. The interactive sample looped on
    # input(); with the prompt removed, the loop would never terminate.
    result = speech_synthesizer.speak_text_async(my_text).get()
    # Check result
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesized for text [{}], and the audio was written to output stream.".format(my_text))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech synthesis canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
    # Destroys result which is necessary for destroying speech synthesizer
    del result
    # Destroys the synthesizer in order to close the output stream.
    del speech_synthesizer
    print("Totally {} bytes received.".format(stream_callback.get_audio_size()))
Here I have a workaround: instead of using the stream, use a file. The audio is stored in the file, and then we simply read the file and play it using PyAudio.
# Dependencies
import os
import azure.cognitiveservices.speech as speechsdk
import pyaudio
import wave
speech_config = speechsdk.SpeechConfig(subscription="<Key>", region="<Region>")
# Audio Config: the synthesizer writes its output to this wav file.
audio_config = speechsdk.audio.AudioOutputConfig(filename="background.wav")
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
print("Enter the Text:- ")
text = input()
speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
print("Conversion is Complete")
filename = 'background.wav'  # Same as in audio config
chunk = 1024
file = wave.open(filename, 'rb')
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(file.getsampwidth()),
                channels=file.getnchannels(),
                rate=file.getframerate(),
                output=True)
data = file.readframes(chunk)
print("Starting Audio")
# readframes() returns bytes, so test truthiness: comparing against the str
# '' is never equal on Python 3 and the loop would not end.
while data:
    stream.write(data)
    data = file.readframes(chunk)
# Release the audio device and the file once playback is done.
stream.stop_stream()
stream.close()
p.terminate()
file.close()
Note that this approach takes more space and more time, because the audio is written to a file and then read back.
I tried pygame for playing wav file like this:
import pygame
but it changes the voice and I don't know why!
I read this link solutions and can't solve my problem with playing wave file!
for this solution I dont know what should I import?
s = Sound()
and for this solution, /dev/dsp doesn't exist in newer versions of Linux:
from wave import open as waveOpen
from ossaudiodev import open as ossOpen
s = waveOpen('tada.wav','rb')
(nc,sw,fr,nf,comptype, compname) = s.getparams( )
dsp = ossOpen('/dev/dsp','w')
from ossaudiodev import AFMT_S16_NE
except ImportError:
if byteorder == "little":
AFMT_S16_NE = ossaudiodev.AFMT_S16_LE
AFMT_S16_NE = ossaudiodev.AFMT_S16_BE
dsp.setparameters(AFMT_S16_NE, nc, fr)
data = s.readframes(nf)
and when I tried pyglet It give me this error:
import pyglet
music = pyglet.resource.media('mysound.wav')
nima#ca005 Desktop]$ python play.py
Traceback (most recent call last):
File "play.py", line 4, in <module>
music = pyglet.resource.media('mysound.wav')
File "/usr/lib/python2.7/site-packages/pyglet/resource.py", line 587, in media
return media.load(path, streaming=streaming)
File "/usr/lib/python2.7/site-packages/pyglet/media/__init__.py", line 1386, in load
source = _source_class(filename, file)
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 194, in __init__
format = wave_form.get_format_chunk()
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 174, in get_format_chunk
for chunk in self.get_chunks():
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 110, in get_chunks
chunk = cls(self.file, name, length, offset)
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 155, in __init__
raise RIFFFormatException('Size of format chunk is incorrect.')
pyglet.media.riff.RIFFFormatException: Size of format chunk is incorrect.
AL lib: ReleaseALC: 1 device not closed
You can use PyAudio. An example here on my Linux it works:
#!/usr/bin/env python
import pyaudio
import wave
# define stream chunk (frames read and written per iteration)
chunk = 1024
# open a wav format music
f = wave.open(r"/usr/share/sounds/alsa/Rear_Center.wav", "rb")
# instantiate PyAudio
p = pyaudio.PyAudio()
# open an output stream whose format matches the wav file
stream = p.open(format=p.get_format_from_width(f.getsampwidth()),
                channels=f.getnchannels(),
                rate=f.getframerate(),
                output=True)
# read data
data = f.readframes(chunk)
# play stream: writing the frames to the stream is what produces sound
while data:
    stream.write(data)
    data = f.readframes(chunk)
# stop stream
stream.stop_stream()
stream.close()
# close PyAudio
p.terminate()
f.close()
Works for me on Windows:
>>> from playsound import playsound
>>> playsound('/path/to/a/sound/file/you/want/to/play.wav')
NOTE: This has a bug in Windows where it doesn't close the stream.
I've added a PR for a fix here:
The reason pygame changes your audio is mixer defaults to a 22k sample rate:
initialize the mixer module
pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=4096): return None
Your wav is probably 8k. So when pygame plays it, it plays roughly twice as fast. So specify your wav frequency in the init.
Pyglet has some problems correctly reading RIFF headers. If you have a very basic wav file (with exactly a 16 byte fmt block) with no other information in the fmt chunk (like 'fact' data), it works. But it makes no provision for additional data in the chunks, so it's really not adhering to the RIFF interface specification.
PyGame has 2 different modules for playing sound and music, the pygame.mixer module and the pygame.mixer.music module. This module contains classes for loading Sound objects and controlling playback. The difference is explained in the documentation:
The difference between the music playback and regular Sound playback is that the music is streamed, and never actually loaded all at once. The mixer system only supports a single music stream at once.
If you want to play a single wav file, you have to initialize the module and create a pygame.mixer.Sound() object from the file. Invoke play() to start playing the file. Finally, you have to wait for the file to play.
Use get_length() to get the length of the sound in seconds and wait for the sound to finish:
(The argument to pygame.time.wait() is in milliseconds)
import pygame
my_sound = pygame.mixer.Sound('mysound.wav')
pygame.time.wait(int(my_sound.get_length() * 1000))
Alternatively you can use pygame.mixer.get_busy to test if a sound is being mixed. Query the status of the mixer continuously in a loop:
import pygame
my_sound = pygame.mixer.Sound('mysound.wav')
while pygame.mixer.get_busy():
If you are a Windows user, the easiest way is to use winsound. You don't even need to install it.
Not recommended, too few functions
import winsound
# Without SND_ASYNC this call blocks until the sound has finished playing.
winsound.PlaySound("Wet Hands.wav", winsound.SND_FILENAME)
# Add the winsound.SND_ASYNC flag if you do NOT want to wait for it: the call
# then returns immediately while the sound keeps playing, e.g.
# winsound.PlaySound("Wet Hands.wav", winsound.SND_FILENAME | winsound.SND_ASYNC)
If you are looking for more advanced functions, you can try mp3play.
Unluckily,mp3play is only available in Python2 and Windows.
If you want to use it on other platforms,use playsound despite its poor functions.If you want to use it in Python3,I will give you the modified version which is available on Python 3.(at the bottom of the answer)
Also,mp3play is really good at playing wave files, and it gives you more choices.
import time
import mp3play
music = mp3play.load("Wet Hands.wav")
Playsound is very easy to use, but it is not recommended because you can't pause or get any information about the music, and errors often occur. Try this only if the other approaches don't work at all.
import playsound
playsound.playsound("Wet Hands.wav", block=True)
I'm using this code and it works on Ubuntu 22.04 after my test.
If it doesn't work on your machine, consider updating your pygame lib.
import pygame
pygame.mixer.music.load("Wet Hands.wav")
while pygame.mixer.music.get_busy():
This works on Windows but it doesn't work on my Ubuntu, so I can do nothing.
import pyglet
import time
sound = pyglet.media.load("Wet Hands.wav", "Wet Hands.wav")
It seems that you are using Linux,so playsound may be your choice.My code maybe cannot solve your problem by using pygame and pyglet,because I always use Windows.If none of the solutions work on your machine,I suggest you run the program on Windows...
To other users seeing my answer, I have done many tests among many libraries,so if you are using Windows,you may try mp3play which can play both mp3 and wave files, and mp3play is the most pythonic, easy, light-weight and functional library.
mp3play in Python3
just copy the code below and create a file named mp3play.py in your working directory and paste the content.
import random
from ctypes import windll, c_buffer
class _mci:
def __init__(self):
self.w32mci = windll.winmm.mciSendStringA
self.w32mcierror = windll.winmm.mciGetErrorStringA
def send(self, command):
buffer = c_buffer(255)
command = command.encode(encoding="utf-8")
errorcode = self.w32mci(command, buffer, 254, 0)
if errorcode:
return errorcode, self.get_error(errorcode)
return errorcode, buffer.value
def get_error(self, error):
error = int(error)
buffer = c_buffer(255)
self.w32mcierror(error, buffer, 254)
return buffer.value
def directsend(self, txt):
(err, buf) = self.send(txt)
# if err != 0:
# print('Error %s for "%s": %s' % (str(err), txt, buf))
return err, buf
class _AudioClip(object):
def __init__(self, filename):
filename = filename.replace('/', '\\')
self.filename = filename
self._alias = 'mp3_%s' % str(random.random())
self._mci = _mci()
self._mci.directsend(r'open "%s" alias %s' % (filename, self._alias))
self._mci.directsend('set %s time format milliseconds' % self._alias)
err, buf = self._mci.directsend('status %s length' % self._alias)
self._length_ms = int(buf)
def volume(self, level):
"""Sets the volume between 0 and 100."""
self._mci.directsend('setaudio %s volume to %d' %
(self._alias, level * 10))
def play(self, start_ms=None, end_ms=None):
start_ms = 0 if not start_ms else start_ms
end_ms = self.milliseconds() if not end_ms else end_ms
err, buf = self._mci.directsend('play %s from %d to %d'
% (self._alias, start_ms, end_ms))
def isplaying(self):
return self._mode() == 'playing'
def _mode(self):
err, buf = self._mci.directsend('status %s mode' % self._alias)
return buf
def pause(self):
self._mci.directsend('pause %s' % self._alias)
def unpause(self):
self._mci.directsend('resume %s' % self._alias)
def ispaused(self):
return self._mode() == 'paused'
def stop(self):
self._mci.directsend('stop %s' % self._alias)
self._mci.directsend('seek %s to start' % self._alias)
def milliseconds(self):
return self._length_ms
def __del__(self):
self._mci.directsend('close %s' % self._alias)
_PlatformSpecificAudioClip = _AudioClip
class AudioClip(object):
    """Public handle for a loaded clip; delegates to the platform backend."""

    __slots__ = ['_clip']

    def __init__(self, filename):
        """Load *filename* through the platform-specific backend."""
        self._clip = _PlatformSpecificAudioClip(filename)

    def play(self, start_ms=None, end_ms=None):
        """Start playing, optionally limited to ``start_ms``..``end_ms``.

        Does nothing and returns None when *end_ms* lies before the start
        position. A missing *start_ms* counts as 0 for that check.
        """
        if end_ms is not None and end_ms < (start_ms or 0):
            return None
        return self._clip.play(start_ms, end_ms)

    def volume(self, level):
        """Set the volume; *level* must be between 0 and 100."""
        assert 0 <= level <= 100
        return self._clip.volume(level)

    def isplaying(self):
        """Return whether the clip is currently playing."""
        return self._clip.isplaying()

    def pause(self):
        """Pause playback."""
        return self._clip.pause()

    def unpause(self):
        """Resume playback after a pause."""
        return self._clip.unpause()

    def ispaused(self):
        """Return whether the clip is currently paused."""
        return self._clip.ispaused()

    def stop(self):
        """Stop playback."""
        return self._clip.stop()

    def seconds(self):
        """Return the clip length rounded to whole seconds."""
        return int(round(float(self.milliseconds()) / 1000))

    def milliseconds(self):
        """Return the clip length in milliseconds."""
        return self._clip.milliseconds()
def load(filename):
    """Build the AudioClip that plays *filename* and return it."""
    clip = AudioClip(filename)
    return clip
I have recently been working with pocket sphinx in python. I have successfully got the
example below to work recognising a recorded wav.
#!/usr/bin/env python
import sys,os
def decodeSpeech(hmmd,lmdir,dictp,wavfile):
Decodes a speech file
import pocketsphinx as ps
import sphinxbase
print """Pocket sphinx and sphixbase is not installed
in your system. Please install it with package manager.
speechRec = ps.Decoder(hmm = hmmd, lm = lmdir, dict = dictp)
wavFile = file(wavfile,'rb')
result = speechRec.get_hyp()
return result[0]
if __name__ == "__main__":
hmdir = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/hmm/wsj1"
lmd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.3e-7.vp.tg.lm.DMP"
dictd = "/home/jaganadhg/Desktop/Docs_New/kgisl/model/lm/wsj/wlist5o.dic"
wavfile = "/home/jaganadhg/Desktop/Docs_New/kgisl/sa1.wav"
recognised = decodeSpeech(hmdir,lmd,dictd,wavfile)
print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
print recognised
print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
The problem is how can I do real time speech recognition from a microphone? In
a while loop with a if statement so that if a set word is recognised from the microphone
a function can be called?
The code for realtime recognition looks like this:
config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'en-us/en-us'))
config.set_string('-lm', path.join(MODELDIR, 'en-us/en-us.lm.bin'))
config.set_string('-dict', path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
config.set_string('-logfn', '/dev/null')
decoder = Decoder(config)
import pyaudio
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
in_speech_bf = False
while True:
buf = stream.read(1024)
if buf:
decoder.process_raw(buf, False, False)
if decoder.get_in_speech() != in_speech_bf:
in_speech_bf = decoder.get_in_speech()
if not in_speech_bf:
print 'Result:', decoder.hyp().hypstr
You can also use gstreamer python bindings in pocketsphinx, check livedemo.py
Try this. Pocketsphinx is now a GStreamer plugin.
This is the code I see on the internet and I've modified a few things to really listen to the words very bad and slow
You can help me modify it for good. It is built on ubuntu 16.04 LTS
I do not know much about programming
Looking forward to help
# -*- encoding: utf-8 -*-
#!/usr/bin/env python
from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *
import os
import pyaudio
import wave
import audioop
from collections import deque
import time
import math;import Mic
Written by Sophie Li, 2016
class SpeechDetector:
def __init__(self):
# Microphone stream config.
self.CHUNK = 1024 # CHUNKS of bytes to read each time from mic
self.FORMAT = pyaudio.paInt16
self.CHANNELS = 1
self.RATE = 16000
self.SILENCE_LIMIT = 1 # Silence limit in seconds. The max ammount of seconds where
# only silence is recorded. When this time passes the
# recording finishes and the file is decoded
self.PREV_AUDIO = 0.5 # Previous audio (in seconds) to prepend. When noise
# is detected, how much of previously recorded audio is
# prepended. This helps to prevent chopping the beginning
# of the phrase.
self.THRESHOLD = 4500
self.num_phrases = -1
# These will need to be modified according to where the pocketsphinx folder is
MODELDIR = "/home/l/Desktop/pocketsphinx/model/en-us"
# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us/'))
config.set_string('-lm', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/en-us.lm.bin'))
config.set_string('-dict', os.path.join(MODELDIR, '/home/l/Desktop/pocketsphinx/model/en-us/cmudict-en-us.dict'))
config.set_string('-keyphrase', 'no one')
config.set_float('-kws_threshold', 1e+20)
# Creaders decoder object for streaming data.
self.decoder = Decoder(config)
def setup_mic(self, num_samples=50):
""" Gets average audio intensity of your mic sound. You can use it to get
average intensities while you're talking and/or silent. The average
is the avg of the .2 of the largest intensities recorded.
#print "Getting intensity values from mic."
p = pyaudio.PyAudio()
stream = p.open(format=self.FORMAT,
values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
for x in range(num_samples)]
values = sorted(values, reverse=True)
r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
#print " Finished "
#print " Average audio intensity is ", r
if r < 3000:
self.THRESHOLD = 3500
self.THRESHOLD = r + 100
def save_speech(self, data, p):
Saves mic data to temporary WAV file. Returns filename of saved
filename = 'output_'+str(int(time.time()))
# writes data to WAV file
data = ''.join(data)
wf = wave.open(filename + '.wav', 'wb')
wf.setframerate(16000) # TODO make this value a function parameter?
return filename + '.wav'
def decode_phrase(self, wav_file):
stream = open(wav_file, "rb")
while True:
buf = stream.read(1024)
if buf:
self.decoder.process_raw(buf, False, False)
words = []
[words.append(seg.word) for seg in self.decoder.seg()]
return words
def run(self):
Listens to Microphone, extracts phrases from it and calls pocketsphinx
to decode the sound
#Open stream
p = pyaudio.PyAudio()
stream = p.open(format=self.FORMAT,
audio2send = []
cur_data = '' # current chunk of audio data
rel = self.RATE/self.CHUNK
slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
#Prepend audio from 0.5 seconds before noise was detected
prev_audio = deque(maxlen=self.PREV_AUDIO * rel)
started = False
while True:
cur_data = stream.read(self.CHUNK)
slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))
if sum([x > self.THRESHOLD for x in slid_win]) > 0:
if started == False:
print "Bắt đầu ghi âm"
started = True
elif started:
print "Hoàn thành ghi âm"
filename = self.save_speech(list(prev_audio) + audio2send, p)
r = self.decode_phrase(filename)
print "RESULT: ", r
# hot word for me " no one" if r.count('one') and r.count("no") > 0 the end programs
if r.count("one") > 0 and r.count("no") > 0:
# Removes temp audio file
# Reset all
started = False
slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
prev_audio = deque(maxlen= 0.5 * rel)
audio2send = []
print "Chế độ nghe ..."
print "* Hoàn thành nghe"
I looked at this question: pyaudio help play a file
While this question did get answered I never got a clear answer of where to actually put the song file.
This is the code for playing a WAVE file:
""" Play a WAVE file. """
import pyaudio
import wave
import sys
chunk = 1024
# Without a file argument there is nothing to play: show usage and stop.
if len(sys.argv) < 2:
    print("Plays a wave file.\n\n" +
          "Usage: %s filename.wav" % sys.argv[0])
    sys.exit(-1)
# The wav file to play is the first command-line argument.
wf = wave.open(sys.argv[1], 'rb')
p = pyaudio.PyAudio()
# open stream
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True)
# read data
data = wf.readframes(chunk)
# play stream: an empty read marks the end of the file. Truthiness works for
# both Python 2 str and Python 3 bytes, unlike a comparison against ''.
while data:
    stream.write(data)
    data = wf.readframes(chunk)
stream.stop_stream()
stream.close()
p.terminate()
wf.close()
I've looked through the code but I can't find anything in the code where I actually insert the music file itself. When I press the "Play" button in my program (I use wxform with this program) nothing is played.
The magic line is:
wf = wave.open(sys.argv[1], 'rb')
This seems to say that the first argument to the script (sys.argv[1]) is used as the input for waves.
I don't know anything about pyaudio, but it seems pretty clear that the song file is the first argument that is passed to the program when you execute it. Look at this line: wf = wave.open(sys.argv[1], 'rb'). Just change sys.argv[1] to 'c:/filename.wav' or something.
And the program won't run as it is written now if you don't pass any argument to it. Because of the if len(sys.argv) < 2 block
Just know a few things of python, pyaudio but it seems that the song file is the first argument that is passed to the program when you execute it.
Just pass the sound file as an argument, like this:
python your-python_file.py sound_file.wav
Here is a solution :
Comment the If Statment and directly add the file name to play
import pyaudio
import wave
import sys
CHUNK = 1024
#if len(sys.argv) < 2:
#    print("Plays a wave file.\n\nUsage: %s output.wav" % sys.argv[0])
#    sys.exit(-1)
# The file name is hard-coded instead of being taken from sys.argv.
wf = wave.open("output.wav", 'rb')
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True)
data = wf.readframes(CHUNK)
# readframes() returns bytes; comparing against the str '' never ends the
# loop on Python 3, so test truthiness instead.
while data:
    stream.write(data)
    data = wf.readframes(CHUNK)
stream.stop_stream()
stream.close()
p.terminate()
wf.close()
I do not understand the example material for pyaudio. It seems they had written an entire small program and it threw me off.
How do I just play a single audio file?
Format is not an issue, I just want to know the bare minimum code I need to play an audio file.
May be this small wrapper (warning: created on knees) of their example will help you to understand the meaning of code they wrote.
import pyaudio
import wave
import sys
class AudioFile:
    """Play a wav file through a PyAudio output stream."""

    chunk = 1024  # frames read and written per iteration

    def __init__(self, file):
        """ Init audio stream """
        self.wf = wave.open(file, 'rb')
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(
            format=self.p.get_format_from_width(self.wf.getsampwidth()),
            channels=self.wf.getnchannels(),
            rate=self.wf.getframerate(),
            output=True
        )

    def play(self):
        """ Play entire file """
        data = self.wf.readframes(self.chunk)
        while data != b'':
            # Writing the frames to the stream is what produces sound.
            self.stream.write(data)
            data = self.wf.readframes(self.chunk)

    def close(self):
        """ Graceful shutdown """
        self.stream.close()
        self.p.terminate()
        self.wf.close()
# Usage example for pyaudio: open the file, play it to the end, then release
# the audio device.
a = AudioFile("1.wav")
a.play()
a.close()
The example seems pretty clear to me. You simply save the example as playwav.py call:
python playwav.py my_fav_wav.wav
The wave example with some extra comments:
import pyaudio
import wave
import sys
# length of data to read.
chunk = 1024
# validation. If a wave file hasn't been specified, exit.
if len(sys.argv) < 2:
    print("Plays a wave file.\n\n" +
          "Usage: %s filename.wav" % sys.argv[0])
    sys.exit(-1)
# This is the start of the "minimum needed to read a wave"
# open the file for reading.
wf = wave.open(sys.argv[1], 'rb')
# create an audio object
p = pyaudio.PyAudio()
# open stream based on the wave object which has been input.
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True)
# read data (based on the chunk size)
data = wf.readframes(chunk)
# play stream (looping from beginning of file to the end)
while data:
    # writing to the stream is what *actually* plays the sound.
    stream.write(data)
    data = wf.readframes(chunk)
# cleanup stuff.
stream.stop_stream()
stream.close()
p.terminate()
wf.close()
This way requires ffmpeg for pydub, but can play not only wave files:
import pyaudio
import sys
from pydub import AudioSegment
if len(sys.argv) <= 1:
    print('No File Name!')
    sys.exit(1)
chunk = 1024
# Re-join the arguments so an unquoted file name containing spaces works.
fn = ' '.join(sys.argv[1:])
pd = AudioSegment.from_file(fn)
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(pd.sample_width),
                channels=pd.channels,
                rate=pd.frame_rate,
                output=True)
i = 0
data = pd[:chunk]._data
# A slice past the end of the segment yields empty data, which ends the loop.
while data:
    stream.write(data)
    i += chunk
    data = pd[i:i + chunk]._data
stream.stop_stream()
stream.close()
p.terminate()