IOError Input overflowed: Record audio with Tkinter interface - python

I'm trying to do a simple GUI application that has only one button: Record.
You press the button and the recording begins. When you release the button the recording is stopped and the recording is saved.
However, I get the following error when I click the button:
Traceback (most recent call last):
...
data = self.stream.read(self.CHUNK)
File (...), line 608, in read
return pa.read_stream(self._stream, num_frames, exception_on_overflow)
IOError: [Errno -9981] Input overflowed
Exception in Tkinter callback
However I do not have problems with recording a simple audio without the button and Tkinter (the code example they give here).
This is the code:
import Tkinter as tk
import pyaudio, wave
class AppRecording:
def __init__(self, root):
self.root = root
self.mouse_pressed = False
recordingButton = tk.Button(root, text = "Record")
recordingButton.pack()
recordingButton.bind("<ButtonPress-1>", self.OnMouseDown)
recordingButton.bind("<ButtonRelease-1>", self.OnMouseUp)
self.CHUNK = 1024
self.FORMAT = pyaudio.paInt16
self.CHANNELS = 2
self.RATE = 44100
self.WAVE_OUTPUT_FILENAME = "output.wav"
self.p = pyaudio.PyAudio()
try: self.stream = self.p.open(format=self.FORMAT,
channels=self.CHANNELS,
rate=self.RATE,
input=True,
frames_per_buffer=self.CHUNK)
except:
raise Exception("There is no connected microphone. Check that you connect to the left hole if you have a PC.")
return None
self.frames = []
def recordFrame(self):
try:
data = self.stream.read(self.CHUNK)
print "after try"
except IOError as ex:
print "inside except"
if ex[1] != pyaudio.paInputOverflowed:
print "before raise"
raise
print "after raise"
data = '\x00' * self.CHUNK # or however you choose to handle it, e.g. return None
self.frames.append(data)
def finishRecording(self):
self.stream.stop_stream()
self.stream.close()
self.p.terminate()
wf = wave.open(self.WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(self.CHANNELS)
wf.setsampwidth(self.p.get_sample_size(self.FORMAT))
wf.setframerate(self.RATE)
wf.writeframes(b''.join(self.frames))
wf.close()
def OnMouseDown(self, event):
self.mouse_pressed = True
self.poll()
def OnMouseUp(self, event):
self.root.after_cancel(self.after_id)
print "Finished recording!"
self.finishRecording()
def poll(self):
if self.mouse_pressed:
self.recordFrame()
self.after_id = self.root.after(1, self.poll)
root=tk.Tk()
app = AppRecording(root)
root.mainloop()
I tried to change the self.CHUNK and self.RATE. The internal microphone of my iMac says that the rate is 44100. In some places I read that I should change the chunk or rate value, tried both but no one helped. Another place told me to add the except IOError as ex: (...)
PyAudio version: 0.2.10
pyaudio.get_portaudio_version(): 1246720
pyaudio.get_portaudio_version_text(): PortAudio V19.6.0-devel, revision 396fe4b6699ae929d3a685b3ef8a7e97396139a4
Tkinter.__version__: $Revision: 81008 $
I would appreciate your help, thanks!

Which python/tk/portaudio/pyaudio version ?
I confirm that your code is good (no issue) under Ubuntu 14.04 LTS x64 (Python 2.7 with portaudio19-dev and PyAudio-0.2.10) so I assume that issue maybe relative to your python, tk, pyaudio or portaudio version...
Are you sure that you have the last portaudio & tk version installed on your computer ?

Related

Azure Text-To-Spech to PyAudio Stream

I am trying to stream the output of an Azure text-to-speech instance to my speaker with PyAudio using Microsoft's sample code
I tried to write to PyAudio's stream inside Azure's callback function def write, but it gives me this error:
`my_stream.write(audio_buffer)
File "/opt/homebrew/lib/python3.10/site-packages/pyaudio.py", line 589
, in write pa.write_stream(self._stream, frames, num_frames,
TypeError: argument 2 must be read-only bytes-like object, not memoryview`
How do I handle Azure's output so that the PyAudio stream accepts it as audio data?
Full code:
`import azure.cognitiveservices.speech as speechsdk
import os, sys, pyaudio
pa = pyaudio.PyAudio()
my_text = "My emotional experiences are varied, but mostly involve trying to find a balance between understanding others’ feelings and managing my own. I also explore the intersection of emotion and technology through affective computing and related research."
voc_data = {
'channels': 1 if sys.platform == 'darwin' else 2,
'rate': 44100,
'width': pa.get_sample_size(pyaudio.paInt16),
'format': pyaudio.paInt16,
'frames': []
}
my_stream = pa.open(format=voc_data['format'],
channels=voc_data['channels'],
rate=voc_data['rate'],
output=True)
speech_key = os.getenv('SPEECH_KEY')
service_region = os.getenv('SPEECH_REGION')
def speech_synthesis_to_push_audio_output_stream():
"""performs speech synthesis and push audio output to a stream"""
class PushAudioOutputStreamSampleCallback(speechsdk.audio.PushAudioOutputStreamCallback):
"""
Example class that implements the PushAudioOutputStreamCallback, which is used to show
how to push output audio to a stream
"""
def __init__(self) -> None:
super().__init__()
self._audio_data = bytes(0)
self._closed = False
def write(self, audio_buffer: memoryview) -> int:
"""
The callback function which is invoked when the synthesizer has an output audio chunk
to write out
"""
self._audio_data += audio_buffer
my_stream.write(audio_buffer)
print("{} bytes received.".format(audio_buffer.nbytes))
return audio_buffer.nbytes
def close(self) -> None:
"""
The callback function which is invoked when the synthesizer is about to close the
stream.
"""
self._closed = True
print("Push audio output stream closed.")
def get_audio_data(self) -> bytes:
return self._audio_data
def get_audio_size(self) -> int:
return len(self._audio_data)
# Creates an instance of a speech config with specified subscription key and service region.
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
# Creates customized instance of PushAudioOutputStreamCallback
stream_callback = PushAudioOutputStreamSampleCallback()
# Creates audio output stream from the callback
push_stream = speechsdk.audio.PushAudioOutputStream(stream_callback)
# Creates a speech synthesizer using push stream as audio output.
stream_config = speechsdk.audio.AudioOutputConfig(stream=push_stream)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=stream_config)
# Receives a text from console input and synthesizes it to stream output.
while True:
# print("Enter some text that you want to synthesize, Ctrl-Z to exit")
# try:
# text = input()
# except EOFError:
# break
result = speech_synthesizer.speak_text_async(my_text).get()
# Check result
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
print("Speech synthesized for text [{}], and the audio was written to output stream.".format(text))
elif result.reason == speechsdk.ResultReason.Canceled:
cancellation_details = result.cancellation_details
print("Speech synthesis canceled: {}".format(cancellation_details.reason))
if cancellation_details.reason == speechsdk.CancellationReason.Error:
print("Error details: {}".format(cancellation_details.error_details))
# Destroys result which is necessary for destroying speech synthesizer
del result
# Destroys the synthesizer in order to close the output stream.
del speech_synthesizer
print("Totally {} bytes received.".format(stream_callback.get_audio_size()))
speech_synthesis_to_push_audio_output_stream()`
Here , I have a work around where instead of the using stream use a file. Where the audio will be stored in the file and then we simply read the file and play it using Py audio.
# Dependencies
import os
import azure.cognitiveservices.speech as speechsdk
import pyaudio
import wave
speech_config = speechsdk.SpeechConfig(subscription="<Key>", region="<Region>")
# Audio Config
audio_config = speechsdk.audio.AudioOutputConfig(filename="background.wav")
speech_config.speech_synthesis_voice_name='en-US-JennyNeural'
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
print("Enter the Text:- ")
text = input()
speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()
print("Conversion is Complete")
filename = 'background.wav' # Same is in audio config
chunk = 1024
file = wave.open(filename, 'rb')
p = pyaudio.PyAudio()
stream = p.open(format = p.get_format_from_width(file.getsampwidth()),
channels = file.getnchannels(),
rate = file.getframerate(),
output = True)
data = file.readframes(chunk)
print("Starting Audio")
while data != '':
stream.write(data)
data = file.readframes(chunk)
stream.stop_stream()
stream.close()
p.terminate()
Here It will take more space and more time as we are reading the file.

Set up pygame mixer [duplicate]

I tried pygame for playing wav file like this:
import pygame
pygame.init()
pygame.mixer.music.load("mysound.wav")
pygame.mixer.music.play()
pygame.event.wait()
but It change the voice and I don't know why!
I read this link solutions and can't solve my problem with playing wave file!
for this solution I dont know what should I import?
s = Sound()
s.read('sound.wav')
s.play()
and for this solution /dev/dsp dosen't exist in new version of linux :
from wave import open as waveOpen
from ossaudiodev import open as ossOpen
s = waveOpen('tada.wav','rb')
(nc,sw,fr,nf,comptype, compname) = s.getparams( )
dsp = ossOpen('/dev/dsp','w')
try:
from ossaudiodev import AFMT_S16_NE
except ImportError:
if byteorder == "little":
AFMT_S16_NE = ossaudiodev.AFMT_S16_LE
else:
AFMT_S16_NE = ossaudiodev.AFMT_S16_BE
dsp.setparameters(AFMT_S16_NE, nc, fr)
data = s.readframes(nf)
s.close()
dsp.write(data)
dsp.close()
and when I tried pyglet It give me this error:
import pyglet
music = pyglet.resource.media('mysound.wav')
music.play()
pyglet.app.run()
--------------------------
nima#ca005 Desktop]$ python play.py
Traceback (most recent call last):
File "play.py", line 4, in <module>
music = pyglet.resource.media('mysound.wav')
File "/usr/lib/python2.7/site-packages/pyglet/resource.py", line 587, in media
return media.load(path, streaming=streaming)
File "/usr/lib/python2.7/site-packages/pyglet/media/__init__.py", line 1386, in load
source = _source_class(filename, file)
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 194, in __init__
format = wave_form.get_format_chunk()
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 174, in get_format_chunk
for chunk in self.get_chunks():
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 110, in get_chunks
chunk = cls(self.file, name, length, offset)
File "/usr/lib/python2.7/site-packages/pyglet/media/riff.py", line 155, in __init__
raise RIFFFormatException('Size of format chunk is incorrect.')
pyglet.media.riff.RIFFFormatException: Size of format chunk is incorrect.
AL lib: ReleaseALC: 1 device not closed
You can use PyAudio. An example here on my Linux it works:
#!usr/bin/env python
#coding=utf-8
import pyaudio
import wave
#define stream chunk
chunk = 1024
#open a wav format music
f = wave.open(r"/usr/share/sounds/alsa/Rear_Center.wav","rb")
#instantiate PyAudio
p = pyaudio.PyAudio()
#open stream
stream = p.open(format = p.get_format_from_width(f.getsampwidth()),
channels = f.getnchannels(),
rate = f.getframerate(),
output = True)
#read data
data = f.readframes(chunk)
#play stream
while data:
stream.write(data)
data = f.readframes(chunk)
#stop stream
stream.stop_stream()
stream.close()
#close PyAudio
p.terminate()
Works for me on Windows:
https://pypi.org/project/playsound/
>>> from playsound import playsound
>>> playsound('/path/to/a/sound/file/you/want/to/play.wav')
NOTE: This has a bug in Windows where it doesn't close the stream.
I've added a PR for a fix here:
https://github.com/TaylorSMarks/playsound/pull/53/commits/53240d970aef483b38fc6d364a0ae0ad6f8bf9a0
The reason pygame changes your audio is mixer defaults to a 22k sample rate:
initialize the mixer module
pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=4096): return None
Your wav is probably 8k. So when pygame plays it, it plays roughly twice as fast. So specify your wav frequency in the init.
Pyglet has some problems correctly reading RIFF headers. If you have a very basic wav file (with exactly a 16 byte fmt block) with no other information in the fmt chunk (like 'fact' data), it works. But it makes no provision for additional data in the chunks, so it's really not adhering to the RIFF interface specification.
PyGame has 2 different modules for playing sound and music, the pygame.mixer module and the pygame.mixer.music module. This module contains classes for loading Sound objects and controlling playback. The difference is explained in the documentation:
The difference between the music playback and regular Sound playback is that the music is streamed, and never actually loaded all at once. The mixer system only supports a single music stream at once.
If you want to play a single wav file, you have to initialize the module and create a pygame.mixer.Sound() object from the file. Invoke play() to start playing the file. Finally, you have to wait for the file to play.
Use get_length() to get the length of the sound in seconds and wait for the sound to finish:
(The argument to pygame.time.wait() is in milliseconds)
import pygame
pygame.mixer.init()
my_sound = pygame.mixer.Sound('mysound.wav')
my_sound.play()
pygame.time.wait(int(my_sound.get_length() * 1000))
Alternatively you can use pygame.mixer.get_busy to test if a sound is being mixed. Query the status of the mixer continuously in a loop:
import pygame
pygame.init()
pygame.mixer.init()
my_sound = pygame.mixer.Sound('mysound.wav')
my_sound.play()
while pygame.mixer.get_busy():
pygame.time.delay(10)
pygame.event.poll()
Windows
winsound
If you are a Windows user,the easiest way is to use winsound.You don't even need to install it.
Not recommended, too few functions
import winsound
winsound.PlaySound("Wet Hands.wav", winsound.SND_FILENAME)
# add winsound.SND_ASYNC flag if you want to wait for it.
# like winsound.PlaySound("Wet Hands.wav", winsound.SND_FILENAME | winsound.SND_ASYNC)
mp3play
If you are looking for more advanced functions, you can try mp3play.
Unluckily,mp3play is only available in Python2 and Windows.
If you want to use it on other platforms,use playsound despite its poor functions.If you want to use it in Python3,I will give you the modified version which is available on Python 3.(at the bottom of the answer)
Also,mp3play is really good at playing wave files, and it gives you more choices.
import time
import mp3play
music = mp3play.load("Wet Hands.wav")
music.play()
time.sleep(music.seconds())
Cross-platform
playsound
Playsound is very easy to use,but it is not recommended because you can't pause or get some infomation of the music, and errors often occurs.Unless other ways doesn't work at all, you may try this.
import playsound
playsound.playsound("Wet Hands.wav", block=True)
pygame
I'm using this code and it works on Ubuntu 22.04 after my test.
If it doesn't work on your machine, consider updating your pygame lib.
import pygame
pygame.mixer.init()
pygame.mixer.music.load("Wet Hands.wav")
pygame.mixer.music.play()
while pygame.mixer.music.get_busy():
pass
pyglet
This works on Windows but it doesn't work on my Ubuntu, so I can do nothing.
import pyglet
import time
sound = pyglet.media.load("Wet Hands.wav", "Wet Hands.wav")
sound.play()
time.sleep(sound.duration)
Conclusion
It seems that you are using Linux,so playsound may be your choice.My code maybe cannot solve your problem by using pygame and pyglet,because I always use Windows.If none of the solutions work on your machine,I suggest you run the program on Windows...
To other users seeing my answer, I have done many tests among many libraries,so if you are using Windows,you may try mp3play which can play both mp3 and wave files, and mp3play is the most pythonic, easy, light-weight and functional library.
mp3play in Python3
just copy the code below and create a file named mp3play.py in your working directory and paste the content.
import random
from ctypes import windll, c_buffer
class _mci:
def __init__(self):
self.w32mci = windll.winmm.mciSendStringA
self.w32mcierror = windll.winmm.mciGetErrorStringA
def send(self, command):
buffer = c_buffer(255)
command = command.encode(encoding="utf-8")
errorcode = self.w32mci(command, buffer, 254, 0)
if errorcode:
return errorcode, self.get_error(errorcode)
else:
return errorcode, buffer.value
def get_error(self, error):
error = int(error)
buffer = c_buffer(255)
self.w32mcierror(error, buffer, 254)
return buffer.value
def directsend(self, txt):
(err, buf) = self.send(txt)
# if err != 0:
# print('Error %s for "%s": %s' % (str(err), txt, buf))
return err, buf
class _AudioClip(object):
def __init__(self, filename):
filename = filename.replace('/', '\\')
self.filename = filename
self._alias = 'mp3_%s' % str(random.random())
self._mci = _mci()
self._mci.directsend(r'open "%s" alias %s' % (filename, self._alias))
self._mci.directsend('set %s time format milliseconds' % self._alias)
err, buf = self._mci.directsend('status %s length' % self._alias)
self._length_ms = int(buf)
def volume(self, level):
"""Sets the volume between 0 and 100."""
self._mci.directsend('setaudio %s volume to %d' %
(self._alias, level * 10))
def play(self, start_ms=None, end_ms=None):
start_ms = 0 if not start_ms else start_ms
end_ms = self.milliseconds() if not end_ms else end_ms
err, buf = self._mci.directsend('play %s from %d to %d'
% (self._alias, start_ms, end_ms))
def isplaying(self):
return self._mode() == 'playing'
def _mode(self):
err, buf = self._mci.directsend('status %s mode' % self._alias)
return buf
def pause(self):
self._mci.directsend('pause %s' % self._alias)
def unpause(self):
self._mci.directsend('resume %s' % self._alias)
def ispaused(self):
return self._mode() == 'paused'
def stop(self):
self._mci.directsend('stop %s' % self._alias)
self._mci.directsend('seek %s to start' % self._alias)
def milliseconds(self):
return self._length_ms
def __del__(self):
self._mci.directsend('close %s' % self._alias)
_PlatformSpecificAudioClip = _AudioClip
class AudioClip(object):
__slots__ = ['_clip']
def __init__(self, filename):
self._clip = _PlatformSpecificAudioClip(filename)
def play(self, start_ms=None, end_ms=None):
if end_ms is not None and end_ms < start_ms:
return
else:
return self._clip.play(start_ms, end_ms)
def volume(self, level):
assert 0 <= level <= 100
return self._clip.volume(level)
def isplaying(self):
return self._clip.isplaying()
def pause(self):
return self._clip.pause()
def unpause(self):
return self._clip.unpause()
def ispaused(self):
return self._clip.ispaused()
def stop(self):
return self._clip.stop()
def seconds(self):
return int(round(float(self.milliseconds()) / 1000))
def milliseconds(self):
return self._clip.milliseconds()
def load(filename):
"""Return an AudioClip for the given filename."""
return AudioClip(filename)

Save an h264 video input from a drone camera as a video file on the Computer

I have a tello ryze drone, which has got a camera onboard. I am connecting to the drone in order to receive the video over wifi. My system is windows 10 and I am using python 2.7.
I am receiving a h264 bytecode and i use the libh264 decoder from tello in order to get the frames of the video, which i also display in my UI.
What i need to do is to save this video as a file on my computer and i have problems with this. I am able to make a snapshot with opencv and save it as an image, this isn't a problem. But making a video isn't working for some reason.
I read many posts here like this one but they dont work out for me. I am getting either an error or i get a very small video file, which doesnt open.
My Frames are List of List with RGB values like:
[[255,200,100][55,200,100][25,20,100]]
Here is my code for a better understanding in order to help me
This is the UI Part(i just copy here the needed code only):
def videoLoop(self):
try:
# start the thread that get GUI image and draw skeleton
time.sleep(0.5)
self.sending_command_thread.start()
while not self.stopEvent.is_set():
system = platform.system()
# read the frame for GUI show
self.frame = self.drone.read()
if self.frame is None or self.frame.size == 0:
continue
# transfer the format from frame to image
image = Image.fromarray(self.frame)
# we found compatibility problem between Tkinter,PIL and Macos,and it will
# sometimes result the very long preriod of the "ImageTk.PhotoImage" function,
# so for Macos,we start a new thread to execute the _updateGUIImage function.
if system == "Windows" or system == "Linux":
self.refreshUI(image)
else:
thread_tmp = threading.Thread(target=self.refreshUI, args=(image,))
thread_tmp.start()
time.sleep(0.03)
except RuntimeError as e:
print("[INFO] caught a RuntimeError")
def refreshUI(self, image):
image = ImageTk.PhotoImage(image)
# if the imagePanel none ,we need to initial it
if self.imagePanel is None:
self.imagePanel = tki.Label(image=image)
self.imagePanel.image = image
self.imagePanel.pack(side="left", fill="both",
expand="yes", padx=10, pady=10)
# otherwise, simply update the imagePanel
else:
self.imagePanel.configure(image=image)
self.imagePanel.image = image
def takeSnapshot(self):
# grab the current timestamp and use it to construct the filename
ts = datetime.datetime.now()
filename = "{}.jpg".format(ts.strftime("%d-%m-%Y_%H-%M-%S"))
p = os.path.sep.join((self.screenShotPath, filename))
# save the file
cv2.imwrite(p, cv2.cvtColor(self.frame, cv2.COLOR_RGB2BGR))
print("[INFO] saved {}".format(filename))
Below you can find the Tello code (only the needed part too):
def read(self):
"""Return the last frame from camera."""
if self.is_freeze:
return self.last_frame
else:
return self.frame
def video_freeze(self, is_freeze=True):
"""Pause video output -- set is_freeze to True"""
self.is_freeze = is_freeze
if is_freeze:
self.last_frame = self.frame
def _receive_video_thread(self):
"""
Listens for video streaming (raw h264) from the Tello.
Runs as a thread, sets self.frame to the most recent frame Tello captured.
"""
packet_data = b''
while True:
try:
res_string, ip = self.socket_video.recvfrom(2048)
packet_data += res_string
# end of frame
if len(res_string) != 1460:
for frame in self._h264_decod(packet_data):
self.frame = frame
packet_data = b''
except socket.error as exc:
print(("Caught exception sock.error : %s" % exc))
def _h264_decod(self, packet_data):
"""
decode raw h264 format data from Tello
:param packet_data: raw h264 data array
:return: a list of decoded frame
"""
res_frame_list = []
frames = self.decoder.decode(packet_data)
for framedata in frames:
(frame, w, h, ls) = framedata
if frame is not None:
# print ('frame size %i bytes, w %i, h %i, linesize %i' % (len(frame), w, h, ls))
frame = np.frombuffer(frame, dtype=np.ubyte, count=len(frame))
frame = (frame.reshape((h, ls // 3, 3)))
frame = frame[:, :w, :]
res_frame_list.append(frame)
return res_frame_list
It would be very kind of you if someone could help me write a method like this pseudocode:
def saveVideo(self, frame_or_whatever_i_need_here):
out = cv2.VideoWriter('output.avi_or_other_format', -1, 20.0, (640,480))
out.write(frame_or_whatever_i_need_here)
out.release()
Edit 1:
i found an option making a video out of my snapshots, this means that i coud make snapshot over a thread and later save them to a video. That would be an option. The problem is, that it would consume too much space in the disk. The workaround link is here
I found the solution, so i will post it here if someone needs the same thing. I used the following blog and modified the code to do my work, you can find the post here
self.frame = None
self.frame_array = []
def videoLoop(self):
try:
# start the thread that get GUI image and draw skeleton
time.sleep(0.5)
self.sending_command_thread.start()
while not self.stopEvent.is_set():
system = platform.system()
# read the frame for GUI show
self.frame = self.drone.read()
self.frame_array.append(self.frame)
if self.frame is None or self.frame.size == 0:
continue
# transfer the format from frame to image
image = Image.fromarray(self.frame)
if system == "Windows" or system == "Linux":
self.refreshUI(image)
else:
thread_tmp = threading.Thread(target=self.refreshUI, args=(image,))
thread_tmp.start()
time.sleep(0.03)
except RuntimeError as e:
print("[INFO] caught a RuntimeError")
def convert_frames_to_video(self, pathOut, fps):
size = (self.videoWidth, self.videoHeight)
out = cv2.VideoWriter(pathOut, cv2.VideoWriter_fourcc(*'DIVX'), fps, size)
for i in range(len(self.frame_array)):
# writing to a image array
out.write(self.frame_array[i])
out.release()
def onClose(self):
print("[INFO] closing...")
self.convert_frames_to_video('video.avi', 25.0)
self.stopEvent.set()
del self.drone
self.root.quit()

Python, text to speech, invalid syntax, using IDLE editor

I followed a tutorial on Youtube on how to do TextToSpeech with python, and I am getting the following error
import re
import wave
import pyaudio
import _thread
import time
class TextToSpeech:
CHUNK = 1024
def __init__(self, words_pron_dict:str = 'cmudict-0.7b.txt'):
self._l = {}//Error right here ^
self._load_words(words_pron_dict)
def _load_words(self, words_pron_dict:str):
with open(words_pron_dict, 'r') as file:
for line in file:
if not line.startswith(';;;'):
key, val = line.split(' ',2)
self._l[key] = re.findall(r"[A-Z]+",val)
def get_pronunciation(self, str_input):
list_pron = []
for word in re.findall(r"[\w']+",str_input.upper()):
if word in self._l:
list_pron += self._l[word]
print(list_pron)
delay=0
for pron in list_pron:
_thread.start_new_thread( TextToSpeech._play_audio, (pron,delay,))
delay += 0.145
def _play_audio(sound, delay):
try:
time.sleep(delay)
wf = wave.open("sounds/"+sound+".wav", 'rb')
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True)
data = wf.readframes(TextToSpeech.CHUNK)
while data:
stream.write(data)
data = wf.readframes(TextToSpeech.CHUNK)
stream.stop_stream()
stream.close()
p.terminate()
return
except:
pass
if __name__ == '__main__':
tts = TextToSpeech()
while True:
tts.get_pronunciation(input('Enter a word or phrase: '))
The error is "Invalid Sytanx" right where the colon is right before "str" at the top. I'm not sure what I am doing wrong. I am using IDLE for the editor, this script requires pyaudio, which I have installed, and it also requires the document "cmudict-0.7b.text" which I also have.
I've tried copying the name of the file directly to the code, adding parenthesis changing the ' to a " where the txt file name is, to no prevail. I would appreciate it if someone could help me on this and give me some insight on what I'm doing wrong.
I'm using Python 2.7.
Thanks.

I can't run a simple code using pyaudio - [Errno -9996] Invalid output device (no default output device)

(I'm new at python)
I'm trying to run a simple code about pyaudio. I just copied and pasted a code that I found on the pyaudio web site.
I get this error:
OSError Traceback (most recent call last)
<ipython-input-7-3fc52ceecbf3> in <module>()
15 channels=wf.getnchannels(),
16 rate=wf.getframerate(),
---> 17 output=True)
18
19 # read data
/home/gustavolg/anaconda3/lib/python3.5/site-packages/pyaudio.py in open(self, *args, **kwargs)
748 """
749
--> 750 stream = Stream(self, *args, **kwargs)
751 self._streams.add(stream)
752 return stream
/home/gustavolg/anaconda3/lib/python3.5/site-packages/pyaudio.py in __init__(self, PA_manager, rate, channels, format, input, output, input_device_index, output_device_index, frames_per_buffer, start, input_host_api_specific_stream_info, output_host_api_specific_stream_info, stream_callback)
439
440 # calling pa.open returns a stream object
--> 441 self._stream = pa.open(**arguments)
442
443 self._input_latency = self._stream.inputLatency
OSError: [Errno -9996] Invalid output device (no default output device)
I can not figure out how to solve this error. I don't know if this has something to do with audio driver or if the code needs an output declaration. I mean, if I have to select an output.
The code:
import pyaudio
import wave
import sys
CHUNK = 1024
wf = wave.open("/home/gustavolg/anaconda3/aPython/file.wav", 'rb')
# instantiate PyAudio (1)
p = pyaudio.PyAudio()
# open stream (2)
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True)
# read data
data = wf.readframes(CHUNK)
# play stream (3)
while len(data) > 0:
stream.write(data)
data = wf.readframes(CHUNK)
# stop stream (4)
stream.stop_stream()
stream.close()
# close PyAudio (5)
p.terminate()
I'm using python3 on Jupyter notebook.
check the following steps:
>>> import pyaudio
>>> pa = pyaudio.PyAudio()
>>> pa.get_default_input_device_info()
{'defaultLowOutputLatency': 0.008707482993197279,
'maxOutputChannels': 32,
'hostApi': 0,
'defaultSampleRate': 44100.0,
'defaultHighOutputLatency': 0.034829931972789115,
'name': 'default',
'index': 15,
'maxInputChannels': 32,
'defaultHighInputLatency': 0.034829931972789115,
'defaultLowInputLatency': 0.008707482993197279,
'structVersion': 2}
>>> pyaudio.pa.__file__
'/root/.virtualenvs/py3k/lib/python3.4/site-packages/_portaudio.cpython-34m.so'
make sure you have a default input device,if not you can refer to here
I want it's useful for you!
This may not be the issue that OP ran into, but I found a cause and fix of the [Errno -996] exception (and I thought I'd share this here since Google results lead here). Attempting to play multiple sounds with pyaudio at the same time can cause such a crash. For example, with something like this:
def playSound( self, soundName ):
audioFilePath = self._soundBank.get( soundName )
if not audioFilePath:
audioFilePath = os.path.join( globalData.paths['audioFolder'], soundName + ".wav" )
if not os.path.exists( audioFilePath ):
print( 'Invalid or missing sound file for', soundName )
return
self._soundBank[soundName] = audioFilePath
# Play the audio clip in a separate thread so that it's non-blocking
audioThread = Thread( target=self._playSoundHelper, args=(audioFilePath,) )
audioThread.start()
def _playSoundHelper( self, soundFilePath ):
""" Helper (thread-target) function for playSound(). Runs in a separate
thread to prevent audio playback from blocking anything else. """
p = None
wf = None
stream = None
try:
# Instantiate PyAudio and open the target audio file
p = pyaudio.PyAudio()
wf = wave.open( soundFilePath, 'rb' )
# Open an audio data stream
stream = p.open( format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True )
# Continuously read/write data from the file to the stream until there is no data left
data = wf.readframes( 1024 )
while len( data ) > 0:
stream.write( data )
data = wf.readframes( 1024 )
except AttributeError:
pass # Program probably closed while playing audio
except Exception as err:
soundFileName = os.path.basename( soundFilePath )
print( 'Unable to play "{}" sound.'.format(soundFileName) )
print( err )
# Stop the stream
if stream:
stream.stop_stream()
stream.close()
# Close PyAudio
if p:
p.terminate()
# Close the wav file
if wf:
wf.close()
Above, if playSound() is called too soon after another playSound(), the program will crash. However, this can be resolved by using an Event() object to mediate initialization of pyaudio and the stream so that only one may be initialized at a time, while still allowing the sounds to play back for-the-most-part simultaneously or 'on top' of each other (i.e the work in the data read/write loop portion of the thread). I added this like so:
def __init( self ):
self.audioGate = Event()
self.audioGate.set()
def _playSoundHelper( self, soundFilePath ):
""" Helper (thread-target) function for playSound(). Runs in a separate
thread to prevent audio playback from blocking anything else. """
p = None
wf = None
stream = None
try:
# Prevent race conditions on multiple sounds playing at once (can cause a crash); only allow one file to begin playing at a time
self.audioGate.wait() # Blocks until the following is done (event is re-set)
self.audioGate.clear()
# Instantiate PyAudio and open the target audio file
p = pyaudio.PyAudio()
wf = wave.open( soundFilePath, 'rb' )
# Open an audio data stream
stream = p.open( format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True )
self.audioGate.set() # Allow a new sound to be opened/initialized
# Continuously read/write data from the file to the stream until there is no data left
data = wf.readframes( 1024 )
while len( data ) > 0:
stream.write( data )
data = wf.readframes( 1024 )
except AttributeError:
pass # Program probably closed while playing audio
except Exception as err:
soundFileName = os.path.basename( soundFilePath )
print( 'Unable to play "{}" sound.'.format(soundFileName) )
print( err )
# Stop the stream
if stream:
stream.stop_stream()
stream.close()
# Close PyAudio
if p:
p.terminate()
# Close the wav file
if wf:
wf.close()
You could similarly use the Event to instead cancel any existing audio playback (by checking in the loop) before starting the latest one.

Categories