I cannot play .wav file and a windows alert sound is played - python

from gtts import gTTS
import winsound
# Synthesize a short English phrase with gTTS, save it, and try to play it with winsound.
Text = "Hello world"
Audio = gTTS(text=Text,lang="en",slow=False)
# NOTE(review): gTTS presumably writes MP3 data regardless of the ".wav" extension used here — confirm against the gTTS docs.
Audio.save("T.wav")
# SND_FILENAME makes PlaySound treat its first argument as a file name.
# NOTE(review): winsound is documented as playing WAV files; if T.wav really holds MP3 data, falling back to the default system sound would be expected — verify.
winsound.PlaySound("T.wav", winsound.SND_FILENAME)
When the code is run there are no errors, but the default Windows alert sound is played instead of the T.wav file.

Related

I keep getting a permission denial error whenever I try to access a file in a speech recognition program

I'm trying out speech recognition and using it as input for some statements, while having the program "speak" back to me using the playsound and gTTS modules. But I have run into an issue that I can't find a solution for; I tried the most common solutions, but with no luck.
The program uses the playsound, speech_recognition, and gTTS modules and two functions; speak() lets the program speak back to the user using google's text to sound translation, and get_audio() that receives input from the user's microphone using speech_recognition's recognizer and microphone classes.
import os
import time
import playsound
import speech_recognition as sr
from gtts import gTTS
run = True
def speak(text):
    """Say *text* aloud: synthesize it with gTTS, then play the saved file.

    The audio is always written to ``voice.mp3`` and is left on disk after
    playback.
    """
    filename = "voice.mp3"
    gTTS(text=text, lang="en").save(filename)
    playsound.playsound(filename)
def get_audio():
    """Record from the microphone and return the text Google recognized.

    Returns an empty string when recognition raised; the exception is
    printed rather than propagated.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        recording = recognizer.listen(mic)
        said = ""

        try:
            said = recognizer.recognize_google(recording)
            print(said)
        except Exception as e:
            print(f"Exception: {e}")

    return said
import random

# Conversation loop: keep listening and answering until the user says
# something containing "goodbye".
while run:
    text = get_audio()

    if "hello" in text:
        speak("Hello, how are you?")

    if "what are you" in text:
        print("")
        speak("I am a speech recognition program")

    if "goodbye" in text:
        # `"a" or "b" or "c"` always evaluates to the first string, so the
        # other farewells were never used; pick one at random instead.
        speak(random.choice(["Talk to you later", "Bye", "Cya"]))
        run = False
I have the program set up with a while loop so a conversation can play out, and it only breaks once the user says "Goodbye". The problem seems to be that the .mp3 file (voice.mp3 which is what the speak() function uses to store audio for the program to play back) can't be accessed after its creation. Both the python file and mp3 file are stored within the same folder.
Here is the error message in full:
who are you
hello
Traceback (most recent call last):
File "c:\Users\User\OneDrive\Documents\VS Code Projects\Speech Recognition\main_va.py", line 34, in <module>
speak("Hello, how are you?")
File "c:\Users\User\OneDrive\Documents\VS Code Projects\Speech Recognition\main_va.py", line 12, in speak
tts.save(filename)
File "C:\Python\Python310\lib\site-packages\gtts\tts.py", line 328, in save
with open(str(savefile), "wb") as f:
PermissionError: [Errno 13] Permission denied: 'voice.mp3'
PS C:\Users\User\OneDrive\Documents\VS Code Projects\Speech Recognition>
I received a response on the first call ("who are you"), but then the error message popped up after the second call ("hello").
Specs: python 3.10.4
playsound 1.2.2
Rest is up to date
Your solution works fine.
I just would tweak it to leave the file behind (in case you want to listen to it for testing purposes) and instead, remove it at the beginning if it exists.
Also passing filename to your function ensures nothing is hard coded
def speak(text, filename):
    """Synthesize *text* with gTTS into *filename* and play it.

    A file left over from a previous call is deleted first. The new file is
    kept after playback so it can be inspected.

    Args:
        text: The phrase to speak.
        filename: Path of the audio file to (re)create.
    """
    # EAFP: remove directly instead of exists() + remove(), which can race
    # with something else deleting the file in between.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass
    tts = gTTS(text=text, lang="en")
    tts.save(filename)
    playsound.playsound(filename)
I found a solution that seems to work just fine; I delete the .mp3 file each time after I use it, so at the end of the speak() function I just use os.remove(filename) and then the next time it wants to say something a new file is created.
I found some other solutions saying that you should rename the filename every time you make one, but that would make too much clutter for me.
Here is the change that I made to my code, it was just a single line within the speak() function:
def speak(text):
    """Speak *text*, using ``voice.mp3`` as a throwaway audio file.

    The file is deleted again once playback has returned.
    """
    tts = gTTS(text=text, lang="en")
    filename = "voice.mp3"
    tts.save(filename)
    playsound.playsound(filename)
    # Delete via the variable rather than repeating the literal, so the
    # saved and removed paths cannot drift apart.
    os.remove(filename)
This works perfectly for me so far, it can take in as many inputs as needed since the file is deleted and recreated every time the speak() function is used.
Again if a better and more efficient solution is suggested or found, I'll gladly take it.

Pygame mixer.music.load() does not work with gTTs

I'm trying to play a gTTS voice using only pygame.mixer.music.load(). I don't want to save the voice to a file, so I saved it into a BytesIO stream. gTTS returns .mp3 audio, which I know has limited support in pygame, so I tried to convert the .mp3 audio to .wav using the pydub module, but I couldn't find a way to do so without saving it to a file. How can I fix this issue in any way possible?
from pygame import mixer
from gtts import gTTS
def play(buffer):
    """Rewind *buffer* and play its contents through pygame's music mixer."""
    buffer.seek(0)  # start loading from the first byte
    music = mixer.music
    music.load(buffer)
    print("Sound loaded. Time to play!")
    music.play()
def generate_voice(text, accent):
    """Return an in-memory buffer holding the gTTS audio for *text*.

    *accent* is accepted but not used by this implementation.
    """
    audio_buffer = BytesIO()
    gTTS(text).write_to_fp(audio_buffer)
    return audio_buffer
# Generate speech for a fixed phrase and play it straight from memory.
text = "Hi there"
# NOTE(review): `accent` is not defined anywhere in the visible snippet — as shown, this call would raise NameError; confirm where it is meant to come from.
buffer = generate_voice(text, accent)
play(buffer)
The error returned by pygame.mixer.music.load(): pygame.error: ModPlug_Load failed
I fixed this issue by using pydub to convert the audio into a wav format:
def play(buffer):
    """Initialize pygame's mixer and play the audio held in *buffer*.

    The buffer is loaded from its current position; it is not rewound here.
    """
    mixer.init()
    music = mixer.music
    music.load(buffer)
    print("Sound loaded. Time to play!")
    music.play()
def generate_voice(text, lang):
    """Return an in-memory WAV rendering of *text* spoken in *lang*.

    The gTTS output is written to one buffer, decoded with pydub, and
    re-exported as WAV into a second buffer.

    Args:
        text: The phrase to synthesize.
        lang: Language code passed to gTTS.

    Returns:
        The file object returned by ``AudioSegment.export`` for the WAV data.
    """
    from io import BytesIO

    mp3_fp = BytesIO()
    tts = gTTS(text=text, lang=lang)
    tts.write_to_fp(mp3_fp)
    mp3_fp.seek(0)  # rewind so pydub reads from the start
    sound = AudioSegment.from_file(mp3_fp)

    # Export into a dedicated buffer. Previously the WAV was exported back
    # into the source buffer, overwriting it in place and leaving the freshly
    # created WAV buffer unused.
    wav_fp = BytesIO()
    return sound.export(wav_fp, format="wav")

Code works on Pycharm but gets blocked in the console

I've coded a python Youtube to MP3 tool to download youtube mp3s using a keystroke, here's my code:
from pytube import YouTube
from moviepy.editor import *
import os, shutil
import keyboard as kb
def keyboard():
    """Start a download when the Ctrl+I hotkey is currently pressed.

    Failures inside ``dl()`` are reported on stdout instead of being
    silently discarded, so a failing download is visible to the user.
    """
    if not kb.is_pressed("ctrl+i"):
        return
    try:
        dl()
    except Exception as exc:
        # The former bare ``except: pass`` hid every error (and also
        # swallowed KeyboardInterrupt); keep going, but say what went wrong.
        print(f"Download failed: {exc!r}")
def dl():
    """Download the YouTube video whose URL is on the clipboard as an MP3.

    The video is downloaded, its audio track is written next to it with an
    ``.mp3`` extension, the video file is deleted, and the MP3 is moved into
    the downloads folder.
    """
    print("Downloading...")
    # NOTE(review): win32clipboard is not imported in the visible snippet —
    # confirm `import win32clipboard` exists at the top of the real file.
    win32clipboard.OpenClipboard()
    try:
        url = win32clipboard.GetClipboardData()
    finally:
        # Always release the clipboard, even if reading it fails.
        win32clipboard.CloseClipboard()

    mp4 = YouTube(url).streams.get_highest_resolution().download()
    # splitext strips only the final extension; split(".mp4", 1) would cut at
    # the first ".mp4" appearing anywhere in the path.
    mp3 = os.path.splitext(mp4)[0] + ".mp3"

    video_clip = VideoFileClip(mp4)
    try:
        audio_clip = video_clip.audio
        try:
            audio_clip.write_audiofile(mp3)
        finally:
            audio_clip.close()
    finally:
        # Close the clip before deleting the file it was opened from.
        video_clip.close()

    os.remove(mp4)
    shutil.move(mp3, r"C:\Users\eghos\Documents\YouTubeDownloads0")
# Check the hotkey over and over for the lifetime of the program.
while True:
    keyboard()
When I run it on Pycharm, it works well but when I run it in the terminal, it gets blocked after closing the clipboard.
Thank you for your help
Have you tried running cmd as administrator?

How to listen to IBM Watson Text To Speech result without saving to a file (python)

I am writing a simple python program that gets a text file then uses IBM Watson Text To Speech to convert it to audio then play the audio directly using a module such as playsound.
Most of the tutorials only show you how to save the result to a file, and not how to pass it to a module that plays the audio.
from ibm_watson import TextToSpeechV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
# Build an authenticated Watson Text to Speech client.
authenticator = IAMAuthenticator('{apikey}')
text_to_speech = TextToSpeechV1(authenticator=authenticator)
text_to_speech.set_service_url('{url}')

# Synthesize the phrase as WAV and write the response body to disk.
with open('hello_world.wav', 'wb') as audio_file:
    response = text_to_speech.synthesize(
        'Hello world',
        voice='en-US_AllisonVoice',
        accept='audio/wav',
    )
    audio_file.write(response.get_result().content)
That's not what I want: I want to be able to play the audio without saving it. How can I do that?
If you are open for external libraries, you can install vlc binding for python using pip install python-vlc
And use player method to play audio directly from the content as below.
import vlc
from ibm_watson import TextToSpeechV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
# Build an authenticated Watson Text to Speech client.
authenticator = IAMAuthenticator('{apikey}')
text_to_speech = TextToSpeechV1(
authenticator=authenticator
)
# Point the client at the service instance URL.
text_to_speech.set_service_url('{url}')
# Create the VLC instance with the given command-line style options.
instance = vlc.Instance('--input-repeat=-1', '--fullscreen')
# Create a media player from that instance.
player=instance.media_player_new()
# Build the VLC media from the synthesized audio.
# NOTE(review): the raw response bytes are passed straight to media_new();
# python-vlc presumably expects an MRL or file path here — confirm that bytes are accepted.
media=instance.media_new(
text_to_speech.synthesize(
'Hello world',
voice='en-US_AllisonVoice',
accept='audio/wav').get_result().content)
# Attach the media to the player.
player.set_media(media)
# Start playback.
player.play()
Advantage of vlc player is that you can play most media types directly from URL (not just mp3) and also perform player like options such as
>>> player.pause() #pause play back
>>> player.play() #resume play back
>>> player.stop() #stop play back
*credits

how can i convert a text file to mp3 file using python pyttsx3 and sapi5?

Here is my python code..
import pyttsx3;
# Create a speech engine using the 'sapi5' driver.
engine = pyttsx3.init(driverName='sapi5')
infile = "tanjil.txt"
# Read the whole file; the context manager closes it even if read() raises.
with open(infile, 'r') as f:
    theText = f.read()
# Queue the text, then block until the queued speech has been processed.
engine.say(theText)
engine.runAndWait()
I couldn't save the file to audio file
As of July 14 2019, I'm able to save to file with the pyttsx3 library (without using another library or internet connection).
It doesn't appear to be documented, but looking at the source code in github for the Engine class in "engine.py" (https://github.com/nateshmbhat/pyttsx3/blob/master/pyttsx3/engine.py), I was able to find a "save_to_file" function:
def save_to_file(self, text, filename, name=None):
    '''
    Adds an utterance to the event queue, to be saved to a file.

    @param text: Text to speak
    @type text: unicode
    @param filename: The name of the file to save.
    @param name: Name to associate with this utterance. Included in
        notifications about this utterance.
    @type name: str
    '''
    self.proxy.save_to_file(text, filename, name)
I am able to use this like:
engine.save_to_file('the text I want to save as audio', path_to_save)
Not sure the format - it's some raw audio format (I guess it's maybe something like aiff) - but I can play it in an audio player.
If you install pydub:
https://pypi.org/project/pydub/
then you can easily convert this to mp3, e.g.:
from pydub import AudioSegment
# Load the saved audio with pydub and export it as 'converted.mp3' in MP3 format.
AudioSegment.from_file(path_to_save).export('converted.mp3', format="mp3")
I've tried @Brian's solution but it didn't work for me.
I searched around a bit and couldn't figure out how to save the speech to mp3 with pyttsx3, but I found another solution that doesn't use pyttsx3.
It can take a .txt file and directly output a .wav file,
def txt_zu_wav(eingabe, ausgabe, text_aus_datei = True, geschwindigkeit = 2, Stimmenname = "Zira"):
    """Render text to an audio file through the SAPI.SpVoice COM object.

    Args:
        eingabe: Path of a text file to read, or the text itself when
            *text_aus_datei* is false.
        ausgabe: Path of the audio file to create.
        text_aus_datei: If true, treat *eingabe* as a file path.
        geschwindigkeit: Speaking rate, from -10 to 10.
        Stimmenname: Substring to look for in the voice descriptions. If no
            voice matches, a message is printed and the engine's current
            voice is left unchanged.
    """
    from comtypes.client import CreateObject

    engine = CreateObject("SAPI.SpVoice")
    engine.rate = geschwindigkeit  # from -10 to 10

    # for/else: the else branch runs only when no voice matched (no break).
    for stimme in engine.GetVoices():
        if Stimmenname in stimme.GetDescription():
            engine.Voice = stimme
            break
    else:
        print("Fehler Stimme nicht gefunden -> Standard wird benutzt")

    if text_aus_datei:
        # The context manager closes the file even if reading fails.
        with open(eingabe, 'r') as datei:
            text = datei.read()
    else:
        text = eingabe

    stream = CreateObject("SAPI.SpFileStream")
    # NOTE(review): kept after the CreateObject calls as in the original —
    # comtypes.gen.SpeechLib is presumably generated by them; confirm before
    # moving this import.
    from comtypes.gen import SpeechLib

    stream.Open(ausgabe, SpeechLib.SSFMCreateForWrite)
    try:
        engine.AudioOutputStream = stream
        engine.speak(text)
    finally:
        # Always close the stream, even if speaking fails.
        stream.Close()
# Example 1: read the text to speak from a file (text_aus_datei defaults to True).
txt_zu_wav("test.txt", "test_1.wav")
# Example 2: pass the text itself by setting text_aus_datei to False.
txt_zu_wav("It also works with a string instead of a file path", "test_2.wav", False)
This was tested with Python 3.7.4 on Windows 10.
import pyttsx3
engine = pyttsx3.init("sapi5")
# "voices" is a list of voice descriptor objects; the 'voice' property must be
# set to a voice *id* string, not to the descriptor object itself.
voices = engine.getProperty("voices")
engine.setProperty('voice', voices[0].id)
text = 'Your Text'
# Queue the text to be rendered into the given file.
engine.save_to_file(text, 'name.mp3')
engine.runAndWait() # don't forget to use this line
Try the following code snippet to convert text to audio and save it as an mp3 file.
import pyttsx3
from pydub import AudioSegment
# Create the engine with the 'sapi5' driver.
engine = pyttsx3.init('sapi5')
# Queue the phrase to be saved under the name 'test.mp3'.
engine.save_to_file('This is a test phrase.', 'test.mp3') # raw audio file
# Run the engine so the queued save is carried out.
engine.runAndWait()
# Re-encode in place: the file is loaded with pydub and exported back to the same path as MP3.
AudioSegment.from_file('test.mp3').export('test.mp3', format="mp3") # audio file in mp3 format
NB: pyttsx3 save_to_file() method creates a raw audio file and it won't be useful for other applications to use even if we are able to play it in the media player. pydub is a useful package to convert raw audio into a specific format.

Categories