I was trying to create a voice assistant using python... but my code is showing some weird texts when I run it... down below is my code
from email.mime import audio
from logging.config import listen
import re
from neuralintents import GenericAssistant
import speech_recognition
import sys
import pyttsx3 as tts
recognizer = speech_recognition.Recognizer()
speaker = tts.init()
speaker.setProperty('rate',150)
def create_note():
global recognizer
speaker.say("What do you want to say?")
speaker.runAndWait()
done=False
while not done:
try:
with speech_recognition.Microphone() as mic:
recognizer.adjust_for_ambient_noise(mic, duration=0.2)
audio= recognizer.listen(mic)
note = recognizer.recognize_google(audio)
note = note.lower()
speaker.say("choose a file name")
speaker.runAndWait
recognizer.adjust_for_ambient_noise(mic, duration=.2)
audio = listen(mic)
filename = recognizer.recognize_google(audio)
filename = filename.lower()
with open(filename,'w' ) as f:
f.write(note)
done= True
speaker.say={f"I saved the note {filename}"}
speaker.runAndWait()
except speech_recognition.UnknownValueError:
recognizer = speech_recognition.Recognizer()
speaker.say("I dont got that, please say it again")
speaker.runAndWait()
mappings = {'greeting': create_note}
assitant = GenericAssistant('intents.json',intent_methods=mappings)
assitant.train_model()
I expect it to get the input from the mic then go through a JSON file and say the next thing according to the JSON file....
here is the contents of the JSON file
JSON FILE
Related
I'm currently working on a project where I request a phone call (Mp3) and have to make an automatic transcript through a python script.
I'm using the Azure Speech to text services and got that all working, but that service only supports a Wav. file and I am still stuck at that part.
import azure.cognitiveservices.speech as speechsdk
import time
from os import path
from pydub import AudioSegment
import requests
import hashlib
OID = ***
string = f"***"
encoded = string.encode()
result = hashlib.sha256(encoded)
resultHash = (result.hexdigest())
r = requests.get(f"***", headers={f"***":f"{***}"})
Telefoongesprek = r
# converts audio file (mp3 to Wav.)
#src = Telefoongesprek
#dst = "Telefoongesprek #****.wav"
#sound = AudioSegment.from_mp3(src)
#sound.export(dst, format="wav")
def speech_recognize_continuous_from_file():
speech_config = speechsdk.SpeechConfig(subscription="***", region="***")
speech_config.speech_recognition_language = "nl-NL"
audio_config = speechsdk.audio.AudioConfig(filename="Telefoongesprek #****.wav")
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
done = False
def stop_cb(evt):
print('CLOSING on {}'.format(evt))
nonlocal done
done = True
all_results = []
def handle_final_result(evt):
all_results.append(evt.result.text)
#speech_recognizer.recognizing.connect(handle_final_result)
speech_recognizer.recognized.connect(handle_final_result)
speech_recognizer.session_started.connect(handle_final_result)
speech_recognizer.session_stopped.connect(handle_final_result)
speech_recognizer.canceled.connect(handle_final_result)
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
speech_recognizer.start_continuous_recognition()
while not done:
time.sleep(.5)
speech_recognizer.stop_continuous_recognition()
print(all_results)
speech_recognize_continuous_from_file()
Thats the code im using without all the keys and encryption, and everthing works apart from the convert from MP3 to Wav.
is there any way I can save the requested file locally in this script and pass it through in:
audio_config = speechsdk.audio.AudioConfig(filename="Telefoongesprek #****.wav"). or do I have to save it to the pc and do it another way.
I have been stuck on this problem for over a week and have tried many different ways.
Thanks in advance!
Beau van der Meer
You should be able to save the response data ( you can access the raw bytes with r.content) to a .mp3 file locally and then pass that file path to pydub.
with open('path/to/local/file.mp3', 'wb') as f:
f.write(r.content)
Another option is to use the module io.BytesIO from the standard library.
If you pass it raw bytes, e g import io; f = io.BytesIO(r.content), it will give you a object that behaves like an open filehandle back, which you can pass to functions accepting files. I didn't check that pydub method you are trying to use accepts filehandles or only paths, so you have to check that first.
I am working on a project with speech recognition module and due to my different language, I want to save on text file so I can read and have proper output; but when I tried sys module to save the file it came with some errors. If you help me to fix this I`ll be appreciate
This is my code:
import speech_recognition as sr
import sys
r = sr.Recognizer()
print('How can I help you?')
def my_first_sr():
while True:
try:
with sr.Microphone() as mic:
r.adjust_for_ambient_noise(mic)
audio = r.listen(mic)
text = r.recognize_google(audio, language = 'fa-IR')
print(text)
except sr.UnknownValueError:
print('I didn`t understand!')
except sr.RequestError:
print('Sorry my service is down')
my_first_sr()
output = open('Speech Recognition.txt', 'w')
sys.stdout = output
print(text)
output.close()
You have to write the text to the file output:
import speech_recognition as sr
r = sr.Recognizer()
print('How can I help you?')
def my_first_sr():
while True:
try:
with sr.Microphone() as mic:
r.adjust_for_ambient_noise(mic)
audio = r.listen(mic)
text = r.recognize_google(audio, language = 'fa-IR')
print(text)
output = open('Speech Recognition.txt', 'w')
output.write(text)#Write the text to the file
output.close()
except sr.UnknownValueError:
print('I didn`t understand!')
except sr.RequestError:
print('Sorry my service is down')
my_first_sr()
I already tried this code to convert my large wav file to text
import speech_recognition as sr
r = sr.Recognizer()
hellow=sr.AudioFile('hello_world.wav')
with hellow as source:
audio = r.record(source)
try:
s = r.recognize_google(audio)
print("Text: "+s)
except Exception as e:
print("Exception: "+str(e))
But it is not converting it accurately, the reason I feel it's the 'US' accent.
Please tell me how i can convert whole large wav file accurately.
Google's speech to text is very effective, try the below link,
https://cloud.google.com/speech-to-text/
You can choose the language (English US in your case) and also upload files.
Like #bigdataolddriver commented 100% accuracy is not possible yet, and will be worth millions.
Google speech to text has three types of APIs
Synchronous, Asynchronous and streaming, in which asynchronous allows you to ~480 minutes audio conversion while others will only let you ~1 minute. Following is the sample code to do the conversion.
filepath = "~/audio_wav/" #Input audio file path
output_filepath = "~/Transcripts/" #Final transcript path
bucketname = "callsaudiofiles" #Name of the bucket created in the step before
# Import libraries
from pydub import AudioSegment
import io
import os
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
import wave
from google.cloud import storage
Speech to text support wav files with LINEAR16 or MULAW encoded audio.
Below is the code to get the frame rate and channel with code.
def frame_rate_channel(audio_file_name):
with wave.open(audio_file_name, "rb") as wave_file:
frame_rate = wave_file.getframerate()
channels = wave_file.getnchannels()
return frame_rate,channels
and the code below is the does the asynchronous conversion.
def google_transcribe(audio_file_name):
file_name = filepath + audio_file_name
# The name of the audio file to transcribe
frame_rate, channels = frame_rate_channel(file_name)
if channels > 1:
stereo_to_mono(file_name)
bucket_name = bucketname
source_file_name = filepath + audio_file_name
destination_blob_name = audio_file_name
upload_blob(bucket_name, source_file_name, destination_blob_name)
gcs_uri = 'gs://' + bucketname + '/' + audio_file_name
transcript = ''
client = speech.SpeechClient()
audio = types.RecognitionAudio(uri=gcs_uri)
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=frame_rate,
language_code='en-US')
# Detects speech in the audio file
operation = client.long_running_recognize(config, audio)
response = operation.result(timeout=10000)
for result in response.results:
transcript += result.alternatives[0].transcript
delete_blob(bucket_name, destination_blob_name)
return transcript
and this is how you write them to file
def write_transcripts(transcript_filename,transcript):
f= open(output_filepath + transcript_filename,"w+")
f.write(transcript)
f.close()
Kindly let me know if you need any further clarifications.
Here is my python code..
import pyttsx3;
engine = pyttsx3.init(driverName='sapi5')
infile = "tanjil.txt"
f = open(infile, 'r')
theText = f.read()
f.close()
engine.say(theText)
engine.runAndWait()
I couldn't save the file to audio file
As of July 14 2019, I'm able to save to file with the pyttsx3 library (without using another library or internet connection).
It doesn't appear to be documented, but looking at the source code in github for the Engine class in "engine.py" (https://github.com/nateshmbhat/pyttsx3/blob/master/pyttsx3/engine.py), I was able to find a "save_to_file" function:
def save_to_file(self, text, filename, name=None):
'''
Adds an utterance to speak to the event queue.
#param text: Text to sepak
#type text: unicode
#param filename: the name of file to save.
#param name: Name to associate with this utterance. Included in
notifications about this utterance.
#type name: str
'''
self.proxy.save_to_file(text, filename, name)
I am able to use this like:
engine.save_to_file('the text I want to save as audio', path_to_save)
Not sure the format - it's some raw audio format (I guess it's maybe something like aiff) - but I can play it in an audio player.
If you install pydub:
https://pypi.org/project/pydub/
then you can easily convert this to mp3, e.g.:
from pydub import AudioSegment
AudioSegment.from_file(path_to_save).export('converted.mp3', format="mp3")
I've tried #Brian's solution but it didn't work for me.
I searched around a bit and I couldn't figure out how to save the speech to mp3 in pyttx3 but I found another solution without pyttx3.
It can take a .txt file and directly output a .wav file,
def txt_zu_wav(eingabe, ausgabe, text_aus_datei = True, geschwindigkeit = 2, Stimmenname = "Zira"):
from comtypes.client import CreateObject
engine = CreateObject("SAPI.SpVoice")
engine.rate = geschwindigkeit # von -10 bis 10
for stimme in engine.GetVoices():
if stimme.GetDescription().find(Stimmenname) >= 0:
engine.Voice = stimme
break
else:
print("Fehler Stimme nicht gefunden -> Standard wird benutzt")
if text_aus_datei:
datei = open(eingabe, 'r')
text = datei.read()
datei.close()
else:
text = eingabe
stream = CreateObject("SAPI.SpFileStream")
from comtypes.gen import SpeechLib
stream.Open(ausgabe, SpeechLib.SSFMCreateForWrite)
engine.AudioOutputStream = stream
engine.speak(text)
stream.Close()
txt_zu_wav("test.txt", "test_1.wav")
txt_zu_wav("It also works with a string instead of a file path", "test_2.wav", False)
This was tested with Python 3.7.4 on Windows 10.
import pyttsx3
engine = pyttsx3.init("sapi5")
voices = engine.getProperty("voices")[0]
engine.setProperty('voice', voices)
text = 'Your Text'
engine.save_to_file(text, 'name.mp3')
engine.runAndWait() # don't forget to use this line
Try the following code snippet to convert text to audio and save it as an mp3 file.
import pyttsx3
from pydub import AudioSegment
engine = pyttsx3.init('sapi5')
engine.save_to_file('This is a test phrase.', 'test.mp3') # raw audio file
engine.runAndWait()
AudioSegment.from_file('test.mp3').export('test.mp3', format="mp3") # audio file in mp3 format
NB: pyttsx3 save_to_file() method creates a raw audio file and it won't be useful for other applications to use even if we are able to play it in the media player. pydub is a useful package to convert raw audio into a specific format.
I have a constraint with my syntax. I just tried python some time ago and I started it from AI. I make a bot like Jarvis to help me like opening google or youtube. From the tutorial available in Pythonspot.com it shows for Ubuntu tutorial but I use Windows. And there are some tools or plugins that do not work in windows is mpg321. I have found a replacement with mixer.music to play the sound of the AI. This works but I have constraints on the second sound, ie i make the first sound with audio.mp3 and it works then when my second voice uses the same filename ie audio.mp3 and i have constraints like this
Traceback (most recent call last):
File "D:\#AI Projects\Jarvis\Jarvis.py", line 71, in
jarvis(data)
File "D:\#AI Projects\Jarvis\Jarvis.py", line 53, in jarvis speak(ctime())
File "D:\#AI Projects\Jarvis\Jarvis.py", line 17, in speak tts.save("audio.mp3")
File "C:\Users\inialdan\AppData\Local\Programs\Python\Python36\lib\site-packages\gtts\tts.py",
line 110, in save
with open(savefile, 'wb') as f: PermissionError: [Errno 13] Permission denied: 'audio.mp3'
This is my code
#!/usr/bin/env python3
# Requires PyAudio and PySpeech.
import speech_recognition as sr
from time import ctime
import time
import os
import subprocess
from gtts import gTTS
from pygame import mixer
def speak(audioString):
print(audioString)
tts = gTTS(text=audioString, lang='en')
tts.save("audio.mp3")
mixer.init()
mixer.music.load('D:/#AI Projects/Jarvis/audio.mp3')
mixer.music.play()
def recordAudio():
r = sr.Recognizer()
with sr.Microphone() as source:
print("Try to say something!")
audio = r.listen(source)
data = ""
try:
data = r.recognize_google(audio)
print("You said : " + data)
except sr.UnknownValueError:
print("I'm Sorry, i couldn't understand what you mean ...")
except sr.RequestError as e:
print("Could not request results from Google Speech Recognition service; {0}".format(e))
return data
def jarvis(data):
CHROME = os.path.join('C:\\', 'Program Files (x86)', 'Google', 'Chrome', 'Application', 'chrome.exe')
if "jarvis" in data:
speak("Yes, sir ?")
if "what is your name" in data:
speak("You can call me, Jarvis")
if "where do you leave" in data:
speak("In your heart.")
if "how are you" in data:
speak("I am fine")
if "what time is it" in data:
speak(ctime())
if "where is" in data:
data = data.split(" ")
location = data[2]
speak("Hold on Aldan, I will show you where " + location + " is.")
os.system('taskkill /im chrome.exe')
subprocess.call([CHROME, "https://www.google.nl/maps/place/" + location + "/&"])
if "open" in data:
data = data.split(" ")
application = data[1]
speak("Hold on Aldan, I will show you " + application)
os.system('taskkill /im chrome.exe')
subprocess.call([CHROME, "https://www." + application + ".com"])
time.sleep(2)
speak("Hi Aldan, How may I assist you?")
while 1:
data = recordAudio()
jarvis(data)
I have tried it with os.remove (); to remove the audio.mp3 and rewrite it. but still failed
I have modified Speak method with the help of TemporaryFile.Click on the icon to check the Code
from googletrans import Translator
import pygame, time
import tempfile
from gtts import gTTS
from pygame import mixer
from tempfile import TemporaryFile
def speak(text, lang='en'):
"""Text to speech. For funp."""
try:
translator = Translator()
tts = gTTS(text=translator.translate(text, dest=lang).text, lang=lang)
mixer.init()
sf = TemporaryFile()
tts.write_to_fp(sf)
sf.seek(0)
mixer.music.load(sf)
mixer.music.play()
except Exception:
raise
Just create a second def that removes the filename and run that command after your speak()
def complete():
os.remove('audio.mp3')
speak("blah blah")
complete()
I did this, it worked. You may have to guess the length of the sound. For mine, 2 sec is long enough.
voice = gtts.gTTS(item)
voice.save("temp.mp3")
sound = SoundLoader.load("temp.mp3")
sound.play()
time.sleep(2)
sound.stop()
os.remove("temp.mp3")