How to get output from google microphone search to python script? - python

I am currently using google to transcribe speech to text. But it is too slow. I would like to use the google microphone search box output instead (since it is almost real time). I have been reading Selenium examples but can't find anything returning the voice search text.
My existing code:
import time, wave, pymedia.audio.sound as sound
import pyautogui
import pyaudio,os
import speech_recognition as sr
# Open the WAV file and read its playback parameters.
# NOTE(review): f1 is never closed in the visible code.
f1= wave.open( 'HelloWorld.wav', 'rb' )
sampleRate1= f1.getframerate()
channels1= f1.getnchannels()
# Sample format constant from pymedia (presumably signed 16-bit little-endian).
# NOTE(review): the name `format` shadows the Python builtin.
format= sound.AFMT_S16_LE
# Output device configured with the file's rate and channel count.
snd1= sound.Output( sampleRate1, channels1, format )
# Read up to 800000 frames into memory; hello() plays this buffer.
s1= f1.readframes( 800000 )
def hello():
    """Play the preloaded audio buffer ``s1`` on the output device ``snd1``."""
    snd1.play(s1)
def mainfunction(source):
    """Listen for one phrase on *source* and react to "hello world".

    Uses the module-level recognizer ``r``. The transcript is printed, and
    ``residential()`` is called when it is exactly "hello world".

    Args:
        source: The opened audio source (e.g. ``sr.Microphone``) to listen on.
    """
    audio = r.listen(source)
    try:
        heard = r.recognize_google(audio, language="en-us", show_all=False)
    except sr.UnknownValueError:
        # Nothing intelligible was captured; skip this phrase so the
        # caller's loop keeps running instead of dying on a traceback.
        return
    print(heard)
    if heard == "hello world":
        # NOTE(review): residential() is not defined in the visible code.
        residential()
if __name__ == "__main__":
    r = sr.Recognizer()
    # The pause threshold never changes, so set it once up front.
    r.pause_threshold = 0.5
    try:
        with sr.Microphone() as source:
            while True:
                # Start from a fixed energy threshold, then calibrate against
                # half a second of ambient noise before each phrase.
                r.energy_threshold = 150
                r.adjust_for_ambient_noise(source, duration=0.5)
                mainfunction(source)
    except KeyboardInterrupt:
        # Ctrl-C is the only way out of the loop; leave quietly.
        pass

Related

Why is the AI repeating the function?

When I say "Friday" and then "take a screenshot", it takes a screenshot. So far so good. But it then repeats this function: it says "I took a screenshot of your main screen" and takes a screenshot again and again. This only happens with this function. I tried other functions, and those repeat only once - that is another problem to fix.
Main code:
import functions as FF
import speakandrecognizefunctions as SRF
import datetime
import pyautogui
WAKE_WORD = "friday"
USER = "user"
PATH = "C://MeineDirection"
def success():
    """Print a confirmation that a command was carried out."""
    print("Succesful")
def screenshot():
    """Capture the screen, save it under PATH/screenshots, and announce it.

    The file name is the current local timestamp with every ':' replaced
    by '-', followed by "-screenshot.png".
    """
    stamp = str(datetime.datetime.now()).replace(":", "-")
    target = f'{PATH}//screenshots//{stamp + "-screenshot.png"}'
    pyautogui.screenshot().save(target)
    SRF.speak("I took a screenshot of your main screen " + USER)
# Main loop: wait for the wake word, then listen for one command.
# NOTE(review): the original indentation was lost in this listing, so the
# nesting of the lines below (in particular whether the phrase loop sits
# inside the wake-word branch) cannot be determined from this text.
while True:
text = SRF.takecommandbackground()
if text.count(WAKE_WORD) > 0:
SRF.speak("Im listening " + USER)
print("\nListening....")
text = SRF.takecommand()
# Some of these phrases are substrings of others, so a single command such
# as "take a screenshot" matches more than one entry.
SCREENSHOT_STRS = ["make a screenshot", "take a screenshot", "screenshot"]
for phrase in SCREENSHOT_STRS:
if phrase in text:
# There is no break here: every matching phrase triggers another screenshot.
screenshot()
success()
Speech recognition code:
import pyttsx3
import speech_recognition as sr
import config
engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[1].id)
WAKE_WORD = "friday"
def speak(text):
    """Queue *text* on the module-level speech engine and wait for it to finish."""
    engine.say(text)
    engine.runAndWait()
def takecommand():
    """Listen for one phrase and return its lower-cased transcript.

    Returns an empty string when recognition raises; in that case the
    failure is spoken aloud and the exception text is printed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        captured = recognizer.listen(mic)
        transcript = ""
        try:
            transcript = recognizer.recognize_google(captured)
            print(transcript)
        except Exception as err:
            speak("I didnt get that !!")
            print(f"Exception. Say {WAKE_WORD} and try again " + str(err))
    return transcript.lower()
def takecommandbackground():
    """Listen briefly and return the lower-cased transcript.

    Calibrates against ambient noise for 0.3 s, then records a phrase
    limited to 4 s. Returns the string "none" when recognition raises.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        recognizer.adjust_for_ambient_noise(mic, duration=0.3)
        captured = recognizer.listen(mic, phrase_time_limit=4)
        try:
            heard = recognizer.recognize_google(captured)
        except Exception:
            return "none"
        return heard.lower()
Easy: saying "take a screenshot" matches both the "take a screenshot" and the "screenshot" entries.
Take "screenshot" out of SCREENSHOT_STRS and see what happens. If that works, then you want to break out of the loop, as @jasonharper suggested.
You need to break out of the loop, because it keeps looping all the time; alternatively, clear the text variable.

How can I capture my computer's internal audio in real time?

OK, so I would like to make a program that listens to my computer's audio in real time and alerts me when it hears a word that I have defined as important. The code is in Python, and I used the speech recognition library.
I tried this code, but it only captures my microphone audio:
import speech_recognition as sr
from playsound import playsound
# Listen in a loop and play an alarm when the word 'Special' is recognized.
# NOTE(review): the original indentation was lost in this listing; the
# nesting of the statements below (e.g. where `cont = 0` belongs) cannot be
# determined from this text.
def capt():
r = sr.Recognizer()
# Accumulated transcript and a loop counter.
final = ''
cont = 0
print(':')
while True:
if cont == 10:
final = ''
# sr.Microphone() is the capture source here, which is why only microphone
# audio is heard.
with sr.Microphone() as fonte:
audio = r.listen(fonte)
cont = 0
try:
text = r.recognize_google(audio, language= 'pt-BR')
final += f' {text}'
# NOTE(review): a bare except also catches KeyboardInterrupt.
except:
print('Error')
else:
print('✔️')
# Split the accumulated transcript on spaces and look for the keyword.
div = final.split(' ')
if 'Special' in div:
playsound("som/alarme.mp3") # it plays an alarm
final = ''
div = ''
cont += 1```

Speaking a print string and JSON data from an API

So i'm working with the speech recognition library in python and i'm making calls to an API. I've created a function called speak that outputs audio from the computer. I wanted to know if its possible to have the speech recognition library to actually speak JSON data that it receives from the API. Here is a sample of my code.
import os
import time
import playsound
import speech_recognition as sr
from gtts import gTTS
import requests
import json
import jsonpath
import datetime
url = "APIURL"
key = "APIKEY"
def speak(text, json_response):
    """Convert *text* to speech with gTTS, play it, then delete the file.

    ``json_response`` is accepted but is not used by this function.
    """
    mp3_name = "hello" + str(0) + ".mp3"
    gTTS(text=text, lang='en').save(mp3_name)
    playsound.playsound(mp3_name, True)
    os.remove(mp3_name)
def get_audio():
    """Record one phrase from the microphone and return its transcript.

    Returns '' when recognition raises; the exception text is printed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as mic:
        recording = recognizer.listen(mic)
        transcript = ''
        try:
            transcript = recognizer.recognize_google(recording)
            print(transcript)
        except Exception as err:
            print("Exception" + str(err))
    return transcript
def print_commands(text):
    """Handle the "printer state" voice command.

    When *text* contains "printer state", the printer endpoint is queried
    and the outcome is passed to ``speak``. Any other text is ignored.

    Args:
        text: Transcript of what the user said.
    """
    if "printer state" not in text:
        return
    response = requests.get(url + 'printer' + key)
    if response.status_code == 200:
        # Parse only successful responses; an error body may not be JSON.
        # finds specific part of state operational or not
        json_response = json.loads(response.text)
        speak('state:', json_response['state']['text'])
    elif response.status_code == 409:
        # speak() requires two positional arguments; there is no payload
        # here, so pass None rather than raising TypeError.
        speak("couldn't get any data", None)
if __name__ == '__main__':
    # Check the guard once, outside the loop, so that importing this module
    # does not spin forever in an empty `while True`.
    while True:
        try:
            text = get_audio()
            print_commands(text)
        except KeyboardInterrupt:
            exit()
As of now it speaks the string "state:" but nothing after that. I appreciate any help I can get with this.

Python speech_recognition.UnknownValueError

I'm trying to make a virtual assistant; right now it's supposed to just write down what I say. However, when I try to test it, it returns:
Traceback (most recent call last):
File "/Users/danieldossantos/Desktop/jarvis/chats/main.py", line 14, in
speech = r.recognize_google(audio, language = 'pt')
File "/Library/Python/2.7/site-packages/speech_recognition/init.py", line 858, in recognize_google
if not isinstance(actual_result, dict) or len(actual_result.get("alternative", [])) == 0: raise UnknownValueError()
speech_recognition.UnknownValueError
I've checked my code and I haven't found any errors, at least not that I know of,
import speech_recognition as sr
r = sr.Recognizer()
with sr.Microphone() as mic:
    # Calibrate once against background noise before listening.
    r.adjust_for_ambient_noise(mic)
    while True:
        audio = r.listen(mic)
        # recognize_google raises sr.UnknownValueError when nothing
        # intelligible was captured; it is not caught here.
        speech = r.recognize_google(audio, language = 'pt')
        print('Você disse: ', speech)
Yes, it is working for me now. The problem was with the audio ports, as most laptops have two of them:
1. audio out (Green colour)
2. microphone (pink colour)
You need to put your headphone jack in Audio out so that it can accept your speech as input.
Because your code is not receiving any input, the result is an empty list [], which is exactly what the failing check
len(actual_result.get("alternative", [])) == 0 detects.
import speech_recognition as sr
from os import walk
r = sr.Recognizer()
#optional
#r.energy_threshold = 300
def startConvertion(path = 'file.wav', lang = 'en-IN'):
    """Transcribe the audio file at *path* and print the result.

    Uses the module-level recognizer ``r``.

    Args:
        path: Audio file to read.
        lang: Language tag passed to the Google recognizer.
    """
    with sr.AudioFile(path) as source:
        recording = r.record(source)
        transcript = r.recognize_google(recording, language=lang)
        print(transcript)
startConvertion()

How can i make the python to wait till i complete speaking?

I am writing a program to recognise the speech from a microphone and the code will process accordingly. The code I wrote for this purpose is below.
import speech_recognition as sr
import webbrowser
import pyttsx
from time import sleep
engine = pyttsx.init()
engine.setProperty('rate', 70)
r = sr.Recognizer()
def recognize(audio):
    """Return the transcript of *audio*, or '' when recognition fails.

    Uses the module-level recognizer ``r``. A ``LookupError`` raised by the
    recognizer is printed and swallowed.

    Args:
        audio: Audio data as returned by ``r.listen``.
    """
    try:
        return r.recognize(audio)
    except LookupError as e:
        # `except ... as e` and print(e) are valid on Python 2.6+ and 3.
        print(e)
        return ''
# Conversation loop: prompt the user, listen, and act on the recognized word.
# NOTE(review): indentation was lost in this listing, so which `if` the
# trailing `elif` pairs with cannot be determined from this text.
with sr.Microphone() as source:
while True:
# This say() is followed by a short sleep rather than runAndWait().
engine.say("Hi How can i help you ?")
sleep(0.15)
print "Start Speaking"
audio = r.listen(source)
words = recognize(audio)
print("You said " + words)
if words == "Facebook":
engine.say("Shall i open the Facebook page for you ?")
# Wait for the question to be spoken before listening for the answer.
engine.runAndWait()
audio = r.listen(source)
words = recognize(audio)
if words == "Yes":
webbrowser.open('https://www.facebook.com')
elif words == "stop":
break
I also tried sleep here, but the text "Start Speaking" is printed before the engine speaks. Instead of sleep, is there a clean way to capture speech from the microphone and wait until I say something, or until there is a long silence?
This method:
engine.runAndWait()
waits for speech to complete. You need to call it not just after engine.say("Shall i open the Facebook page for you ?"), but also after engine.say("Hi How can i help you ?"),
in place of the sleep call.
I normally use global variables, which are frowned upon, but I think the following is correct. These two functions should help:
# contains reusable print and speech
def output_modes(output):
    """Print *output* with an "Output: " prefix, then speak it and wait."""
    tts = pyttsx3.init()
    print(f"Output: {output}")
    tts.say(output)
    tts.runAndWait()
# contains reusable grabbing audio
def input_modes():
    """Listen for one phrase and hand the result to ``output_modes``.

    On success the transcript is passed on. When the recognizer raises
    ``sr.UnknownValueError`` or ``sr.RequestError``, a short error message
    is passed on instead.
    """
    r1 = sr.Recognizer()
    mic1 = sr.Microphone()
    with mic1:
        try:
            output = r1.recognize_google(r1.listen(mic1))
        except sr.UnknownValueError:
            output = "Unknown Error M1"
        except sr.RequestError as e:
            output = "Error M2; {0}".format(e)
        # output_modes() requires the text to print and speak; calling it
        # with no argument raises TypeError.
        output_modes(output)
You should then be able to write a while loop that calls input_modes() to listen or output_modes() to speak, for example:
def interact(heard=""):
    """Reply to a recognized phrase.

    Args:
        heard: The text the user said. Defaults to "" so that existing
            zero-argument calls still work and do nothing.
    """
    # Compare the caller's text, not the builtin `input` function, which
    # never equals a string.
    if heard == 'Hello':
        output = 'Hi there'
        # Call output_modes; the bare name alone is a no-op expression.
        output_modes(output)

Categories