How could i get my intern computer audio in real time? - python

Ok so i would like to make a program that listen my computer audio in real time, and when it says some word what i defined as important it alerts me... (the code is in python), I used the speech recognition
I tried this code but it just get my microphone audio:
import speech_recognition as sr
from playsound import playsound
def capt():
r = sr.Recognizer()
final = ''
cont = 0
print(':')
while True:
if cont == 10:
final = ''
with sr.Microphone() as fonte:
audio = r.listen(fonte)
cont = 0
try:
text = r.recognize_google(audio, language= 'pt-BR')
final += f' {text}'
except:
print('Error')
else:
print('✔️')
div = final.split(' ')
if 'Special' in div:
playsound("som/alarme.mp3") # it plays an alarm
final = ''
div = ''
cont += 1```

Related

Why is the AI repeating the function?

When I say "Friday" "take a screenshot" it takes a screenshot. Everything okay. But it repeats this function. It says "I took a screenshot of your main screen" and takes a screenshot again and again. It is only this function. I tried with other functions but there it repeats it only one time - that is an other problem to fix.
Main code:
import functions as FF
import speakandrecognizefunctions as SRF
import datetime
import pyautogui
WAKE_WORD = "friday"
USER = "user"
PATH = "C://MeineDirection"
def success():
print("Succesful")
def screenshot():
date = datetime.datetime.now()
filename = str(date).replace(":", "-") + "-screenshot.png"
img = pyautogui.screenshot()
img.save(f'{PATH}//screenshots//{filename}')
SRF.speak("I took a screenshot of your main screen " + USER)
while True:
text = SRF.takecommandbackground()
if text.count(WAKE_WORD) > 0:
SRF.speak("Im listening " + USER)
print("\nListening....")
text = SRF.takecommand()
SCREENSHOT_STRS = ["make a screenshot", "take a screenshot", "screenshot"]
for phrase in SCREENSHOT_STRS:
if phrase in text:
screenshot()
success()
Speech recognition code:
import pyttsx3
import speech_recognition as sr
import config
engine = pyttsx3.init('sapi5')
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[1].id)
WAKE_WORD = "friday"
def speak(text):
engine.say(text)
engine.runAndWait()
def takecommand():
r = sr.Recognizer()
with sr.Microphone() as source:
#r.adjust_for_ambient_noise(source, duration=0.5)
audio = r.listen(source)
said = ""
try:
said = r.recognize_google(audio)
print(said)
except Exception as e:
speak("I didnt get that !!")
print(f"Exception. Say {WAKE_WORD} and try again " + str(e))
pass
return said.lower()
def takecommandbackground():
r = sr.Recognizer()
with sr.Microphone() as source:
r.adjust_for_ambient_noise(source, duration=0.3)
#print("Ready")
audio = r.listen(source, phrase_time_limit=4)
try:
# print("Recognizing....")
query = r.recognize_google(audio)
#print("user said : ", query)
# speak(query)
except Exception:
#print("Say that again please !! ")
return "none"
return query.lower()
Easy "take a screenshot" will trigger for "take a screenshot" & "screenshot"
Take out "screenshot" from SCREENSHOT_STRS and see what . If that works then you want to break the loop as #jasonharper suggested
You need to break the Loop, because it loops all the Time, or you clear the Text Var.

Speaking a print string and JSON data from an API

So i'm working with the speech recognition library in python and i'm making calls to an API. I've created a function called speak that outputs audio from the computer. I wanted to know if its possible to have the speech recognition library to actually speak JSON data that it receives from the API. Here is a sample of my code.
import os
import time
import playsound
import speech_recognition as sr
from gtts import gTTS
import requests
import json
import jsonpath
import datetime
url = "APIURL"
key = "APIKEY"
def speak(text, json_response):
for i in range(0,1):
tts = gTTS(text= text, lang = 'en')
file1 = str("hello" + str(i) + ".mp3")
tts.save(file1)
playsound.playsound(file1,True)
os.remove(file1)
def get_audio():
r =sr.Recognizer()
with sr.Microphone() as source:
audio = r.listen(source)
said = ''
try:
said = r.recognize_google(audio)
print(said)
except Exception as e:
print("Exception" + str(e))
return said
def print_commands(text):
if "printer state" in text:
response = requests.get(url + 'printer' + key)
# finds specific part of state operational or not
json_response = json.loads(response.text)
if response.status_code == 200:
speak('state:', json_response['state']['text'])
elif response.status_code == 409:
speak("couldn't get any data")
while True:
if __name__ == '__main__':
try:
text = get_audio()
print_commands(text)
except KeyboardInterrupt:
exit()
As of now it will speak the string that says State but nothing else after that I appreciate any help i can get with this.

Is there any way to convert words in to numbers of speech_recognition library in python

I have tried for numbers from 1 to 10 and it works well but i need it to work with all the numbers and it is not feasible to write the code for each number.
I also need it to work in sentences too which is not happening in my code.
Help me out guys, please....
This is my code....
import speech_recognition as sr
import time
t = ['one','two','three','four','five','six','seven','eight','nine','ten']
r = sr.Recognizer()
with sr.Microphone() as source:
print('Speak anything: ')
audio = r.listen(source)
try:
text = r.recognize_google(audio)
print('You said : {} '.format(text))
time.sleep(1)
for i in range(0,10):
if (t[i] == text):
print('/n',i)
except:
print('Sorry could not recogonize your voice')
In case you don't want to take Vivek Mehta's suggestion and don't want an additional dependency, you can use a plain dictionary
import speech_recognition as sr
import time
t = {'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10}
r = sr.Recognizer()
with sr.Microphone() as source:
print('Speak anything: ')
audio = r.listen(source)
try:
text = r.recognize_google(audio)
print('You said : {0} {1} '.format(text, t[text]))
time.sleep(1)
except:
print('Sorry could not recogonize your voice')

How to get output from google microphone search to python script?

I am currently using google to transcribe speech to text. But it is too slow. I would like to use the google microphone search box output instead (since it is almost real time). I have been reading Selenium examples but can't find anything returning the voice search text.
My existing code:
import time, wave, pymedia.audio.sound as sound
import pyautogui
import pyaudio,os
import speech_recognition as sr
f1= wave.open( 'HelloWorld.wav', 'rb' )
sampleRate1= f1.getframerate()
channels1= f1.getnchannels()
format= sound.AFMT_S16_LE
snd1= sound.Output( sampleRate1, channels1, format )
s1= f1.readframes( 800000 )
def hello():
snd1.play( s1 )
def mainfunction(source):
audio = r.listen(source)
user = (r.recognize_google(audio, language = "en-us", show_all=False))
print(user)
if user == "hello world":
residential()
else:
pass
try:
if __name__ == "__main__":
r = sr.Recognizer()
with sr.Microphone() as source:
while 1:
r.pause_threshold = 0.5
r.energy_threshold = 150
r.adjust_for_ambient_noise(source, duration = 0.5)
mainfunction(source)

How can i make the python to wait till i complete speaking?

I am writing a program to recognise the speech from a microphone and the code will process accordingly. The code I wrote for this purpose is below.
import speech_recognition as sr
import webbrowser
import pyttsx
from time import sleep
engine = pyttsx.init()
engine.setProperty('rate', 70)
r = sr.Recognizer()
def recognize(audio):
try:
return r.recognize(audio)
except LookupError, e:
print e
return ''
with sr.Microphone() as source:
while True:
engine.say("Hi How can i help you ?")
sleep(0.15)
print "Start Speaking"
audio = r.listen(source)
words = recognize(audio)
print("You said " + words)
if words == "Facebook":
engine.say("Shall i open the Facebook page for you ?")
engine.runAndWait()
audio = r.listen(source)
words = recognize(audio)
if words == "Yes":
webbrowser.open('https://www.facebook.com')
elif words == "stop":
break
Here I tried sleep also but before the engine speaks I can see the text Start Speaking getting printed. Instead of Sleep, is there any nice way to capture the speech in microphone and wait till say something or for a long silence?
This method:
engine.runAndWait()
waits for speech to complete. You need to use it not just after engine.say("Shall i open the Facebook page for you ?"), but also after engine.say("Hi How can i help you ?")
instead of sleep
I normally use global variables which are frowned upon but the following is correct I think? The following two def's should help...
# contains reusable print and speech
def output_modes(output):
engine = pyttsx3.init()
print(f"Output: {output}")
engine.say(output)
engine.runAndWait()
# contains reusable grabbing audio
def input_modes():
r1 = sr.Recognizer()
mic1 = sr.Microphone()
with mic1:
try:
output = r1.recognize_google(r1.listen(mic1))
output_modes()
except sr.UnknownValueError:
output = "Unknown Error M1"
output_modes()
except sr.RequestError as e:
output = "Error M2; {0}".format(e)
output_modes()
You should be able to write a While loop that can call on input_modes() to listen or output_modes to speak for example
def interact():
if input == 'Hello':
output = 'Hi there'
output_modes

Categories