"OSError: No Default Input Device Available" on Google Colab - python

I installed PyAudio with "pip3 install pyaudio" on Google Colab, based on angelokh's answer.
Then I got this error on Google Colab:
OSError: No Default Input Device Available
The code is shown below.
import speech_recognition as sr

r = sr.Recognizer()
with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source)
    audio = r.listen(source)

You can't use the mic from Google Colab directly as you do on your own machine, because the notebook code runs on a remote server that has no audio input device. You have to use JavaScript so the browser enables the mic and passes the recorded audio back to Python. This can be done using the following code, found here:
# all imports
from io import BytesIO
from base64 import b64decode
from google.colab import output
from IPython.display import Javascript, display

RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
    const reader = new FileReader()
    reader.onloadend = e => resolve(e.srcElement.result)
    reader.readAsDataURL(blob)
})
var record = time => new Promise(async resolve => {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })
    recorder = new MediaRecorder(stream)
    chunks = []
    recorder.ondataavailable = e => chunks.push(e.data)
    recorder.start()
    await sleep(time)
    recorder.onstop = async () => {
        blob = new Blob(chunks)
        text = await b2text(blob)
        resolve(text)
    }
    recorder.stop()
})
"""

def record(sec=3):
    print("Speak Now...")
    display(Javascript(RECORD))
    sec += 1  # pad the window so the tail of the phrase isn't cut off
    s = output.eval_js('record(%d)' % (sec * 1000))
    print("Done Recording!")
    b = b64decode(s.split(',')[1])  # strip the data-URL prefix and decode
    return b  # byte stream
Now you can use the record() function to record audio. It returns the audio as a byte stream. You can try it yourself on Colab following this link:
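Note that MediaRecorder typically produces webm-encoded audio, so the returned bytes are not WAV data. As a minimal sketch (my addition, not part of the original answer; it assumes pydub and the ffmpeg binary are available in the Colab runtime), you could convert the bytes in memory and hand them to speech_recognition:
from io import BytesIO

import speech_recognition as sr
from pydub import AudioSegment

audio_bytes = record(sec=3)  # webm-encoded bytes from the browser

# convert webm -> wav in memory so speech_recognition can read it
wav_io = BytesIO()
AudioSegment.from_file(BytesIO(audio_bytes)).export(wav_io, format="wav")
wav_io.seek(0)

r = sr.Recognizer()
with sr.AudioFile(wav_io) as source:
    audio = r.record(source)
print(r.recognize_google(audio))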

Related

How to send OpenCV Image using python requests to Go Endpoint

Here is the code for my camera script:
import cv2
import requests
from datetime import datetime
from time import sleep

def sendImage(frame):
    imencoded = cv2.imencode(".jpg", frame)[1]
    now = datetime.now()
    seq = now.strftime("%Y%m%d%H%M%S")
    file = {'file': (seq + '.jpg', imencoded.tobytes(), 'image/jpeg')}
    response = requests.post("http://localhost:3004/", files=file, timeout=5)
    return response

def takeImage():
    cap = cv2.VideoCapture(0)
    ret, frame = cap.read()
    print(sendImage(frame))
    cap.release()

while 1:
    takeImage()
    sleep(5)
and my Go server:
package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/gorilla/mux"
)

func imgHandler(w http.ResponseWriter, r *http.Request) {
	fmt.Println("received request!")
	r.ParseMultipartForm(10 << 20)
	file, handler, err := r.FormFile("myFile")
	if err != nil {
		fmt.Println("error!")
		return
	}
	defer file.Close()
	fmt.Println(handler.Filename)
}

func getHandler(w http.ResponseWriter, r *http.Request) {
	fmt.Fprintf(w, "Hello World API!")
}

func main() {
	r := mux.NewRouter()
	r.HandleFunc("/", imgHandler).Methods("POST")
	r.HandleFunc("/", getHandler).Methods("GET")
	http.Handle("/", r)
	log.Fatal(http.ListenAndServe(":3004", nil))
}
I have no idea why I keep getting an error from my FormFile call. My end goal is to have a secure connection to an endpoint so that I can send images from my Raspberry Pi to my server and have them saved locally. How can I send files to my Go endpoint using the Python requests library? I've already seen solutions that use form elements on an HTML page, and those work.
In Python you name the form field file, whereas in Go you try to access myFile; the two names must match. The change to the Go code was:
file, handler, err := r.FormFile("file")
To find this out, I've changed the debug line to print the error as well:
if err != nil {
	fmt.Printf("error: %s\n", err)
	return
}
(In general, use log.Printf in servers :)
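A quick way (my addition, not part of the original answer) to confirm which field name the client actually sends is to inspect the multipart body that requests builds, without sending anything:
import requests

# prepare the request without sending it; the printed body contains
# Content-Disposition: form-data; name="file"; filename="test.jpg"
req = requests.Request(
    "POST", "http://localhost:3004/",
    files={"file": ("test.jpg", b"not a real jpeg", "image/jpeg")},
).prepare()
print(req.body[:200])
The name="..." value in that output is exactly the string the Go side must pass to r.FormFile.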

How do you record a user's audio in Streamlit sharing?

I'm looking to deploy a small data collection app that allows users to record themselves saying some phrases.
Building this with streamlit.
I can get it to work locally, but can't seem to find a solution that works on the Streamlit sharing service.
Any ideas?
I do not fully understand your question, but I think this might help.
import streamlit as st
from bokeh.models.widgets import Button
from bokeh.models import CustomJS
from streamlit_bokeh_events import streamlit_bokeh_events

stt_button = Button(label="Speak", width=100)

stt_button.js_on_event("button_click", CustomJS(code="""
    var recognition = new webkitSpeechRecognition();
    recognition.continuous = true;
    recognition.interimResults = true;

    recognition.onresult = function (e) {
        var value = "";
        for (var i = e.resultIndex; i < e.results.length; ++i) {
            if (e.results[i].isFinal) {
                value += e.results[i][0].transcript;
            }
        }
        if (value != "") {
            document.dispatchEvent(new CustomEvent("GET_TEXT", {detail: value}));
        }
    }
    recognition.start();
"""))

result = streamlit_bokeh_events(
    stt_button,
    events="GET_TEXT",
    key="listen",
    refresh_on_update=False,
    override_height=75,
    debounce_time=0)

if result:
    if "GET_TEXT" in result:
        st.write(result.get("GET_TEXT"))
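Note that this approach returns the recognized text, not the raw audio. If transcripts are enough for the data-collection use case, a small sketch of persisting them (the file name is made up, and files written on Streamlit sharing are ephemeral, so durable storage would need an external service):
import csv
from datetime import datetime

if result and "GET_TEXT" in result:
    # append each recognized phrase with a timestamp
    with open("phrases.csv", "a", newline="") as f:
        csv.writer(f).writerow([datetime.utcnow().isoformat(), result.get("GET_TEXT")])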
I have been working on implementing a custom component that enables recording audio from the client's microphone in apps that are deployed on the web (Streamlit-Audio-Recorder). This version of the component supports downloading the recorded audio, but not directly returning audio data to Streamlit/Python.

passing loaded variable as argument instead of filepath python

I'm not too familiar with Python; apologies if this is a trivial question.
I have a script that gets an audio file from a URL, and I need to convert the file from .ogg to .wav.
Then I want to pass the converted, loaded file to a function whose argument is a filepath string.
Below is my code:
import os
import pydub
import glob
import time
from io import BytesIO
import pandas as pd
from urllib.request import Request, urlopen
import urllib.error
import azure.cognitiveservices.speech as speechsdk

#%%
audio_file = "https://url.whatever.com.co/audio_file.ogg"
req = Request(audio_file)
try:
    response = urlopen(req).read()
except urllib.error.URLError as e:
    print(e.reason)

sound = pydub.AudioSegment.from_ogg(BytesIO(response))
sound_wav = sound.export(format="wav")

speech_key, service_region = "XXXXXXXXXXXXXXXX", "eastus"
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
speech_config.speech_recognition_language = "es-ES"

audio_filename = r"C:\some_file_path\3AC3844337F7E5CEAE95.wav"
#audio_config = speechsdk.audio.AudioConfig(sound_wav)
audio_config = speechsdk.audio.AudioConfig(audio_filename=audio_filename)
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

done = False

def stop_cb(evt):
    """callback that stops continuous recognition upon receiving an event `evt`"""
    print('CLOSING on {}'.format(evt))
    speech_recognizer.stop_continuous_recognition()
    global done
    done = True

all_results = []

def handle_final_result(evt):
    all_results.append(evt.result.text)

speech_recognizer.recognized.connect(handle_final_result)
# Connect callbacks to the events fired by the speech recognizer
speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
# stop continuous recognition on either session stopped or canceled events
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
# Start continuous speech recognition
speech_recognizer.start_continuous_recognition()

while not done:
    time.sleep(.5)

print("Printing all results:")
print(all_results)
When I run my code with this line:
audio_config = speechsdk.audio.AudioConfig(audio_filename = audio_filename)
It works correctly.
However when I run it with this line:
audio_config = speechsdk.audio.AudioConfig(sound_wav)
I get this error:
ValueError: use_default_microphone must be a bool, is "tempfile._TemporaryFileWrapper object at 0x0000020EC4297668"
The error message you got shows that sound_wav is a temporary file object, and that it was passed positionally into the use_default_microphone parameter. As seen in the documentation, it looks like audio_config = speechsdk.audio.AudioConfig(filename=sound_wav) is what you need.
Since you used audio_filename as the parameter name and it worked, it could be that a different SDK version uses that name instead; use whichever your version accepts.
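If filename=sound_wav still fails because sound_wav is a file object rather than a path string, a minimal sketch of a workaround (my assumption, building on the question's sound variable): export the WAV to a named temporary file and pass its path.
import tempfile

# export to a real file on disk so the SDK gets a plain path string
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
sound.export(tmp.name, format="wav")
tmp.close()

audio_config = speechsdk.audio.AudioConfig(filename=tmp.name)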

How to handle real time media analysis on a web server?

The use case: I'm developing a web app to help students learn to read. The student is recorded while reading a text in a web app. The signal is sent to the backend in 200 ms segments and analysed before the student finishes reading, so that live feedback can be given during the reading. The server sends feedback after each segment is analysed.
On the web app the code looks like this:
navigator.mediaDevices.getUserMedia({ audio: true, video: false })
    .then(stream => {
        const mediaRecorder = new MediaRecorder(stream)
        mediaRecorder.start(200)
        mediaRecorder.ondataavailable = event => {
            socket.emit('my_event', new Blob([event.data]))
        }
    })
On Chrome, the media type produced is webm. I'm wondering how to handle the data on the backend so that I can analyse the media with numpy before the end of the recording.
For now I couldn't find a better way than something like:
import tempfile

from pydub import AudioSegment

def blobToSignal(blob, is_first_sequence):
    # chunks after the first lack the webm header, so prepend it again
    webm_header = b'\x1aE...'
    fp = tempfile.NamedTemporaryFile()
    fp.write(blob) if is_first_sequence else fp.write(webm_header + blob)
    fp.seek(0)
    samples = AudioSegment.from_file(fp.name, 'webm').get_array_of_samples()
    fp.close()
    return samples  # this is almost a numpy array (analyzable)
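A possibly simpler variant (my assumption, not from the original post): pydub can also read from an in-memory buffer, which avoids the temporary file while keeping the same header trick:
from io import BytesIO

import numpy as np
from pydub import AudioSegment

def blob_to_signal(blob, is_first_sequence, webm_header=b'\x1aE...'):
    # webm_header is the truncated placeholder from the snippet above
    data = blob if is_first_sequence else webm_header + blob
    samples = AudioSegment.from_file(BytesIO(data), format='webm').get_array_of_samples()
    return np.array(samples)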
I tried to change the front end to send a Float32Array instead of webm:
navigator.mediaDevices.getUserMedia({ audio: true, video: false })
    .then(stream => {
        audio_context = new AudioContext()
        var audioInput = audio_context.createMediaStreamSource(stream)
        var recorder = audio_context.createScriptProcessor(8096, 1, 1)
        recorder.onaudioprocess = event => {
            socket.emit(
                'my_event',
                Array.from(event.inputBuffer.getChannelData(0))
            )
        }
        audioInput.connect(recorder)
        recorder.connect(audio_context.destination)
    })
This way the backend can use the raw signal, but this method requires too much bandwidth (~1 Mb/s).
So my questions are:
am I doing something wrong here?
is there a Python library to decode a webm coming from a Buffer? (or something similar? I'm not so familiar with Python...)
how would you handle this use case?
Thanks for your help!

Re-opening a file in python gives permission error

I have searched and tried to implement solutions suggested here:
Errno 13 Permission denied: 'file.mp3' Python
Error while re-opening sound file in python
But there don't seem to be any good solutions to this. Here is my code; can anyone tell me what I am doing wrong?
#!/usr/bin/env python3
# Requires PyAudio and PySpeech.
import time, os
import speech_recognition as sr
from gtts import gTTS
import pygame as pg
import mutagen.mp3

# Find out what input sound device is default (use if you have issues with microphone)
#import pyaudio
#sdev = pyaudio.pa.get_default_input_device()

def play_music(sound_file, volume=0.8):
    '''
    stream music with mixer.music module in a blocking manner
    this will stream the sound from disk while playing
    '''
    # set up the mixer according to your sound file
    mp3 = mutagen.mp3.MP3(sound_file)
    pg.mixer.init(frequency=mp3.info.sample_rate)
    pg.mixer.music.set_volume(volume)
    try:
        pg.mixer.music.load(sound_file)
        print("HoBo Sound file {} loaded!".format(sound_file))
    except pg.error:
        print("HoBo Sound file {} not found! ({})".format(sound_file, pg.get_error()))
        return
    pg.mixer.music.play()
    while pg.mixer.music.get_busy() == True:
        continue
    pg.mixer.quit()
    sound_file.close()

def speak(audioString):
    print(audioString)
    tts = gTTS(text=audioString, lang='en')
    tts.save("audio.mp3")
    # pick a mp3 file in folder or give full path
    sound_file = "audio.mp3"
    # optional volume 0 to 1.0
    volume = 0.6
    play_music(sound_file, volume)

def audioIn():
    # Record Audio from Microphone
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Say something!")
        audio = r.listen(source)
    # Google Speech Recognition
    try:
        # for testing purposes, we're just using the default API key
        # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
        # instead of `r.recognize_google(audio)`
        data = r.recognize_google(audio)
        print("You said: ", data)
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
    return data

def hobo(data):
    if "how are you" in data:
        speak("I am fine")
    if "what time is it" in data:
        speak(time.ctime())
    if "where is" in data:
        data = data.split(" ")
        location = data[2]
        speak("Hold on Sir, I will show you where " + location + " is.")
        os.system("chromium-browser https://www.google.nl/maps/place/" + location + "/&")

# Starts the program
#time.sleep(2)
speak("Testing")
data = ""  # initialised so the loop condition is defined on the first pass
while data != "stop":
    data = audioIn()
    hobo(data)
else:
    quit
So I found the fix in one of the original threads I already went over. The fix was to implement a delete() function like so:
def delete():
    time.sleep(2)
    pg.mixer.init()
    pg.mixer.music.load("somefilehere.mp3")  # load a different file so audio.mp3 is released
    os.remove("audio.mp3")
and changing the play_music() function so that it calls delete() at the end (and I removed the sound_file.close() statement, of course).
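For clarity, a sketch of how play_music() might end after that change (reconstructed from the description above, not the poster's exact code):
def play_music(sound_file, volume=0.8):
    mp3 = mutagen.mp3.MP3(sound_file)
    pg.mixer.init(frequency=mp3.info.sample_rate)
    pg.mixer.music.set_volume(volume)
    pg.mixer.music.load(sound_file)
    pg.mixer.music.play()
    while pg.mixer.music.get_busy():
        continue
    pg.mixer.quit()
    delete()  # release the handle on audio.mp3, then remove it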
Follow the method below:
import time
from gtts import gTTS
import pygame

def Text_to_speech():
    Message = "hey there"
    speech = gTTS(text=Message)
    speech.save('textToSpeech.mp3')
    pygame.mixer.init()
    pygame.mixer.music.load("textToSpeech.mp3")
    pygame.mixer.music.play()
    time.sleep(3)
    pygame.mixer.music.unload()  # releases the file handle (pygame 2.0+)
