How to transcribe the recording for speech recognization - python

After downloading and uploading files related to the mozilla deeepspeech, I started using google colab. I am using mozilla/deepspeech for speech recognization. The code shown below is for recording my audio. After recording the audio, I want to use a function/method to transcribe the recording into text. Everything compiles, but the text does not come out correctly. Any thoughts in my code?
"""
To write this piece of code I took inspiration/code from a lot of places.
It was late night, so I'm not sure how much I created or just copied o.O
Here are some of the possible references:
https://blog.addpipe.com/recording-audio-in-the-browser-using-pure-html5-and-minimal-javascript/
https://stackoverflow.com/a/18650249
https://hacks.mozilla.org/2014/06/easy-audio-capture-with-the-mediarecorder-api/
https://air.ghost.io/recording-to-an-audio-file-using-html5-and-js/
https://stackoverflow.com/a/49019356
"""
from google.colab.output import eval_js
from base64 import b64decode
from scipy.io.wavfile import read as wav_read
import io
import ffmpeg
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_p = document.createElement("P");
var my_btn = document.createElement("BUTTON");
var t = document.createTextNode("Press to start recording");
my_btn.appendChild(t);
//my_p.appendChild(my_btn);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);
var base64data = 0;
var reader;
var recorder, gumStream;
var recordButton = my_btn;
var handleSuccess = function(stream) {
gumStream = stream;
var options = {
//bitsPerSecond: 8000, //chrome seems to ignore, always 48k
mimeType : 'audio/webm;codecs=opus'
//mimeType : 'audio/webm;codecs=pcm'
};
//recorder = new MediaRecorder(stream, options);
recorder = new MediaRecorder(stream);
recorder.ondataavailable = function(e) {
var url = URL.createObjectURL(e.data);
var preview = document.createElement('audio');
preview.controls = true;
preview.src = url;
document.body.appendChild(preview);
reader = new FileReader();
reader.readAsDataURL(e.data);
reader.onloadend = function() {
base64data = reader.result;
//console.log("Inside FileReader:" + base64data);
}
};
recorder.start();
};
recordButton.innerText = "Recording... press to stop";
navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess);
function toggleRecording() {
if (recorder && recorder.state == "recording") {
recorder.stop();
gumStream.getAudioTracks()[0].stop();
recordButton.innerText = "Saving the recording... pls wait!"
}
}
// https://stackoverflow.com/a/951057
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
var data = new Promise(resolve=>{
//recordButton.addEventListener("click", toggleRecording);
recordButton.onclick = ()=>{
toggleRecording()
sleep(2000).then(() => {
// wait 2000ms for the data to be available...
// ideally this should use something like await...
//console.log("Inside data:" + base64data)
resolve(base64data.toString())
});
}
});
</script>
"""
def get_audio():
display(HTML(AUDIO_HTML))
data = eval_js("data")
binary = b64decode(data.split(',')[1])
process = (ffmpeg
.input('pipe:0')
.output('pipe:1', format='wav')
.run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
)
output, err = process.communicate(input=binary)
riff_chunk_size = len(output) - 8
# Break up the chunk size into four bytes, held in b.
q = riff_chunk_size
b = []
for i in range(4):
q, r = divmod(q, 256)
b.append(r)
# Replace bytes 4:8 in proc.stdout with the actual size of the RIFF chunk.
riff = output[:4] + bytes(b) + output[8:]
sr, audio = wav_read(io.BytesIO(riff))
return audio, sr
audio, sr = get_audio()
def recordingTranscribe(audio):
data16 = np.frombuffer(audio)
return model.stt(data16)
recordingTranscribe(audio)

Try this
It is perfect!
note-required python 3.6 or below...
import speech_recognition as sr
def takeCommand():
r=sr.Recognizer()
with sr.Microphone() as source:
print("Listening...")
audio=r.listen(source)
try:
statement=r.recognize_google(audio,language='en-in')
print(f"user said:{statement}\n")
except Exception as e:
#speak("Sorry, please say that again")
print('Sorry, please say that again')
return "None"
return statement
if __name__=='__main__':
statement = takeCommand().lower()
print('detecting.....')
print(statement)

Related

.py file executed by C# process not waiting to finish

I want to run .py file from my C# project, and get the result. The python script is making an API request, and returns an auth_key token, which I want to use in my C# code. The only problem is that, for some reason the C# code doesn't wait for the process to finish, and thus that not every account has auth_key. Here is my C# code.
private static void GenerateTokens()
{
var url = ConfigurationManager.AppSetting[GeSettingsNode() + ":ip"];
for (int i = 0; i < accounts.Count; i++)
{
ProcessStartInfo start = new ProcessStartInfo();
start.FileName = ConfigurationManager.AppSetting["PythonPath"];
start.Arguments = string.Format($"python_operation_processor.py {accounts[i].client_key_id} {accounts[i].key_sercret_part} {url}");
start.UseShellExecute = false;
start.RedirectStandardOutput = true;
Process process = Process.Start(start);
using (StreamReader reader = process.StandardOutput)
{
accounts[i].auth_key = reader.ReadToEnd().Trim();
}
}
}
And here is my Python script ( python_operation_processor.py )that's making the API requests.
if __name__ == '__main__':
client_key_id = sys.argv[1]
client_secret = sys.argv[2]
API_URL = sys.argv[3]
nonce = str(uuid.uuid4())
d = datetime.datetime.now() - datetime.timedelta(hours=3)
timestamp = d.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'
signature = b64encode(hmac.new(b64decode(client_secret), msg=bytes(client_key_id + nonce + timestamp, 'utf-8'),
digestmod=hashlib.sha256).digest()).decode('utf-8')
r = requests.post(API_URL + '/v1/authenticate',
json={'client_key_id': client_key_id, 'timestamp': timestamp, 'nonce': nonce,
'signature': signature})
if r.status_code != 200:
raise Exception('Failed to authenticate: ' + r.text)
auth_token = r.json()['token']
print(auth_token)
Do you have any idea, how I can wait for the execution of every process, and get the token for every account ?
I recently created something similar and ended up with this because, whilst waiting for the process is easy, it is tricky to get the output stream filled correctly.
The method presented also allow you to display the output into a textblock or similar in your application.
If you use it like this, the token will be written to the StringBuilder, and used as return value.
private async Task<string> RunCommand(string fileName, string args)
{
var timeoutSignal = new CancellationTokenSource(TimeSpan.FromMinutes(3));
ProcessStartInfo start = new ProcessStartInfo();
start.FileName = fileName;
start.Arguments = string.Format("{0}", args);
start.RedirectStandardOutput = true;
start.RedirectStandardError = true;
start.UseShellExecute = false;
start.CreateNoWindow = true;
var sb = new StringBuilder();
using (Process process = new Process())
{
process.StartInfo = start;
process.OutputDataReceived += (sender, eventArgs) =>
{
sb.AppendLine(eventArgs.Data); //allow other stuff as well
};
process.ErrorDataReceived += (sender, eventArgs) => {};
if (process.Start())
{
process.EnableRaisingEvents = true;
process.BeginOutputReadLine();
process.BeginErrorReadLine();
await process.WaitForExitAsync(timeoutSignal.Token);
//allow std out to be flushed
await Task.Delay(100);
}
}
return sb.ToString();
}
To render this to a textblock in a UI application, you'll need to:
implement an event which signals a new line has been read, which means forwarding the process.OutputDataReceived event.
if your thinking about a live feed, make sure you flush the stdio buffer in python setting flush to true: print(""hello world"", flush=True)
If you're using an older .net version; you can implement the WaitForExitAsync as described here: https://stackoverflow.com/a/17936541/2416958 as an extention method:
public static class ProcessHelpers
{
public static Task<bool> WaitForExitAsync(this Process process, TimeSpan timeout)
{
ManualResetEvent processWaitObject = new ManualResetEvent(false);
processWaitObject.SafeWaitHandle = new SafeWaitHandle(process.Handle, false);
TaskCompletionSource<bool> tcs = new TaskCompletionSource<bool>();
RegisteredWaitHandle registeredProcessWaitHandle = null;
registeredProcessWaitHandle = ThreadPool.RegisterWaitForSingleObject(
processWaitObject,
delegate(object state, bool timedOut)
{
if (!timedOut)
{
registeredProcessWaitHandle.Unregister(null);
}
processWaitObject.Dispose();
tcs.SetResult(!timedOut);
},
null /* state */,
timeout,
true /* executeOnlyOnce */);
return tcs.Task;
}
}

Transfer data from python to nodejs using requests and socket

I try to send data from a simple python programm to a node server. But no success. That's why I ask some help.
My simple python :
import requests
SIGNUP_URL = 'http://localhost:8000/timer'
def submit_form():
obj = {name:'whateever'}
resp = requests.post(SIGNUP_URL, data = obj)
if __name__ == '__main__':
submit_form()
my nodejs (light, I remove not concern lines) :
var http = require('http');
var express = require('express');
var app = express();
app.get('/', function (request, response) {
response.sendFile(__dirname + '/public/index.html');
});
var server = http.createServer(app);
var io = require('socket.io')(server);
const bodyParser = require('body-parser');
const path = require('path');
const {execFile, exec, spawn} = require ('child_process');
app.use(express.static('public'));
app.use(bodyParser.urlencoded({ extended: true }));
app.post('/timer', function(req, res){
res.sendFile(__dirname + '/public/status.html');
var test = "test";
var data = req.body;
var info = data.name;
io.emit('messageFromServer', { info });
console.log('info')
});
server.listen(8000, console.log("listening to port 8000"));
So, when I execute my python I want transfering to the server the data "name : whatever", then I want the server write the data into the console (to be sure the data is well sent), and I all is ok, I want to emit this data to my html page...
Thanks for helping me.
The answer :
python code :
import requests
SIGNUP_URL = 'http://localhost:8000/timer'
def submit_form():
obj = {'name':'whateever'}
resp = requests.post(SIGNUP_URL, data = obj)
if __name__ == '__main__':
submit_form()
nodejs code :
app.post('/timer', function(req, res){
res.sendFile(__dirname + '/public/status.html');
var info= req.body;
io.emit('messageFromServer', info);
console.log(info)
});
It works.

How to send image in C# to a Flask Server that is decoded by OpenCV?

Here is part of my Flask API in Python:
image_data = flask.request.get_data() # image_data's data type
string image_vector = numpy.frombuffer(image_data, dtype=numpy.uint8)
image = cv2.imdecode(image_vector, cv2.IMREAD_COLOR)
How would I send a image that I encoded like below, in C#:
ResultString = "Loading...";
var surface = SKSurface.Create(new SKImageInfo((int)canvasView.CanvasSize.Width,
(int)canvasView.CanvasSize.Height));
var canvas = surface.Canvas;
canvas.Clear();
foreach (SKPath path in completedPaths)
canvas.DrawPath(path, paint);
foreach (SKPath path in inProgressPaths.Values)
canvas.DrawPath(path, paint);
canvas.Flush();
var snap = surface.Snapshot();
var pngImage = snap.Encode(SKEncodedImageFormat.Png, 100);
AnalyerResults analyerResults = mathclient.AnalyzeWork(pngImage);
try { ResultString = analyerResults.message; } catch { ResultString = "Error..."; }
How would I send the image to in C# to be able to be received and decoded like shown in part of my API?
I already tried:
HttpClient client = await GetClient();
var result = await client.PostAsync(Url + "analyzer", new ByteArrayContent(pngImage.ToArray()));
return JsonConvert.DeserializeObject<AnalyerResults>(await result.Content.ReadAsStringAsync());
I also tried:
var client = new RestClient(Url + "analyzer");
client.Timeout = -1;
var request = new RestRequest(Method.POST);
request.AddHeader("Content-Type", "image/png");
request.AddParameter("image/png", pngImage, ParameterType.RequestBody);
IRestResponse response = client.Execute(request);
return JsonConvert.DeserializeObject<AnalyerResults>(response.Content);
However in both the content returned null. This question is related to How to Replicate this Postman Request which has a Binary Content Body and contains a .PNG File in C#?.

I am not able to obtain data from process.stdout.on globally

I am trying to obtain the value of category variable using a machine learning code in python. Although when i execute the code the category variable isn't changed at all and database stores the category as "A" which is defined outside globally. As far as i know, it is due to some asynchronous behavior but i don't know the actual solution.
var category = "A";
if (type == "lost") {
var spawn = require("child_process").spawn;
var process = spawn('python', ["./evaluate_lost.py", req.body.image]);
process.stdout.on('data', function(data) {
category += data.toString();
});
var newLost = {
name: name,
date: date,
time: time,
location: location,
phone: phone,
image: image,
description: desc,
category: category,
author: author
};
// Create a new lost and save to DB
Lost.create(newLost, function(err, newlyCreated) {
if (err) {
console.log(err);
} else {
//redirect back to items page
res.redirect("/items");
}
});
}
Well i am editing the question with the evaluate_lost.py script and the directory structure.
import sys
from keras import backend as K
import inception_v4
import numpy as np
import cv2
import os
import argparse
image=sys.argv[1]
# If you want to use a GPU set its index here
os.environ['CUDA_VISIBLE_DEVICES'] = ''
# This function comes from Google's ImageNet Preprocessing Script
def central_crop(image, central_fraction):
if central_fraction <= 0.0 or central_fraction > 1.0:
raise ValueError('central_fraction must be within (0, 1]')
if central_fraction == 1.0:
return image
img_shape = image.shape
depth = img_shape[2]
fraction_offset = int(1 / ((1 - central_fraction) / 2.0))
bbox_h_start = int(np.divide(img_shape[0], fraction_offset))
bbox_w_start = int(np.divide(img_shape[1], fraction_offset))
bbox_h_size = int(img_shape[0] - bbox_h_start * 2)
bbox_w_size = int(img_shape[1] - bbox_w_start * 2)
image = image[bbox_h_start:bbox_h_start+bbox_h_size, bbox_w_start:bbox_w_start+bbox_w_size]
return image
def get_processed_image(img_path):
# Load image and convert from BGR to RGB
im = np.asarray(cv2.imread(img_path))[:,:,::-1]
im = central_crop(im, 0.875)
im = cv2.resize(im, (299, 299))
im = inception_v4.preprocess_input(im)
if K.image_data_format() == "channels_first":
im = np.transpose(im, (2,0,1))
im = im.reshape(-1,3,299,299)
else:
im = im.reshape(-1,299,299,3)
return im
if __name__ == "__main__":
# Create model and load pre-trained weights
model = inception_v4.create_model(weights='imagenet', include_top=True)
# Open Class labels dictionary. (human readable label given ID)
classes = eval(open('validation_utils/class_names.txt', 'r').read())
# Load test image!
img_path = "../public/files/lost/" + image
img = get_processed_image(img_path)
# Run prediction on test image
preds = model.predict(img)
print("Class is: " + classes[np.argmax(preds)-1])
print("Certainty is: " + str(preds[0][np.argmax(preds)]))
sys.stdout.flush()
This is the directory structure which evaluates the python script on watch.jpg which is input through HTML form
I expect the category to be as returned from python machine learning code rather than what is already defined.
The data event handler runs asynchronously, you're not waiting for all the output to be consumed.
Use the end event to detect the end of the output, and run the code that saves the new Lost object there.
var category = "A";
if (type == "lost") {
var spawn = require("child_process").spawn;
var process = spawn('python', ["./evaluate_lost.py", req.body.image]);
process.stdout.on('data', function(data) {
category += data.toString();
});
process.stdout.on('end', function() {
var newLost = {
name: name,
date: date,
time: time,
location: location,
phone: phone,
image: image,
description: desc,
category: category,
author: author
};
// Create a new lost and save to DB
Lost.create(newLost, function(err, newlyCreated) {
if (err) {
console.log(err);
} else {
//redirect back to items page
res.redirect("/items");
}
});
});
}

Calling named pipe of Dropbox to get status: Error: read EPIPE

I have working Python script that checks local Windows Dropbox application sync status that i try to port to nodejs (to use in Electron) .
The problem is that i get:
events.js:141
throw er; // Unhandled 'error' event
^
Error: read EPIPE
at exports._errnoException (util.js:907:11)
at Pipe.onread (net.js:557:26)
when i try my node script.
Here's the python script:
import win32pipe, win32ts, win32api, pywintypes, struct, sys
yourUsername = "YourUsername"
def dropbox_path_status(pathname):
return ['dropbox not running','not in dropbox','up to date','syncronising','sync problem'][dropbox_path_status_code(pathname)+1]
def dropbox_path_status_code(pathname):
processid = win32api.GetCurrentProcessId()
threadid = win32api.GetCurrentThreadId()
request_type = 1
wtf = 0x3048302
pipename = r'\\.\pipe\DropboxPipe_' + str(win32ts.ProcessIdToSessionId(processid))
request = (struct.pack('LLLL', wtf, processid, threadid, request_type) + pathname.encode('utf-16le') + (chr(0)*540))[0:540]
try:
response = win32pipe.CallNamedPipe(pipename, request, 16382, 1000)
except pywintypes.error, err:
if err[0] == 2:
return -1
else:
raise
else:
return int(response[4:-1])
print dropbox_path_status("C:\Users\"+yourUsername+"\Dropbox")
Here's my node script
var net = require('net');
var ffi = require('ffi');
var ref = require('ref');
var BufferStream = require('node-bufferstream')
var PIPE_PATH = "\\\\.\\pipe\\DropboxPipe_1"
var UserName = "YourUsername"
var L = console.log;
var SessionId = ref.alloc("uint");
var kernel32 = new ffi.Library("kernel32",{
GetLastError:['string',[]],
GetCurrentProcessId: ['int',[]] ,
GetCurrentThreadId: ['int',[]],
ProcessIdToSessionId: ['bool',['int','uint *']],
CallNamedPipeA:['bool',[]]
});
var RequestInfo = 0x3048302;
var ProcessId = kernel32.GetCurrentProcessId();
var ThreadId = kernel32.GetCurrentThreadId();
var RequestType =1;
var dropbox = ref.types.void;
if(!kernel32.ProcessIdToSessionId(ProcessId,SessionId )){
console.log(kernel32.GetLastError());
}
else{
console.log(SessionId);
SessionId.type = ref.types.int;
SessionIdInt = SessionId.deref();
console.log(SessionIdInt);
}
var TestBufferIn = new Buffer(16+724);
TestBufferIn.writeUInt32LE(RequestInfo, 0);
TestBufferIn.writeUInt32LE(ProcessId, 4);
TestBufferIn.writeUInt32LE(ThreadId, 8);
TestBufferIn.writeUInt32LE(RequestType, 12);
//TestBufferIn.writeUInt32LE(1234567, 600);
console.log(TestBufferIn.length)
var mypath = "C:/Users/"+UserName+"/Dropbox/"
TestBufferIn.write(mypath,16,162,"utf-16le")
stream = new BufferStream(TestBufferIn);
var L = console.log;
var client = net.createConnection({ path: '\\\\.\\pipe\\DropboxPipe_1'}, function () { console.log('connected');
client.write(TestBufferIn, ()=>{
console.log("write callback")
});
})
client.on('data', function(data) {
L('Client: on data:', data.toString());
client.end('Thanks!');
});
client.on('end', function() {
L('Client: on end');
})
Any idea why the error appears?
EDIT
When i write with string (instead of buffer) i get same error.
var client = net.createConnection({ path: '\\\\.\\pipe\\DropboxPipe_1'}, function () { console.log('connected');
client.write("hello", ()=>{
console.log("write callback"); //this is written in console
});
});

Categories