I am not experienced with audio processing. I am trying to record audio from my microphone and stream it to a Flask server; I have set up the JavaScript for the recording. The stream feeds a speech recognition engine, which needs the audio to be 16-bit, 16 kHz, mono, in WAV format. I was trying to use recorder.js (https://github.com/mattdiamond/Recorderjs/blob/master/dist/recorder.js), but context.createScriptProcessor is deprecated, so I switched to an AudioWorklet processor (https://developer.mozilla.org/en-US/docs/Web/API/AudioWorklet). I took some code from recorder.js that encodes WAV, and other code that downsamples the default 44100 Hz to 16 kHz, i.e. 16000 samples per second. The problems are:
1. The bytes I am receiving on the Flask server are corrupted (I tried writing them to a WAV file and got a WAV audio that is zero seconds long).
2. I am not sure why, but I think the fault is in the JavaScript code (or the Flask code, I don't know).
If anyone knows where I went wrong, or better still how I can achieve streaming as 16-bit, 16 kHz, mono WAV, I would really appreciate it. The code is below.
JavaScript code using the AudioWorklet:
let dataArray = [];
var recording = true;

const main = async () => {
  const context = new AudioContext()
  const microphone = await navigator.mediaDevices.getUserMedia({
    audio: true
  })
  let sampleRate = 16000
  let numOfChannels = 1
  const source = context.createMediaStreamSource(microphone)
  await context.audioWorklet.addModule('js/recorderWorkletProcessor.js')
  const recorder = new AudioWorkletNode(context, "recorder.worklet")
  source.connect(recorder).connect(context.destination)
  recorder.port.onmessage = (e) => {
    // downsample to a 16 kHz sample rate
    let downSampledData = downsampleBuffer(e.data, sampleRate, context.sampleRate)
    // convert to audio/wav format
    let dataView = encodeWAV(downSampledData, context, sampleRate)
    dataArray.push(e.data)
    // Create a blob file
    let blob = new Blob([dataView], { type: 'audio/wav' });
    // send to the server
    upload(blob)
    if (!recording) {
      console.log("RECORDING STOPPED");
      recorder.disconnect(context.destination);
      source.disconnect(recorder);
    }
  }
};
// sorry, this one is unused; floatTo16BitPCM() below is used instead
function convertFloat32To16BitPCM(input) {
  const output = new Int16Array(input.length)
  for (let i = 0; i < input.length; i++) {
    const s = Math.max(-1, Math.min(1, input[i]))
    output[i] = s < 0 ? s * 0x8000 : s * 0x7fff
  }
  return output
}
function startRec() {
  // start the recording
  main()
}

function stopRec() {
  // stop the recording
  console.log('stopped')
  recording = false
}
// convert to 16Bit PCM
function floatTo16BitPCM(output, offset, input) {
  for (var i = 0; i < input.length; i++, offset += 2) {
    var s = Math.max(-1, Math.min(1, input[i]));
    output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
  }
}

function writeString(view, offset, string) {
  for (var i = 0; i < string.length; i++) {
    view.setUint8(offset + i, string.charCodeAt(i));
  }
}
// convert to wave format
function encodeWAV(samples, context, sampleRate) {
  let buffer = new ArrayBuffer(44 + samples.length * 2);
  let view = new DataView(buffer);

  /* RIFF identifier */
  writeString(view, 0, 'RIFF');
  /* RIFF chunk length */
  view.setUint32(4, 36 + samples.length * 2, true);
  /* RIFF type */
  writeString(view, 8, 'WAVE');
  /* format chunk identifier */
  writeString(view, 12, 'fmt ');
  /* format chunk length */
  view.setUint32(16, 16, true);
  /* sample format (raw) */
  view.setUint16(20, 1, true);
  /* channel count */
  view.setUint16(22, 1, true);
  /* sample rate */
  view.setUint32(24, sampleRate, true);
  /* byte rate (sample rate * block align; 2 bytes per frame for 16-bit mono) */
  view.setUint32(28, sampleRate * 2, true);
  /* block align (channel count * bytes per sample) */
  view.setUint16(32, 1 * 2, true);
  /* bits per sample */
  view.setUint16(34, 16, true);
  /* data chunk identifier */
  writeString(view, 36, 'data');
  /* data chunk length */
  view.setUint32(40, samples.length * 2, true);

  floatTo16BitPCM(view, 44, samples);

  return view;
}
const blobToBase64 = (blob) => {
  // convert blob to base64 encoding
  return new Promise((resolve) => {
    const reader = new FileReader();
    reader.readAsDataURL(blob);
    reader.onloadend = function () {
      resolve(reader.result);
    };
  });
};

const upload = async (audioData) => {
  // send the blob containing audio bytes to the flask server
  var AjaxURL = 'http://127.0.0.1:5000/media';
  const b64 = await blobToBase64(audioData);
  const jsonString = JSON.stringify({blob: b64});
  console.log(jsonString);
  $.ajax({
    type: "POST",
    url: AjaxURL,
    data: jsonString,
    contentType: 'application/json;charset=UTF-8',
    success: function(result) {
      window.console.log(result.response);
    }
  });
}
function downsampleBuffer(buffer, rate, sampleRate) {
  if (rate == sampleRate) {
    return buffer;
  }
  if (rate > sampleRate) {
    throw "downsampling rate should be smaller than original sample rate";
  }
  var sampleRateRatio = sampleRate / rate;
  var newLength = Math.round(buffer.length / sampleRateRatio);
  var result = new Float32Array(newLength);
  var offsetResult = 0;
  var offsetBuffer = 0;
  while (offsetResult < result.length) {
    var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    // Use average value of skipped samples
    var accum = 0, count = 0;
    for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    result[offsetResult] = accum / count;
    // Or you can simply get rid of the skipped samples:
    // result[offsetResult] = buffer[nextOffsetBuffer];
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
}
recorderWorkletProcessor.js
class RecorderProcessor extends AudioWorkletProcessor {
  bufferSize = 4096
  _bytesWritten = 0
  _buffer = new Float32Array(this.bufferSize)

  constructor() {
    super()
    this.initBuffer()
  }

  initBuffer() {
    this._bytesWritten = 0
  }

  isBufferEmpty() {
    return this._bytesWritten === 0
  }

  isBufferFull() {
    return this._bytesWritten === this.bufferSize
  }

  process(inputs, outputs, parameters) {
    this.append(inputs[0][0])
    return true
  }

  append(channelData) {
    if (this.isBufferFull()) {
      this.flush()
    }
    if (!channelData) return
    for (let i = 0; i < channelData.length; i++) {
      this._buffer[this._bytesWritten++] = channelData[i]
    }
  }

  flush() {
    this.port.postMessage(
      this._bytesWritten < this.bufferSize ? this._buffer.slice(0, this._bytesWritten) : this._buffer
    )
    this.initBuffer()
  }
}

registerProcessor('recorder.worklet', RecorderProcessor)
Finally, my Flask server code.
NOTE: The endpoint has to be an HTTP endpoint, which is why I am using the AJAX call in the JS (not sockets). There is a speech recognition engine server running on sockets, which is why there is a websocket call in the async code. The socket server receives BYTES of audio data.
#!/usr/bin/env python
# encoding: utf-8
from flask import Flask, jsonify, request
from flask_cors import CORS, cross_origin
import numpy as np
import soundfile as sf
import json
import logging
import base64
import asyncio
import websockets
import sys
import wave

app = Flask(__name__)
app.secret_key = "stream"
CORS(app, supports_credentials=True)

def get_byte_string(string):
    delimiter = ';base64,'
    splitted_string = string.split(delimiter)
    return splitted_string[1]

@app.route('/media', methods=['POST'])
async def echo():
    app.logger.info('Connection accepted')
    has_seen_media = False
    message_count = 0
    chunk = None
    data = json.loads(request.data)
    if data is None:
        app.logger.info('No message received')
    else:
        app.logger.info("Media message received")
        blob = data['blob']
        byte_str = get_byte_string(blob)
        byte_str = bytes(byte_str, 'utf-8')
        chunk = base64.decodebytes(byte_str)
        has_seen_media = True
    if has_seen_media:
        app.logger.info("Payload received: {} bytes".format(len(chunk)))
        # set up websocket here
        async with websockets.connect('ws://localhost:2700') as websocket:
            await websocket.send(chunk)
            print(await websocket.recv())
            await websocket.send('{"eof" : 1}')
            print(await websocket.recv())
    message_count += 1
    return jsonify({'response': ''})

if __name__ == '__main__':
    app.logger.setLevel(logging.DEBUG)
    app.run(debug=True)
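Not part of the original code, but a quick way to localize the fault: dump one received chunk to disk and parse it back with the stdlib wave module. If wave reports zero frames or fails to open the file, the bytes arriving at Flask are not a valid WAV, which points at the JavaScript encoding step rather than the transport. The helper name and debug path below are made up:

import wave

def inspect_chunk(chunk, path='debug_chunk.wav'):
    # write the raw bytes exactly as received
    with open(path, 'wb') as f:
        f.write(chunk)
    # read the header back: expect 1 channel, sample width 2 (16-bit),
    # 16000 Hz, and a non-zero frame count
    with wave.open(path, 'rb') as w:
        print(w.getnchannels(), w.getsampwidth(), w.getframerate(), w.getnframes())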
I want to save an RGB image (.jpg) to a binary file (.bin) and get the same saved data in the .bin file from both Python and C++.
Here is the code I used to save the image to a bin file in Python and in C++, but I got different results when I compared the two .bin files.
Python

import cv2

image = cv2.imread('image.jpg')

filename1 = "/image.bin"  # save data as bin file
bin_file = image.astype('float32').tofile(filename1)

byte_list = []
with open(filename1, "rb") as f:
    while (byte := f.read(1)):
        byte_list.append(byte)
C++

#include <fstream>
#include <string>
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;

int IMAGE_SIZE = 224;

void matwrite(const string& filename, const Mat& mat)
{
    ofstream fs(filename, fstream::binary);

    // Header
    int type = mat.type();
    int channels = mat.channels();
    fs.write((char*)&mat.rows, sizeof(int));    // rows
    fs.write((char*)&mat.cols, sizeof(int));    // cols
    fs.write((char*)&type, sizeof(int));        // type
    fs.write((char*)&channels, sizeof(int));    // channels

    // Data
    if (mat.isContinuous())
    {
        fs.write(mat.ptr<char>(0), (mat.dataend - mat.datastart));
    }
    else
    {
        int rowsz = CV_ELEM_SIZE(type) * mat.cols;
        for (int r = 0; r < mat.rows; ++r)
        {
            fs.write(mat.ptr<char>(r), rowsz);
        }
    }
}

int main()
{
    // Save data
    {
        cv::Mat noisyImg = cv::imread("image.jpg");
        //randu(m, 0, 5);
        matwrite("bin_file.bin", noisyImg);
    }
    return 0;
}
What I'm looking for is to save the RGB image to a binary file (.bin) in both C++ and Python with the same method, so that the data saved in the two bin files matches.
Thanks.
Solved!
Updated code
Python

filename1 = "./image.bin"  # save data as bin file, without the float32 cast
bin_file = image.tofile(filename1)

C++

void matwrite(const string& filename, const Mat& mat)
{
    ofstream fs(filename, fstream::binary);
    int type = mat.type();

    // Data only (no header), so the layout matches numpy's tofile
    if (mat.isContinuous())
    {
        fs.write(mat.ptr<char>(0), (mat.dataend - mat.datastart));
    }
    else
    {
        int rowsz = CV_ELEM_SIZE(type) * mat.cols;
        for (int r = 0; r < mat.rows; ++r)
        {
            fs.write(mat.ptr<char>(r), rowsz);
        }
    }
}

int main()
{
    // Save data
    {
        cv::Mat Img = cv::imread("image.jpg");
        //randu(m, 0, 5);
        matwrite("bin_file.bin", Img);
    }
    return 0;
}
Help needed converting C# code to Python. Specifically, I need help converting the using statement inside the GetCodes method of the following example.
public class Class1
{
    public static string GetUrlStartingAtPath(string url)
    {
        Regex pathEx = new Regex(@"(?in)^https?:\/{0,3}[0-9.\-A-Za-z]+(:\d+)?(?<content>.+)$", RegexOptions.Compiled);
        // parse out the path
        return pathEx.Match(url).Groups[1].ToString();
    }

    public static long GetUnixEpochTime()
    {
        // UNIX epoch
        var epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
        var milliseconds = (long)(DateTime.Now.ToUniversalTime() - epoch).TotalMilliseconds;
        return milliseconds;
    }

    public Dictionary<string, string> GetCodes(string url)
    {
        const string SBO_KEY_1 = "1";
        const string SBO_KEY_TEXT_1 = "PQ/OZW8SZCU/wUwm2u+Os6oyAmiFfif6QGVAhCLUahh36ui7BJfwymytCgULDZ6G111ud6SuySii544A6Uw+Tw==";
        Dictionary<string, byte[]> sboKeys = new Dictionary<string, byte[]>();
        var b64 = Convert.FromBase64String(SBO_KEY_TEXT_1);
        sboKeys.Add(SBO_KEY_1, Convert.FromBase64String(SBO_KEY_TEXT_1));
        string scheme = "1";
        long unixEpochTime = GetUnixEpochTime();
        var sboKey = sboKeys[scheme];
        // store the headers we'll return
        var headers = new Dictionary<string, string>();
        // parse out the path
        string urlFromPath = GetUrlStartingAtPath(Uri.UnescapeDataString(url));
        // create base message
        var baseMessage = String.Format(System.Globalization.CultureInfo.CurrentCulture, "{0}:{1}", unixEpochTime, urlFromPath);
        // create signable message
        var signable = ASCIIEncoding.ASCII.GetBytes(baseMessage);
        // create crypto class
        using (var hmacsha1 = new System.Security.Cryptography.HMACSHA1(sboKey))
        {
            // create hash
            var hash = hmacsha1.ComputeHash(signable);
            // add headers
            headers.Add("SNL-Request-Time", unixEpochTime.ToString(System.Globalization.CultureInfo.CurrentCulture));
            headers.Add("SNL-Request-Client", String.Format(System.Globalization.CultureInfo.CurrentCulture, "{0}:{1}", scheme, Convert.ToBase64String(hash)));
        }
        // done
        return headers;
    }

    public void main()
    {
        string url = "http://localhost/SNL.Services.Data.Api.Service/v2/Internal/General/SecurityIndexs?$select=KeyIndex,IndexShortNameDisplay&$expand=PricingMRIndexs($select=IndexPriceChange,IndexPriceChangeActual,IndexValue,PricingAsOf),SecurityIndexValues($select=IndexValue,PricingAsOf;$filter=PricingAsOf+gt+2019-01-12;$expand=IndexValueChanges($select=IndexPriceChange,IndexPriceChangeActual;$filter=KeyPricePeriod+eq+1);$orderby=PricingAsOf)&$filter=KeyIndex+eq+1+or+KeyIndex+eq+2+or+KeyIndex+eq+4+or+KeyIndex+eq+196+or+KeyIndex+eq+339&$orderby=KeyIndex&cache=3600";
        var Codes = GetCodes(url);
        foreach (var k in Codes.Keys)
        {
            Console.WriteLine(k + " = " + Codes[k]);
        }
    }
}
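For reference, a sketch of what a Python translation might look like. The C# using block exists only to dispose the HMACSHA1 instance; in Python, hmac.new() needs no explicit disposal, so the block body becomes plain code. The stdlib hmac, hashlib, base64, re, time, and urllib modules are assumed to cover everything the C# code uses:

import base64
import hashlib
import hmac
import re
import time
from urllib.parse import unquote

SBO_KEY_TEXT_1 = "PQ/OZW8SZCU/wUwm2u+Os6oyAmiFfif6QGVAhCLUahh36ui7BJfwymytCgULDZ6G111ud6SuySii544A6Uw+Tw=="
SBO_KEYS = {"1": base64.b64decode(SBO_KEY_TEXT_1)}

def get_url_starting_at_path(url):
    # (?i) replaces RegexOptions.IgnoreCase; the named group stands in for
    # C#'s explicit-capture (?n) mode
    m = re.match(r"(?i)^https?:/{0,3}[0-9.\-A-Za-z]+(?::\d+)?(?P<content>.+)$", url)
    return m.group("content") if m else ""

def get_unix_epoch_time():
    # milliseconds since the UNIX epoch
    return int(time.time() * 1000)

def get_codes(url):
    scheme = "1"
    sbo_key = SBO_KEYS[scheme]
    unix_epoch_time = get_unix_epoch_time()
    url_from_path = get_url_starting_at_path(unquote(url))
    base_message = "%d:%s" % (unix_epoch_time, url_from_path)
    # body of the C# using block; the hmac object is simply garbage-collected
    digest = hmac.new(sbo_key, base_message.encode("ascii"), hashlib.sha1).digest()
    return {
        "SNL-Request-Time": str(unix_epoch_time),
        "SNL-Request-Client": "%s:%s" % (scheme, base64.b64encode(digest).decode("ascii")),
    }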
I have an Express Node.js application, but I also have a machine learning algorithm to use in Python. Is there a way I can call Python functions from my Node.js application to make use of the power of machine learning libraries?
The easiest way I know of is to use the child_process package, which comes packaged with Node.
Then you can do something like:
const spawn = require("child_process").spawn;
const pythonProcess = spawn('python',["path/to/script.py", arg1, arg2, ...]);
Then all you have to do is make sure that you import sys in your python script, and then you can access arg1 using sys.argv[1], arg2 using sys.argv[2], and so on.
To send data back to node just do the following in the python script:
print(dataToSendBack)
sys.stdout.flush()
And then node can listen for data using:
pythonProcess.stdout.on('data', (data) => {
// Do something with the data returned from python script
});
Since this allows multiple arguments to be passed to a script using spawn, you can restructure a python script so that one of the arguments decides which function to call, and the other argument gets passed to that function, etc.
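For concreteness, a minimal sketch of the Python side of that pattern (the file and function names here are made up):

# script.py
import sys

def add(x, y):
    return int(x) + int(y)

def greet(name):
    return "hello " + name

if __name__ == "__main__":
    # sys.argv[1] picks the function; the remaining arguments are passed to it
    func = {"add": add, "greet": greet}[sys.argv[1]]
    print(func(*sys.argv[2:]))  # whatever is printed becomes 'data' on the node side
    sys.stdout.flush()          # flush so node receives it promptly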
Hope this was clear. Let me know if something needs clarification.
An example for people who come from a Python background and want to integrate their machine learning model into a Node.js application:
It uses the child_process core module:
const express = require('express')
const app = express()

app.get('/', (req, res) => {
  const { spawn } = require('child_process');
  const pyProg = spawn('python', ['./../pypy.py']);

  pyProg.stdout.on('data', function(data) {
    console.log(data.toString());
    res.write(data);
    res.end('end');
  });
})

app.listen(4000, () => console.log('Application listening on port 4000!'))
It doesn't require the sys module in your Python script.
Below is a more modular way of performing the task, using a Promise:
const express = require('express')
const app = express()

let runPy = new Promise(function(success, nosuccess) {
  const { spawn } = require('child_process');
  const pyprog = spawn('python', ['./../pypy.py']);

  pyprog.stdout.on('data', function(data) {
    success(data);
  });

  pyprog.stderr.on('data', (data) => {
    nosuccess(data);
  });
});

app.get('/', (req, res) => {
  res.write('welcome\n');
  runPy.then(function(fromRunpy) {
    console.log(fromRunpy.toString());
    res.end(fromRunpy);
  });
})

app.listen(4000, () => console.log('Application listening on port 4000!'))
The python-shell module by extrabacon is a simple way to run Python scripts from Node.js with basic, but efficient inter-process communication and better error handling.
Installation:
With npm:
npm install python-shell
Or with yarn:
yarn add python-shell
Running a simple Python script:
const PythonShell = require('python-shell').PythonShell;

PythonShell.run('my_script.py', null, function (err) {
  if (err) throw err;
  console.log('finished');
});
Running a Python script with arguments and options:
const PythonShell = require('python-shell').PythonShell;

var options = {
  mode: 'text',
  pythonPath: 'path/to/python',
  pythonOptions: ['-u'],
  scriptPath: 'path/to/my/scripts',
  args: ['value1', 'value2', 'value3']
};

PythonShell.run('my_script.py', options, function (err, results) {
  if (err) throw err;
  // results is an array consisting of messages collected during execution
  console.log('results: %j', results);
});
For the full documentation and source code, check out https://github.com/extrabacon/python-shell
You can now use RPC libraries that support both Python and JavaScript, such as zerorpc.
From their front page:
Node.js Client
var zerorpc = require("zerorpc");

var client = new zerorpc.Client();
client.connect("tcp://127.0.0.1:4242");

client.invoke("hello", "RPC", function(error, res, more) {
  console.log(res);
});
Python Server
import zerorpc

class HelloRPC(object):
    def hello(self, name):
        return "Hello, %s" % name

s = zerorpc.Server(HelloRPC())
s.bind("tcp://0.0.0.0:4242")
s.run()
Many of the examples are years out of date and involve complex setup. You can give JSPyBridge/pythonia a try (full disclosure: I'm the author). It's vanilla JS that lets you operate on foreign Python objects as if they existed in JS. In fact, it does interoperability so Python code can in return call JS through callbacks and passed functions.
numpy + matplotlib example, with the ES6 import system:
import { py, python } from 'pythonia'
const np = await python('numpy')
const plot = await python('matplotlib.pyplot')
// Fixing random state for reproducibility
await np.random.seed(19680801)
const [mu, sigma] = [100, 15]
// Inline expression evaluation for operator overloading
const x = await py`${mu} + ${sigma} * ${np.random.randn(10000)}`
// the histogram of the data
const [n, bins, patches] = await plot.hist$(x, 50, { density: true, facecolor: 'g', alpha: 0.75 })
console.log('Distribution', await n) // Always await for all Python access
await plot.show()
python.exit()
Through CommonJS (without top level await):
const { py, python } = require('pythonia')

async function main() {
  const np = await python('numpy')
  const plot = await python('matplotlib.pyplot')
  ...
  // the rest of the code
}

main().then(() => python.exit()) // If you don't call this, the process won't quit by itself.
Most of the previous answers resolve the promise in the on("data") handler; that is not the proper way to do it, because if you receive a lot of data you will only get the first chunk. Instead, you have to resolve on the end event.
const { spawn } = require('child_process');
const pythonDir = (__dirname + "/../pythonCode/"); // Path of python script folder
const python = pythonDir + "pythonEnv/bin/python"; // Path of the Python interpreter

/** remove warnings that you don't care about */
function cleanWarning(error) {
  return error.replace(/Detector is not able to detect the language reliably.\n/g, "");
}

function callPython(scriptName, args) {
  return new Promise(function(success, reject) {
    const script = pythonDir + scriptName;
    const pyArgs = [script, JSON.stringify(args)];
    const pyprog = spawn(python, pyArgs);
    let result = "";
    let resultError = "";

    pyprog.stdout.on('data', function(data) {
      result += data.toString();
    });

    pyprog.stderr.on('data', (data) => {
      resultError += cleanWarning(data.toString());
    });

    pyprog.stdout.on("end", function() {
      if (resultError == "") {
        success(JSON.parse(result));
      } else {
        console.error(`Python error, you can reproduce the error with: \n${python} ${script} ${pyArgs.join(" ")}`);
        const error = new Error(resultError);
        console.error(error);
        reject(resultError);
      }
    });
  });
}

module.exports.callPython = callPython;
Call:
const pythonCaller = require("../core/pythonCaller");
const result = await pythonCaller.callPython("preprocessorSentiment.py", {"thekeyYouwant": value});
python:

import json
import sys

try:
    argu = json.loads(sys.argv[1])
except:
    raise Exception("error while loading argument")
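Since the Node side ends with JSON.parse(result), the script must print exactly one JSON document to stdout. A minimal sketch of a matching script (the result shape here is made up):

import json
import sys

argu = json.loads(sys.argv[1])        # {"thekeyYouwant": value} from node
result = {"ok": True, "echo": argu}   # whatever your script computes
print(json.dumps(result))             # consumed by JSON.parse() on the node side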
I'm on Node 10 and child_process 1.0.2. The data from Python is a byte array and has to be converted. Here's just another quick example, making an HTTP request in Python.
node

const { spawn } = require("child_process");

const process = spawn("python", ["services/request.py", "https://www.google.com"])
return new Promise((resolve, reject) => {
  process.stdout.on("data", data => {
    resolve(data.toString()); // <------------ by default converts to utf-8
  })
  process.stderr.on("data", reject)
})
request.py

import urllib.request
import sys

def karl_morrison_is_a_pedant():
    response = urllib.request.urlopen(sys.argv[1])
    html = response.read()
    print(html)
    sys.stdout.flush()

karl_morrison_is_a_pedant()
P.S. Not a contrived example, since node's http module doesn't load a few requests I need to make.
You could take your Python, transpile it, and then call it as if it were JavaScript. I have done this successfully for Screeps and even got it to run in the browser, à la Brython.
Boa is good for your needs; see the example, which extends the Python tensorflow keras.Sequential class in JavaScript.

const fs = require('fs');
const boa = require('@pipcook/boa');
const { tuple, enumerate } = boa.builtins();

const tf = boa.import('tensorflow');
const tfds = boa.import('tensorflow_datasets');

const { keras } = tf;
const { layers } = keras;

const [
  [ train_data, test_data ],
  info
] = tfds.load('imdb_reviews/subwords8k', boa.kwargs({
  split: tuple([ tfds.Split.TRAIN, tfds.Split.TEST ]),
  with_info: true,
  as_supervised: true
}));

const encoder = info.features['text'].encoder;
const padded_shapes = tuple([
  [ null ], tuple([])
]);

const train_batches = train_data.shuffle(1000)
  .padded_batch(10, boa.kwargs({ padded_shapes }));
const test_batches = test_data.shuffle(1000)
  .padded_batch(10, boa.kwargs({ padded_shapes }));

const embedding_dim = 16;
const model = keras.Sequential([
  layers.Embedding(encoder.vocab_size, embedding_dim),
  layers.GlobalAveragePooling1D(),
  layers.Dense(16, boa.kwargs({ activation: 'relu' })),
  layers.Dense(1, boa.kwargs({ activation: 'sigmoid' }))
]);

model.summary();
model.compile(boa.kwargs({
  optimizer: 'adam',
  loss: 'binary_crossentropy',
  metrics: [ 'accuracy' ]
}));
The complete example is at: https://github.com/alibaba/pipcook/blob/master/example/boa/tf2/word-embedding.js
I used Boa in another project, Pipcook, which addresses machine learning problems for JavaScript developers; we implemented ML/DL models on top of the Python ecosystem (tensorflow, keras, pytorch) via the boa library.
/*eslint-env es6*/
/*global require*/
/*global console*/

var express = require('express');
var app = express();

// Creates a server which runs on port 3000 and
// can be accessed through localhost:3000
app.listen(3000, function() {
  console.log('server running on port 3000');
})

app.get('/name', function(req, res) {
  console.log('Running');

  // Use the child_process.spawn method from the
  // child_process module and assign it
  // to the variable spawn
  var spawn = require("child_process").spawn;

  // Parameters passed in spawn -
  // 1. type_of_script
  // 2. list containing the path of the script
  //    and arguments for the script
  // E.g.: http://localhost:3000/name?firstname=Levente
  var process = spawn('python', ['apiTest.py', req.query.firstname]);

  // Takes stdout data from the script which executed
  // with arguments and sends this data to the res object
  var output = '';
  process.stdout.on('data', function(data) {
    console.log("Sending Info")
    res.end(data.toString('utf8'));
  });
  console.log(output);
});
This worked for me. Your python.exe must be on your PATH for this code snippet to work. Also, make sure your Python script is in your project folder.
const util = require('util');
const exec = util.promisify(require('child_process').exec);

async function runPythonFile() {
  const { stdout, stderr } = await exec('py ./path_to_python_file -s asdf -d pqrs');
  if (stdout) { /* do something */ }
  if (stderr) { /* do something */ }
}
For more information, visit the official Node.js child process page: https://nodejs.org/api/child_process.html#child_processexeccommand-options-callback
You can check out my package on npm: https://www.npmjs.com/package/@guydev/native-python
It provides a very simple and powerful way to run Python functions from Node:
import { runFunction } from '@guydev/native-python'
const example = async () => {
  const input = [1, [1,2,3], {'foo':'bar'}]
  const { error, data } = await runFunction('/path/to/file.py', 'hello_world', '/path/to/python', input)

  // error will be null if no error occurred.
  if (error) {
    console.log('Error: ', error)
  } else {
    console.log('Success: ', data)
    // prints data, or null if the function has no return value
  }
}
python module

# module: file.py
def hello_world(a, b, c):
    print(type(a), a)
    # <class 'int'>, 1
    print(type(b), b)
    # <class 'list'>, [1, 2, 3]
    print(type(c), c)
    # <class 'dict'>, {'foo': 'bar'}
I am writing a module for my Google App Engine project in Go for performance reasons, but I need to be able to read some of the entities I have in the datastore. I wrote Go code to read the entities I built in Python, but I am getting the following error:
datastore: flattening nested structs leads to a slice of slices: field "Messages"
Model Definitions in Python:
class ModelB(ndb.Model):
    msg_id = ndb.StringProperty(indexed=False)
    cat_ids = ndb.StringProperty(repeated=True, indexed=False)
    list_ids = ndb.StringProperty(repeated=True, indexed=False)
    default_list_id_index = ndb.IntegerProperty(indexed=False)

class ModelA(ndb.Model):
    date_join = ndb.DateTimeProperty(auto_now_add=True)
    name = ndb.StringProperty()
    owner_salutation = ndb.StringProperty(indexed=False)
    owner_email_address = ndb.StringProperty()
    logo_url = ndb.StringProperty(indexed=False)
    ...
    messages = ndb.LocalStructuredProperty(ModelB, name='bm', repeated=True)
And in Go:
type ModelB struct {
    MessageID          string   `datastore:"msg_id,noindex"`
    CategoryIDs        []string `datastore:"cat_ids,noindex"`
    ListIDs            []string `datastore:"list_ids,noindex"`
    DefaultListIDIndex int      `datastore:"default_list_id_index,noindex"`
}

type ModelA struct {
    DateJoin          time.Time `datastore:"date_join,"`
    Name              string    `datastore:"name,"`
    OwnerSalutation   string    `datastore:"owner_salutation,noindex"`
    OwnerEmailAddress string    `datastore:"owner_email_address,"`
    LogoURL           string    `datastore:"logo_url,noindex"`
    Messages          []ModelB  `datastore:"bm,"`
}
Is there something I'm doing wrong here? Or is it just a feature incompatibility between the Go and Python model definitions?
Attempt to Decode ModelB
Re-define ModelA as follows:
import pb "appengine_internal/datastore"
import proto "code.google.com/p/goprotobuf/proto"

type ModelA struct {
    DateJoin          time.Time `datastore:"date_join,"`
    Name              string    `datastore:"name,"`
    OwnerSalutation   string    `datastore:"owner_salutation,noindex"`
    OwnerEmailAddress string    `datastore:"owner_email_address,"`
    LogoURL           string    `datastore:"logo_url,noindex"`
    Messages          []ModelB  `datastore:"-"`
}

// Load is implemented for the PropertyLoadSaver interface.
func (seller *ModelA) Load(c <-chan datastore.Property) error {
    f := make(chan datastore.Property, 100)
    for p := range c {
        if p.Name == "bm" {
            var val pb.EntityProto
            err := proto.Unmarshal([]byte(p.Value.(string)), &val)
            if err != nil {
                return err
            }
            //TODO: Store result as a new ModelB
        } else {
            f <- p
        }
    }
    close(f)
    return datastore.LoadStruct(seller, f)
}
But I receive the following error:
proto: required field "{Unknown}" not set
The Go datastore package doesn't support two layers of slices like that. You can have []ModelB, as long as ModelB doesn't contain any slices. Or you can use a single ModelB in ModelA, and that ModelB can have slices in it. But you can't have both []ModelB and slices inside ModelB. See the code for the error condition. Your options:
don't do it in Go
write your own datastore deserializer to handle this case - this is probably hard
change your python data structures to satisfy the Go requirements and rewrite your data
I guess if you dig enough you'll find the answer:
First off, when defining the LocalStructuredProperty properties in Python, you need to set keep_keys=True
class ModelB(ndb.Model):
    msg_id = ndb.StringProperty(indexed=False)
    cat_ids = ndb.StringProperty(repeated=True, indexed=False)
    list_ids = ndb.StringProperty(repeated=True, indexed=False)
    default_list_id_index = ndb.IntegerProperty(indexed=False)

class ModelA(ndb.Model):
    date_join = ndb.DateTimeProperty(auto_now_add=True)
    name = ndb.StringProperty()
    owner_salutation = ndb.StringProperty(indexed=False)
    owner_email_address = ndb.StringProperty()
    logo_url = ndb.StringProperty(indexed=False)
    ...
    messages = ndb.LocalStructuredProperty(ModelB, name='bm', repeated=True, keep_keys=True)
A simple redefinition in my code and a pass mapping over my entities doing a put() on each fixed up the representation (a sketch of that migration follows below).
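A one-off migration sketch under those assumptions (an ndb context is available; the batch size is arbitrary). Re-putting each entity re-serializes the 'bm' LocalStructuredProperty with its keys kept:

from google.appengine.ext import ndb

def reserialize_model_a(batch_size=500):
    # page through every ModelA and write it straight back; the put()
    # re-encodes 'messages' now that keep_keys=True is set
    cursor = None
    while True:
        entities, cursor, more = ModelA.query().fetch_page(
            batch_size, start_cursor=cursor)
        if entities:
            ndb.put_multi(entities)
        if not more:
            break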
Then in my Go Code:
type ModelB struct {
    MessageID          string   `datastore:"msg_id,noindex"`
    CategoryIDs        []string `datastore:"cat_ids,noindex"`
    ListIDs            []string `datastore:"list_ids,noindex"`
    DefaultListIDIndex int      `datastore:"default_list_id_index,noindex"`
}

type ModelA struct {
    DateJoin          time.Time `datastore:"date_join,"`
    Name              string    `datastore:"name,"`
    OwnerSalutation   string    `datastore:"owner_salutation,noindex"`
    OwnerEmailAddress string    `datastore:"owner_email_address,"`
    LogoURL           string    `datastore:"logo_url,noindex"`
    Messages          []ModelB  `datastore:"-"`
}
// Load is implemented for the PropertyLoadSaver interface.
func (s *ModelA) Load(c <-chan datastore.Property) (err error) {
    f := make(chan datastore.Property, 32)
    errc := make(chan error, 1)
    defer func() {
        if err == nil {
            err = <-errc
        }
    }()
    go func() {
        defer close(f)
        for p := range c {
            if p.Name == "bm" {
                var b ModelB
                err := loadLocalStructuredProperty(&b, []byte(p.Value.(string)))
                if err != nil {
                    errc <- err
                    return
                }
                s.Messages = append(s.Messages, b)
            } else {
                f <- p
            }
        }
        errc <- nil
    }()
    return datastore.LoadStruct(s, f)
}
I had to copy a bunch of code from the appengine/datastore package because a key function wasn't exported, and to reduce the amount of code I needed to copy, I dropped support for Reference types. I opened a ticket on the issue tracker to see if we can get the loadEntity function exported: https://code.google.com/p/googleappengine/issues/detail?id=10426
import (
    "errors"
    "time"

    "appengine"
    "appengine/datastore"

    pb "appengine_internal/datastore"
    proto "code.google.com/p/goprotobuf/proto"
)

func loadLocalStructuredProperty(dst interface{}, raw_proto []byte) error {
    var val pb.EntityProto
    err := proto.Unmarshal(raw_proto, &val)
    if err != nil {
        return err
    }
    return loadEntity(dst, &val)
}

// Copied from appengine/datastore since it's not exported.
// loadEntity loads an EntityProto into a PropertyLoadSaver or struct pointer.
func loadEntity(dst interface{}, src *pb.EntityProto) (err error) {
    c := make(chan datastore.Property, 32)
    errc := make(chan error, 1)
    defer func() {
        if err == nil {
            err = <-errc
        }
    }()
    go protoToProperties(c, errc, src)
    if e, ok := dst.(datastore.PropertyLoadSaver); ok {
        return e.Load(c)
    }
    return datastore.LoadStruct(dst, c)
}

func protoToProperties(dst chan<- datastore.Property, errc chan<- error, src *pb.EntityProto) {
    defer close(dst)
    props, rawProps := src.Property, src.RawProperty
    for {
        var (
            x       *pb.Property
            noIndex bool
        )
        if len(props) > 0 {
            x, props = props[0], props[1:]
        } else if len(rawProps) > 0 {
            x, rawProps = rawProps[0], rawProps[1:]
            noIndex = true
        } else {
            break
        }

        var value interface{}
        if x.Meaning != nil && *x.Meaning == pb.Property_INDEX_VALUE {
            value = indexValue{x.Value}
        } else {
            var err error
            value, err = propValue(x.Value, x.GetMeaning())
            if err != nil {
                errc <- err
                return
            }
        }
        dst <- datastore.Property{
            Name:     x.GetName(),
            Value:    value,
            NoIndex:  noIndex,
            Multiple: x.GetMultiple(),
        }
    }
    errc <- nil
}
func fromUnixMicro(t int64) time.Time {
    return time.Unix(t/1e6, (t%1e6)*1e3)
}

// propValue returns a Go value that combines the raw PropertyValue with a
// meaning. For example, an Int64Value with GD_WHEN becomes a time.Time.
func propValue(v *pb.PropertyValue, m pb.Property_Meaning) (interface{}, error) {
    switch {
    case v.Int64Value != nil:
        if m == pb.Property_GD_WHEN {
            return fromUnixMicro(*v.Int64Value), nil
        } else {
            return *v.Int64Value, nil
        }
    case v.BooleanValue != nil:
        return *v.BooleanValue, nil
    case v.StringValue != nil:
        if m == pb.Property_BLOB {
            return []byte(*v.StringValue), nil
        } else if m == pb.Property_BLOBKEY {
            return appengine.BlobKey(*v.StringValue), nil
        } else {
            return *v.StringValue, nil
        }
    case v.DoubleValue != nil:
        return *v.DoubleValue, nil
    case v.Referencevalue != nil:
        return nil, errors.New("Not Implemented!")
    }
    return nil, nil
}

// indexValue is a Property value that is created when entities are loaded from
// an index, such as from a projection query.
//
// Such Property values do not contain all of the metadata required to be
// faithfully represented as a Go value, and are instead represented as an
// opaque indexValue. Load the properties into a concrete struct type (e.g. by
// passing a struct pointer to Iterator.Next) to reconstruct actual Go values
// of type int, string, time.Time, etc.
type indexValue struct {
    value *pb.PropertyValue
}
The solution by someone1 works great, but I have many millions of entities and didn't want to have to re-put them all (to add keep_keys=True to the LocalStructuredProperty).
So I created a cut-down version of EntityProto which removes the dependency on the key & path etc. Simply replace pb.EntityProto with LocalEntityProto and the existing Python-written entities should load OK (I'm using a PropertyLoadSaver for the nested struct).
Disclaimer: I'm only using this to read from Go; I haven't tried writing the same entities back to see if they still load in Python.
import pb "google.golang.org/appengine/internal/datastore"
import proto "github.com/golang/protobuf/proto"

type LocalEntityProto struct {
    Kind             *pb.EntityProto_Kind `protobuf:"varint,4,opt,name=kind,enum=appengine.EntityProto_Kind" json:"kind,omitempty"`
    KindUri          *string              `protobuf:"bytes,5,opt,name=kind_uri" json:"kind_uri,omitempty"`
    Property         []*pb.Property       `protobuf:"bytes,14,rep,name=property" json:"property,omitempty"`
    RawProperty      []*pb.Property       `protobuf:"bytes,15,rep,name=raw_property" json:"raw_property,omitempty"`
    Rank             *int32               `protobuf:"varint,18,opt,name=rank" json:"rank,omitempty"`
    XXX_unrecognized []byte               `json:"-"`
}

func (m *LocalEntityProto) Reset()         { *m = LocalEntityProto{} }
func (m *LocalEntityProto) String() string { return proto.CompactTextString(m) }
func (*LocalEntityProto) ProtoMessage()    {}

func (m *LocalEntityProto) GetKind() pb.EntityProto_Kind {
    if m != nil && m.Kind != nil {
        return *m.Kind
    }
    return pb.EntityProto_GD_CONTACT
}

func (m *LocalEntityProto) GetKindUri() string {
    if m != nil && m.KindUri != nil {
        return *m.KindUri
    }
    return ""
}

func (m *LocalEntityProto) GetProperty() []*pb.Property {
    if m != nil {
        return m.Property
    }
    return nil
}

func (m *LocalEntityProto) GetRawProperty() []*pb.Property {
    if m != nil {
        return m.RawProperty
    }
    return nil
}

func (m *LocalEntityProto) GetRank() int32 {
    if m != nil && m.Rank != nil {
        return *m.Rank
    }
    return 0
}