python struct.unpack of a specific C struct from Berkeley DB

I have the following C struct:
#define UUID4_LEN 37
...
typedef struct can_record {
    char id[UUID4_LEN];
    char *can_data;
} CAN_RECORD;
I am saving that record in Berkeley DB via the below function:
int insert_record(DB **dbpp, CAN_RECORD *record) {
    DB *db;
    DBT key, data;
    int ret;
    db = *dbpp;
    memset(&key, 0, sizeof(DBT));
    memset(&data, 0, sizeof(DBT));
    uuid4_generate(record->id);
    key.data = record->id;
    key.size = (u_int32_t)strlen(record->id) + 1;
    data.data = &record;
    data.size = sizeof(CAN_RECORD);
    ret = db->put(db, 0, &key, &data, 0);
    if (ret != 0) {
        fprintf(stderr, "Unable to insert record %s, err: %s\n", record->id,
                db_strerror(ret));
        return ret;
    }
    printf("Record inserted %s %s\n", record->id, record->can_data);
    return ret;
}
NOTE: record->can_data has already been populated previously; it is of variable length, but it is a stringified JSON structure, i.e.:
asprintf(&record.can_data, "{\"t\": \"%s\", \"b\": \"%s\", \"e\": \"%u\"}", U_UID, name, (unsigned)time(NULL));
I have a python process that reads the Berkeley DB (here is a small excerpt):
from berkeleydb import db
import struct
...
...
cursor = self._db.cursor()
record = cursor.first()
while record:
    (id, data) = record
    self.log(f'RECORD: {id} {data}')
    id = struct.unpack("", id)
    data = struct.unpack("", data)
    self.log(f'DECODED: {id} {data}')
    record = cursor.next()
...
The record data looks like this:
b'46c54a16-366a-4397-aa68-357ab5538590\x00'
and
b'P\x99\x12\x00x\xbb\xfd~(\xbb\xfd~\x16\x00\x00\x00\x04\x00\x00\x00x\xbb\xfd~\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x9f\x02A\x00\x00\x00\x00\x83.\xf0v#\x03\x00\x00\x08\x00\x00\x00\xf0h\x9e\x9fpo;\xcc\x1d\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00can0\x00\x00\x00\x00x\xbb\xfd~\x00\x00\x00\x00\x03\x00\x00\x00\xb0\xfa\x00A\x00\x00\x00\x00\x00\x00'
I am unable to figure out how to use Python's struct.unpack to decode the byte string. I have tried a variety of different formats, but have been unsuccessful.
How would you go about unpacking the struct so that I get the original form back?
Unfortunately, the Berkeley DB reader has to be in python.
Also note:
data.data = &record;
data.size = sizeof(CAN_RECORD);
the data is the entire struct, which includes id[UUID4_LEN] and the *can_data pointer.
What would I need to do here:
(id, data) = record
id = struct.unpack("", id)
data = struct.unpack("", data)
to achieve the original form?

Ok, for the time being, I did a workaround. Rather than:
data.data = &record;
data.size = sizeof(CAN_RECORD);
I did:
data.data = record->can_data;
data.size = (u_int32_t)strlen(record->can_data) + 1;
So, I only saved the string, rather than the entire struct. Then, in my python, I simply did:
(id, data) = record
id = str(id, 'utf-8')
data = str(data, 'utf-8')
self.log(f'ID: {id}')
self.log(f'DATA: {data}')
and that decoded the byte string to a plain string perfectly:
ID: dacaf94f-ecf5-4252-89d8-e2c9deff8f8d
DATA: {"t": "", "b": "abc123", "e": "1653636766"}
Although this has helped me progress without using python struct.unpack, I am still keen to understand how to unpack structs in python, as I will likely have a future requirement with slightly more complicated struct definitions.
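For the record, the reason no format string could have worked: data.data = &record stores the raw bytes starting at the address of the record pointer itself, and even data.data = record would only capture the numeric value of the char *can_data pointer, never the JSON text it points to, so struct.unpack cannot recover the string from another process. If the C side instead wrote a self-contained layout (the 37-byte id array immediately followed by the NUL-terminated JSON string), a minimal sketch of the Python side could look like this (decode_record is a hypothetical helper, not part of the berkeleydb API):

import struct

UUID4_LEN = 37  # matches the C #define

def decode_record(raw):
    # Fixed-size part: "37s" pulls the id[UUID4_LEN] bytes unchanged.
    (id_bytes,) = struct.unpack_from(f"{UUID4_LEN}s", raw)
    rec_id = id_bytes.split(b"\x00", 1)[0].decode("utf-8")
    # Variable-size part: everything after the fixed field, up to its NUL.
    can_data = raw[UUID4_LEN:].split(b"\x00", 1)[0].decode("utf-8")
    return rec_id, can_data

For structs made only of fixed-size fields, a single format string covering every field (minding compiler padding, e.g. using the "=" prefix to drop native alignment) is enough; a pointer member always requires explicitly serializing the pointed-to bytes on the C side first.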

Related

Convert JSON object to JSON array / Python list

I need to read the keys in the JSON file to later use them as columns and insert/update the values pertaining to those keys. The problem is that my JSON has its first element as a JSON object (see code below).
JSON:
{
    "metadata": {
        "namespace": "5.2.0",
        "message_id": "3c80151b-fcf3-4cc3-ada0-635be5b5c95f",
        "transmit_time": "2020-01-30T11:25:47.247394-06:00",
        "message_type": "pricing",
        "domain": "Pricing Service",
        "version": "1.0.0"
    },
    "prices": [
        {
            "price": 24.99,
            "effective_date": "2019-06-01T00:00:00-05:00",
            "strikethrough": 34.99,
            "expiration_date": "2019-06-01T00:00:00-05:00",
            "modified_date": "2019-08-30T02:14:39.044968-05:00",
            "base_price": 25.99,
            "sku_id": 341214,
            "item_number": 244312,
            "trade_base_price": 14.99,
            "competitive_price": 20.00
        },
        {
            "price": 24.99,
            "effective_date": "2019-06-01T00:00:00-05:00",
            "strikethrough": 34.99,
            "expiration_date": "2019-06-01T00:00:00-05:00",
            "modified_date": "2019-08-30T02:14:39.044968-05:00",
            "base_price": 25.99,
            "sku_id": 674523,
            "item_number": 279412,
            "trade_base_price": 14.99,
            "competitive_price": 20.00
        }
    ]
}
The problem shows up when I read "metadata" using the get_Metadata function below (the error is at the end).
SQL Postgres Table:
DROP TABLE MyTable;
CREATE TABLE IF NOT EXISTS MyTable
(
    price numeric(5,2),
    effective_date timestamp without time zone,
    strikethrough numeric(5,2),
    expiration_date timestamp without time zone,
    modified_date timestamp without time zone,
    base_price numeric(5,2),
    sku_id integer CONSTRAINT PK_MyPK PRIMARY KEY NOT NULL,
    item_number integer,
    trade_base_price numeric(5,2),
    competitive_price numeric(5,2),
    namespace character varying(50),
    message_id character varying(50),
    transmit_time timestamp without time zone,
    message_type character varying(50),
    domain character varying(50),
    version character varying(50)
)
Python 3.9:
import psycopg2
import json
# import the psycopg2 database adapter for PostgreSQL
from psycopg2 import connect, Error

with open("./Pricing_test.json") as arq_api:
    read_data = json.load(arq_api)

# converts the JSON object "metadata" to a JSON array of objects / Python list;
# this does not work properly, as the post_gre function below only reads the very last key
read_data["metadata"] = [{key: value} for key, value in read_data["metadata"].items()]
#print(read_data)

data_pricing = []

def get_PricingData():
    list_1 = read_data["prices"]
    for dic in list_1:
        price = dic.get("price")
        effective_date = dic.get("effective_date")
        strikethrough = dic.get("strikethrough")
        expiration_date = dic.get("expiration_date")
        modified_date = dic.get("modified_date")
        base_price = dic.get("base_price")
        sku_id = dic.get("sku_id")
        item_number = dic.get("item_number")
        trade_base_price = dic.get("trade_base_price")
        competitive_price = dic.get("competitive_price")
        data_pricing.append([price, effective_date, strikethrough, expiration_date, modified_date, base_price, sku_id, item_number, trade_base_price, competitive_price, None, None, None, None, None, None])

get_PricingData()
data_metadata = []

def get_Metadata():
    list_2 = read_data["metadata"]
    for dic in list_2:
        namespace = dic.get("namespace")
        message_id = dic.get("message_id")
        transmit_time = dic.get("transmit_time")
        message_type = dic.get("message_type")
        domain = dic.get("domain")
        version = dic.get("version")
        #if len(namespace) == 0:
        #    data_pricing.append([None, None, None, None, None, version])
        #else:
        #    for sub_dict in namespace:
        #        namespace = sub_dict.get("namespace")
        #        message_id = sub_dict.get("message_id")
        #        transmit_time = sub_dict.get("transmit_time")
        #        message_type = sub_dict.get("message_type")
        #        domain = sub_dict.get("domain")
        #        data_pricing.append([group_id, group_name, subgrop_id, subgrop_name, None, None, None])
        data_metadata.append([namespace, message_id, transmit_time, message_type, domain, version])

get_Metadata()
conn = connect(
    host="MyHost",
    database="MyDB",
    user="MyUser",
    password="MyPassword",
    # attempt to connect for 3 seconds, then raise an exception
    connect_timeout=3
)
cur = conn.cursor()
cur.execute("TRUNCATE TABLE MyTable")  # comment this one out to avoid the sku_id PK violation error

def post_gre():
    for item in data_pricing:
        my_Pricingdata = tuple(item)
        cur.execute("INSERT INTO MyTable VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", my_Pricingdata)
    # updates with metadata
    for item2 in data_metadata:
        my_Metadata = tuple(item2)
        cur.execute("UPDATE MyTable SET namespace = %s, message_id = %s, transmit_time = %s, message_type = %s, domain = %s, version = %s", my_Metadata)

post_gre()
conn.commit()
conn.close()
it throws the following error:
namespace = dic.get("namespace")
AttributeError: 'str' object has no attribute 'get'
But if I wrap the "metadata" JSON object in array brackets [] directly in the file, it works perfectly fine: it reads every key in the metadata as a separate column (namespace, message_id, transmit_time, message_type, domain, version).
But since I should not modify the JSON source file itself, I need to convert "metadata" to a Python list type so that the code can read the keys.
P.S.
Almost right Solution:
read_data["metadata"] = [{key:value} for key,value in read_data["metadata"].items()]
The suggestion provided by @Suraj works, but for some reason it inserts NULL into all the "metadata" key columns (namespace, message_id, transmit_time, message_type, domain) except for "version". Any idea why? It does insert the correct values when I change the JSON by adding [], but I should not have to do that.
I was able to narrow down the issue: it only ever reads the very last key in "metadata", which happens to be "version"; if you change the order, it reads whatever key comes last instead (e.g. "domain").
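For what it's worth, the NULLs are consistent with the comprehension itself: [{key: value} for key, value in read_data["metadata"].items()] turns the single metadata object into six one-key dicts, so on each loop pass every dic.get(...) except the one matching key returns None, and because the UPDATE statement has no WHERE clause, each pass overwrites every row; only the last dict's column survives, which is whatever key comes last ("version" here). A minimal sketch of an alternative that keeps the keys together:

# Wrap the object in a one-element list instead of splitting it into
# one-key dicts; get_Metadata() then sees a single dict with every key.
read_data["metadata"] = [read_data["metadata"]]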
How about now?
import pandas as pd
import json

with open('stak_flow.json') as f:
    data = json.load(f)

data['metadata'] = [{key: value} for key, value in data['metadata'].items()]
print(data)

How to use a GAE datastore kind in Go when it was initially created in Python?

I have a datastore kind "Items" which was created in Python. In the following Go code (appengine v2), the q.Run() loop does not iterate over the data:
type Items struct {
    code string
    date time.Time
    name string
}

func getcode(w http.ResponseWriter, r *http.Request) {
    code := mux.Vars(r)["code"]
    fmt.Fprintf(w, "get code %v", code)
    c := appengine.NewContext(r)
    q := datastore.NewQuery("Items")
    for t := q.Run(c); ; {
        var x Items
        key, err := t.Next(&x)
        fmt.Fprintf(w, "%v", key)
        if err == datastore.Done {
            break
        }
        if err != nil {
            //serveError(c, w, err)
            return
        }
        fmt.Fprintf(w, "Code=%v\n", x.code)
    }
}
The Datastore package uses reflection to fill struct fields when reading an entity from the datastore. In Go, struct fields whose names start with a lowercase letter are unexported, and unexported fields cannot be set from packages other than the one they were defined in.
Only exported fields (whose names start with an uppercase letter) can be stored in / retrieved from the datastore. You can use tags to tell the package what the property is called in the datastore, in case it differs from the field name. So you have to change your Items struct to this:
type Items struct {
    Code string `datastore:"code"`
    Date time.Time `datastore:"date"`
    Name string `datastore:"name"`
}

can't figure out json elements result

The server side executes a SQL query (the server is written in Python) and returns JSON that looks like this:
return HttpResponse(json.dumps([{"data":output, "total":theResult}]), content_type ='application/json')
output is the result returned from an inner method and is already serialized, like this:
output = serializers.serialize('json',p_list,fields=('price','publishdate','size'))
The client side successfully receives the response in success:^(AFHTTPRequestOperation *operation, id responseObject) using AFHTTPRequestOperationManager, and this is what responseObject looks like in the debug console (lldb):
po responseObject
<__NSCFArray 0x116fa6190>(
{
data = "[{\"pk\": 817, \"model\": \"xx\", \"fields\": { \"price\": \"3300\", \"publishdate\": \"2014-10-30T00:00:00\", \"size\": 35}}, {\"pk\": 2799, \"model\": \"xx\", \"fields\": { \"price\": \"6250\", \"publishdate\": \"2014-12-08T00:00:00\",\"size\": 0}}]";
total = (
381
);
}
)
In the console, po responseObject[0][@"data"][0] prints the data array, and p responseObject[0][@"total"][0] prints 381 as expected.
The Problem:
in code, trying to cast responseObject[0][@"total"][0] to an integer returns a garbage number
casting responseObject[0][@"data"][0] to NSArray* and then trying to perform count or any other operation causes an exception: 'NSInvalidArgumentException', reason: '-[__NSCFString count]: unrecognized selector sent to instance'
Part of the problem may be that the description method does not quote all strings, so 381 may be a string rather than a number.
Possible solutions (lacking OP code):
if it is really a number (NSNumber), use:
int value = [responseObject[0][@"total"][0] intValue];
if it is really a string:
NSString *valueString = responseObject[0][@"total"][0];
int value = [valueString intValue];
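On the server side, the underlying cause is visible in the question: output is already a JSON string when it is embedded in json.dumps, so the client receives "data" as one escaped string rather than an array (hence -[__NSCFString count] failing). A sketch of a server-side fix under that assumption, using Django as the question's imports suggest (price_view is a hypothetical view name; p_list and theResult come from the question's code):

import json
from django.core import serializers
from django.http import HttpResponse

def price_view(request):  # hypothetical view name
    output = serializers.serialize('json', p_list, fields=('price', 'publishdate', 'size'))
    # json.loads undoes the inner serialization, so "data" is emitted as a
    # real JSON array instead of an escaped string.
    return HttpResponse(json.dumps([{"data": json.loads(output), "total": theResult}]),
                        content_type='application/json')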

Extracting BIND parameters to build a JSON query

I have a file which was exported from BIND containing TSIG values for about 500 domain names. I need to repurpose the data into JSON for a REST API query. The BIND data is formatted like so:
// secondary-example.com.
key "2000000000000.key." {
    algorithm hmac-md5;
    secret "ahashedvalue=";
};
zone "secondary-example.com." {
    type slave;
    file "sec/secondary-example.com.";
    allow-transfer { 1.1.1.1;
                     1.1.2.2;
    };
    also-notify { 1.1.1.1;
                  2.2.2.2;
    };
    masters {
        1.2.3.4 key 2000000000000.key.;
    };
};
From this I need to extract the key, zone and secret. Here's an example API request:
{
    "properties": {
        "name": "secondary-example.com.",
        "accountName": "example",
        "type": "SECONDARY"
    },
    "secondaryCreateInfo": {
        "primaryNameServers": {
            "nameServerIpList": {
                "nameServerIp1": {
                    "ip": "1.2.3.4",
                    "tsigKey": "2000000000000.key.",
                    "tsigKeyValue": "ahashedvalue="
                }
            }
        }
    }
}
I'm having difficulty crafting a regular expression appropriate for the scenario. I'm looking to construct the JSON in a Python script and send the request through Postman.
I spent a couple of days reading up on regex and figured out a solution. Each of those "zones" began with a comment, e.g. "secondary-example.com", and each set of BIND info was exactly 17 lines long. This solution is hacky and always assumes the data is correct, but it managed to work.
Separate the zones into chunks of text.
import re  # needed for the matching step below

zones = []
cur_zone = ''
f = open(bind_file).readlines()
for line in f:
    if line[0:2] == '//':
        zones.append(cur_zone)
        cur_zone = ''
    else:
        cur_zone = cur_zone + line
zones.pop(0)  # Drop the first list item, it's empty
Iterate through those chunks and match the needed parameters.
for z in zones:
    z_lines = z.splitlines()
    # Regex patterns to match the required parameters
    key = re.findall(r'"(.*)"', z_lines[0])[0]
    secret = re.findall(r'"(.*)"', z_lines[2])[0]
    name = re.findall(r'"(.*)"', z_lines[5])[0]
    master = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', z_lines[15])[0]
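Since the fixed-offset approach depends on every block being exactly 17 lines, here is a position-independent sketch over the same chunks; the patterns assume the BIND export format shown above, and to_api_request (with its account argument) is a hypothetical helper mirroring the example request:

import re

KEY_RE = re.compile(r'key\s+"([^"]+)"')
SECRET_RE = re.compile(r'secret\s+"([^"]+)"')
ZONE_RE = re.compile(r'zone\s+"([^"]+)"')
MASTER_RE = re.compile(r'masters\s*{\s*(\d{1,3}(?:\.\d{1,3}){3})')

def parse_zone_chunk(chunk):
    # Pull each parameter out of one zone's text by pattern, not line number.
    return {
        "zone": ZONE_RE.search(chunk).group(1),
        "key": KEY_RE.search(chunk).group(1),
        "secret": SECRET_RE.search(chunk).group(1),
        "master": MASTER_RE.search(chunk).group(1),
    }

def to_api_request(info, account="example"):  # hypothetical helper
    # Mirrors the example API request shown in the question.
    return {
        "properties": {
            "name": info["zone"],
            "accountName": account,
            "type": "SECONDARY",
        },
        "secondaryCreateInfo": {
            "primaryNameServers": {
                "nameServerIpList": {
                    "nameServerIp1": {
                        "ip": info["master"],
                        "tsigKey": info["key"],
                        "tsigKeyValue": info["secret"],
                    }
                }
            }
        },
    }

Each chunk from the splitting loop above can then be fed through both helpers, e.g. payloads = [to_api_request(parse_zone_chunk(z)) for z in zones].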

while loop for deleting data in datastore

I have attempted to clean up and revise the code from an answer here for my needs, where I only want to delete records from the Reservations model that are dated prior to the date expressed in the GET as yy,mm,dd.
If I am correctly anticipating the action of cleanTable/2012/10/5 against the routing ('/cleanTable/([\d]+)/([\d]+)/([\d]+)', CleanTable), then my code would only delete at most 50 (10*nlimit) data records.
Btw, the author of the original code (who likely no longer subscribes to SO) claimed his main trick for accomplishing this was "to include redirect in html instead of using self.redirect".
I am unfamiliar with raise Exception and the like, but my instinct would be to add a raise Exception or raise StopIteration to the for loop after it is made into a while loop. But it is not clear to me whether raising a StopIteration exception actually causes the iteration to stop, or if more is needed. Also, I don't know how to revise the code so the HTML ends cleanly on an early exit.
class CleanTable(BaseHandler):
    def get(self, yy, mm, dd):
        nlimit = 5
        iyy = int(yy)
        imm = int(mm)
        idd = int(dd)
        param = date(iyy, imm, idd)
        q = Reservations.all(keys_only=True)
        q.filter("date < ", dt(iyy, imm, idd))
        results = q.fetch(nlimit)
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.out.write("""
<html>
<meta HTTP-EQUIV="REFRESH" content="url=http://yourapp.appspot.com/cleanTable">
<body>""")
        try:
            for i in range(10):
                db.delete(results)
                results = q.fetch(nlimit, len(results))
                for r in results:
                    logging.info("r.name: %s" % r.name)
                self.response.out.write("<p> " + str(nlimit) + " removed</p>")
            self.response.out.write("""
</body>
</html>""")
        except Exception, inst:
            logging.info("inst: %s" % inst)
            self.response.out.write(str(inst))
This is not the best approach to clean up your models. A better approach would be to get all the keys of your entities and create task queue tasks, where each task gets a batch of keys for the entities that need to be modified.
Another approach would be to create a cron job that queries for the x oldest modified entities, fixes them, and then stores them back.
Finally, if the number of entities is really huge, you could also consider using Backends.
Hope this helps.
Here is my update routine; it has converted 500,000 entities. Be sure to run it on a backend instance (you can target a queue at a backend instance). Notice that I am using a cursor; that's the only way you can consistently iterate through data (never use offset!).
Queue queue = QueueFactory.getQueue("grinderQueue");
queue.add(TaskOptions.Builder.withPayload(new DeferredTask() { // lets generate
    private static final long serialVersionUID = 1L;

    @Override
    public void run() {
        String cursor = null;
        boolean done = false;
        Date now = new Date(1346763868L * 1000L); // 09/04/2012
        while (!done) {
            DatastoreService datastore = DatastoreServiceFactory.getDatastoreService();
            Query query = new Query("Venue");
            query.setFilter(new FilterPredicate("timeOfLastUpdate", Query.FilterOperator.LESS_THAN, now));
            PreparedQuery pq = datastore.prepare(query);
            FetchOptions fetchOptions = FetchOptions.Builder.withLimit(1000);
            if (cursor != null)
                fetchOptions.startCursor(Cursor.fromWebSafeString(cursor));
            QueryResultList<Entity> results = pq.asQueryResultList(fetchOptions);
            List<Entity> updates = new ArrayList<Entity>();
            List<Entity> oldVenueUpdates = new ArrayList<Entity>();
            int tuples = 0;
            for (Entity en : results) {
                tuples++;
                try {
                    if (en.getProperty(Venue.VENUE_KEY) == null)
                        continue;
                    Entity newVenue = new Entity("CPVenue", (String) en.getProperty(Venue.VENUE_KEY));
                    newVenue.setPropertiesFrom(en);
                    newVenue.removeProperty("timeOfLastVenueScoreCalculation");
                    newVenue.removeProperty("actionsSinceLastVenueScoreCalculation");
                    newVenue.removeProperty("venueImageUrl");
                    newVenue.removeProperty("foursquareId");
                    newVenue.setProperty("geoCell", GeoCellCalculator.calcCellId(Double.valueOf((String) en.getProperty("lng")), Double.valueOf((String) en.getProperty("lat")), 8));
                    newVenue.setProperty(Venue.TIME_SINCE_LAST_UPDATE, new Date());
                    updates.add(newVenue);
                    Venue v = new Venue(newVenue);
                    // Set timestamp on Venue
                    en.setProperty("timeOfLastUpdate", now);
                    oldVenueUpdates.add(en);
                } catch (Exception e) {
                    logger.log(Level.WARNING, "", e);
                }
            }
            done = tuples == 0;
            tuples = 0;
            if (results.getCursor() != null)
                cursor = results.getCursor().toWebSafeString();
            else
                done = true;
            System.out.println("Venue Conversion LOOP updates.. " + updates.size() + " cursor " + cursor);
            datastore.put(updates);
            datastore.put(oldVenueUpdates);
        }
        System.out.println("Venue Conversion DONE");
    }
}));
