Python transforming value to tuple

In Python, I receive JSON data. The data looks like the following stub:
{
    "id": 1,
    "naam": "4.13",
    "ruimte_temperatuur_sensor": {...},
    // etc
}
I map this JSON to an object (note that the sensors are already mapped):
ruimte = Ruimte(id=id,
                naam=naam,
                ruimte_temperatuur_sensor=temperatuur_sensor,
                ruimte_humiditeit_sensor=humiditeit_sensor,
                ruimte_beweging_sensor=beweging_sensor,
                airco_temperatuur_sensor=airco_sensor,
                radiator_temperatuur_sensor=radiator_sensor)
The strangest thing happens:
The id field in the JSON is an integer, but Python maps it to a tuple. In my debugger I can clearly see that id=id is passed an integer, but then all of a sudden my object contains a tuple.
The object's constructor should not cause that:
class Ruimte:
    def __init__(self,
                 id: int,
                 naam: str,
                 ruimte_temperatuur_sensor: Sensor,
                 ruimte_humiditeit_sensor: Sensor,
                 ruimte_beweging_sensor: Sensor,
                 airco_temperatuur_sensor: Sensor,
                 radiator_temperatuur_sensor: Sensor):
        self.id = id,
        self.naam = naam,
        self.ruimte_temperatuur_sensor = ruimte_temperatuur_sensor
        self.ruimte_humiditeit_sensor = ruimte_humiditeit_sensor
        self.ruimte_beweging_sensor = ruimte_beweging_sensor
        self.airco_temperatuur_sensor = airco_temperatuur_sensor
        self.radiator_temperatuur_sensor = radiator_temperatuur_sensor
In the sub-objects the id is not parsed to a tuple; for example, ruimte.airco_temperatuur_sensor.id is an integer, even though that JSON is parsed the same way:
def _parse_json_to_sensor(self, json: dict) -> Sensor:
    id = json["id"]
    type = SensorType(json["type"])
    meet_interval_sec = json["sensorInstelling"]["meetIntervalSec"]
    opslaan_interval_sec = json["sensorInstelling"]["opslaanIntervalSec"]
    sensor = Sensor(id=id,
                    type=type,
                    meet_interval_sec=meet_interval_sec,
                    opslaan_interval_sec=opslaan_interval_sec)
I'm totally lost on this. What could cause this?

You have trailing commas at the end of the lines where you assign self.id and self.naam. Remove them.
>>> a_string = 'string',
>>> type(a_string)
<class 'tuple'>

The comma in the line:
self.id = id,
leads to the creation of a tuple. See this example:
a = 1
b = 1
c = 1,
print(b)
print(c)
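Running that snippet, print(b) outputs 1, while print(c) outputs the one-element tuple (1,): the trailing comma, not parentheses, is what makes a tuple. Applied to the constructor above, the fix is simply dropping the two trailing commas (a minimal sketch of the corrected lines):
self.id = id      # plain int again
self.naam = naam  # plain str again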

Related

Protobuf timestamp getting auto converted

I am reading timestamp strings in the format '2014-11-09T01:00Z' from Mongo and storing them in a Python dict. But when retrieving them from the dict, I see that some of the strings were automatically converted to a proto timestamp like seconds: 1511047800.
I have no clue how this could happen. Any insights appreciated.
import os
from collections import defaultdict

class SomeInfo:
    __school_id_info = defaultdict(
        lambda: {'banned': -1,
                 'school_boy': False,
                 'teacher': False})

    def __init__(self):
        mongo_connection_str = os.environ['MONGO_CONNECTION_STRING']
        mongo_db_name = os.environ.get('MONGO_DB_NAME', 'coll')
        mongo_dao = MongoDao(mongo_connection_str, mongo_db_name)
        mongo_collection_name = 'school'
        school_records = mongo_dao.find_records(mongo_collection_name)
        for school_details in school_records:
            self.__school_id_info['date'] = school_details['date']
            # If I print __school_id_info here, I see date as '2014-11-09T01:00Z',
            # the same string format as in Mongo.
        self.__dict__ = self.__school_id_info

    def get_info_for_both_students(self) -> dict:
        # While returning from here, I see some values in protobuf timestamp
        # format (seconds: 1235192400) and some still as-is. I am not doing
        # any conversion to protobuf timestamp.
        return self.__school_id_info
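One thing worth flagging in the snippet itself, separate from the protobuf mystery: __school_id_info is a class attribute, and self.__dict__ = self.__school_id_info makes every instance alias that one shared dict rather than copy it, so a mutation made through any reference is visible everywhere. A minimal sketch of that sharing (standard library only; the attribute names are just illustrative):
from collections import defaultdict

class SomeInfo:
    __school_id_info = defaultdict(dict)

    def __init__(self):
        # aliases the shared class-level dict; it does not copy it
        self.__dict__ = self.__school_id_info

a = SomeInfo()
b = SomeInfo()
a.date = '2014-11-09T01:00Z'
print(b.date)  # '2014-11-09T01:00Z' -- both instances see the same dict
If any other code path converts values in that shared dict to protobuf timestamps, the converted values would show up here as well.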

How to convert Hive schema to Bigquery schema using Python?

What I get from the API:
"name": "reports"
"col_type": "array<struct<imageUrl:string,reportedBy:string>>"
So in the Hive schema I got:
reports array<struct<imageUrl:string,reportedBy:string>>
Note: I got the Hive array schema as a string from the API.
My target:
bigquery.SchemaField("reports", "RECORD", mode="NULLABLE",
fields=(
bigquery.SchemaField('imageUrl', 'STRING'),
bigquery.SchemaField('reportedBy', 'STRING')
)
)
Note: I would like to create universal code that can handle any number of struct fields inside the array.
Any tips are welcome.
I tried creating a script that parses your input, which is reports array<struct<imageUrl:string,reportedBy:string>>, and converts it to a dictionary that can be used as the schema when creating a table. The main idea of the approach is that instead of using SchemaField(), you create a dictionary, which is much easier than constructing SchemaField() objects with parameters from your example input.
NOTE: The script is only tested against your input, and it can parse more fields if they are added inside struct<.
import re
from google.cloud import bigquery

def is_even(number):
    return (number % 2) == 0

def clean_string(str_value):
    return re.sub(r'[\W_]+', '', str_value)

def convert_to_bqdict(api_string):
    """
    This only works for a struct with multiple fields.
    This could give you an idea on constructing a schema dict for BigQuery.
    """
    main_dict = {}
    struct_dict = {}
    field_arr = []
    schema_arr = []

    # Hard coded this since not sure what the string will look like if there are more inputs
    init_struct = api_string.split(' ')
    main_dict["name"] = init_struct[0]
    main_dict["type"] = "RECORD"
    main_dict["mode"] = "NULLABLE"

    cont_struct = init_struct[1].split('<')
    num_elem = len(cont_struct)

    # parse fields inside of struct<
    for i in range(0, num_elem):
        # fields are seen on even indices
        if is_even(i) and i != 0:
            temp = list(filter(None, cont_struct[i].split(',')))  # remove blank elements
            for elem in temp:
                fields = list(filter(None, elem.split(':')))
                struct_dict["name"] = clean_string(fields[0])
                # "type" works for STRING as of the moment; refer to
                # https://cloud.google.com/bigquery/docs/schemas#standard_sql_data_types
                # for the accepted data types
                struct_dict["type"] = clean_string(fields[1]).upper()
                struct_dict["mode"] = "NULLABLE"
                field_arr.append(struct_dict)
                struct_dict = {}

    main_dict["fields"] = field_arr  # assign dict to array of fields
    schema_arr.append(main_dict)
    return schema_arr

sample = "reports array<struct<imageUrl:string,reportedBy:string,newfield:bool>>"
bq_dict = convert_to_bqdict(sample)

client = bigquery.Client()
project = client.project
dataset_ref = bigquery.DatasetReference(project, '20211228')
table_ref = dataset_ref.table("20220203")
table = bigquery.Table(table_ref, schema=bq_dict)
table = client.create_table(table)
Output:
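Traced by hand through the script above (so worth verifying against a real run), bq_dict for the sample string comes out as a one-element list describing the RECORD:
[{'name': 'reports', 'type': 'RECORD', 'mode': 'NULLABLE',
  'fields': [{'name': 'imageUrl', 'type': 'STRING', 'mode': 'NULLABLE'},
             {'name': 'reportedBy', 'type': 'STRING', 'mode': 'NULLABLE'},
             {'name': 'newfield', 'type': 'BOOL', 'mode': 'NULLABLE'}]}]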

Creating a namedtuple valid for different parameters

I'm trying to figure out a way to create a namedtuple with variable fields depending on the data you receive. In my case, I'm using data from StatCounter, and not all the periods have the same browsers. I tried this way, but it is a bit ugly and I'm sure there is a better way to achieve it.
import csv
import datetime as dt
from collections import namedtuple
from typing import Iterator, List

def namedtuple_fixed(name: str, fields: List[str]) -> namedtuple:
    """Check the fields of the namedtuple and change the invalid ones."""
    fields_fixed: List[str] = []
    for field in fields:
        field = field.replace(" ", "_")
        if field[0].isdigit():
            field = f"n{field}"
        fields_fixed.append(field)
    return namedtuple(name, fields_fixed)
Records: namedtuple = namedtuple("empty_namedtuple", "")

def read_file(file: str) -> List["Records"]:
    """
    Read the file with info about the percentage of use of various browsers
    """
    global Records
    with open(file, encoding="UTF-8") as browsers_file:
        reader: Iterator[List[str]] = csv.reader(browsers_file)
        field_names: List[str] = next(reader)
        Records = namedtuple_fixed("Record", field_names)
        result: List[Records] = [
            Records(
                *[
                    dt.datetime.strptime(n, "%Y-%m").date()
                    if record.index(n) == 0
                    else float(n)
                    for n in record
                ]
            )
            for record in reader
        ]
    return result
The "namedtuple_fixed" function is to fix the names that have invalid identifiers.
Basically, I want to create a named tuple that receives a variable number of parameters, depending on the file you want to analyze. And if it's with type checking incorporated (I mean using NamedTuple from the typing module), much better.
Thanks in advance.
This solves my problem, but just partially
from types import SimpleNamespace

class Record(SimpleNamespace):
    def __repr__(self):
        items = [f"{key}={value!r}" for key, value in self.__dict__.items()]
        return f"Record({', '.join(items)})"
Using the types.SimpleNamespace documentation.
And it can cause problems, for example if you initialize a Record like the following:
foo = Record(**{"a": 1, "3a": 2})
print(foo.a)   # OK
print(foo.3a)  # SyntaxError
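For what it's worth, the standard library already has a hook for this: namedtuple accepts rename=True, which silently replaces invalid field names with positional ones (_1, _2, ...). A short sketch:
from collections import namedtuple

# rename=True turns invalid identifiers into _1, _2, ... instead of raising
Record = namedtuple("Record", ["Date", "3a", "Internet Explorer"], rename=True)
print(Record._fields)  # ('Date', '_1', '_2')
That loses the original names, though, so a custom fixer like namedtuple_fixed above preserves more information.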

Layered Classes and JSON encoding

I have some Data that I need to write to a JSON string.
I have it working with dict items but want to encompass it all in classes to help ensure the correct data.
The following code is a comparison between the dict items and the class item output. They don't match, and I can't figure out what I am missing.
I get a "bound method Event.encode of Event..." in my JSON string.
from collections import namedtuple
import json

class Event(namedtuple('Event', 'itemName, itemID')):
    def encode(self):
        obj = {}
        obj['itemName'] = str(self.itemName)
        obj['itemID'] = int(self.itemID)
        return json.dumps(obj)

curEv = Event('MyName', 5)
print 'ClassEv : ', curEv.encode()

curEv2 = {'itemName': 'MyName', 'itemID': 5}
print 'DictEv : ', json.dumps(curEv2)

class Packet(namedtuple('Packet', 'pID, itemType, itemData')):
    def encode(self):
        obj = {}
        obj['pID'] = int(self.pID)
        obj['itemType'] = int(self.itemType)
        obj['itemData'] = str(self.itemData.encode)
        return json.dumps(obj)

packet = Packet(11, 0, curEv)
print 'ClassPacket: ', packet.encode()

packet2 = {'pID': 11, 'itemType': 0}
packet2['itemData'] = curEv2
print 'DictPacket : ', json.dumps(packet2)
You are failing to call the itemData.encode() function. Instead you are simply returning a reference to it.
Try:
obj['itemData'] = str(self.itemData.encode())
Note the extra () at the end.
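The difference is easy to see in a REPL session with the Event class above (key order in the dumped string may vary):
>>> curEv.encode    # no parentheses: a reference to the bound method
<bound method Event.encode of Event(itemName='MyName', itemID=5)>
>>> curEv.encode()  # with parentheses: the method is actually called
'{"itemName": "MyName", "itemID": 5}'
Note that even with the fix, ClassPacket embeds one JSON string inside another, while DictPacket nests a real object; building plain dicts and calling json.dumps once at the top level avoids that mismatch.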

serializing sqlalchemy class to json

I'm trying to serialize the result (a list) of an SQLAlchemy query to JSON.
This is the class:
from sqlalchemy import Column, Integer, String, Unicode
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class Wikilink(Base):
    __tablename__ = 'Wikilinks'
    __table_args__ = {'extend_existing': True}

    id = Column(Integer, autoincrement=True, primary_key=True)
    title = Column(Unicode(350))
    user_ip = Column(String(50))
    page = Column(String(20))
    revision = Column(String(20))
    timestamp = Column(String(50))
and I guess my problem is with the __repr__(self) function.
I tried something like:
return '{{0}:{"title":{1}, "Ip":{2}, "page":{3} ,"revision":{4}}}'.format(self.id,self.title.encode('utf-8'),self.user_ip,self.page,self.revision)
or:
return '{"id"={0}, "title"={1}, "Ip"={2}}'.format(self.id,self.title.encode('utf-8'),self.user_ip.encode('utf-8'),self.page,self.revision)
and I got:
TypeError(repr(o) + " is not JSON serializable")
ValueError: Single '}' encountered in format string
I tried:
return '{id=%d, title=%s, Ip=%s}'%(self.id,self.title.encode('utf-8'),self.user_ip.encode('utf-8'))
and I got:
TypeError: {id=8126, title=1 בדצמבר, Ip=147.237.70.106} is not JSON serializable
adding "" around (according to the JSON formatting) like this: "id"="%d", "title"="%s", "Ip"="%s" didn't help either.
I know this is supposed to be dead simple, but I just can't get it right.
Bottle is actually handling the jsonification part automatically, but calling json.dumps on the result gives me the same errors.
Instead of trying to hand-build a JSON string, you could define, for example, your own to_dict method that returns the dictionary structure it seems you're trying to create, and after that generate the JSON from that structure:
>>> import json
>>> d = {'id':8126, 'title':u'1 בדצמבר', 'ip':'147.237.70.106'}
>>> json.dumps(d)
'{"ip": "147.237.70.106", "id": 8126, "title": "1 \\u05d1\\u05d3\\u05e6\\u05de\\u05d1\\u05e8"}'
I'm not sure I understand what you tried. Couldn't you build the dict and let json.dumps() do the work for you?
Something like:
>>> class Foo:
...     id = 1
...     title = 'my title'
...     to_jsonize = ['id', 'title']
...
>>> dct = {name: getattr(Foo, name) for name in Foo.to_jsonize}
>>> import json
>>> json.dumps(dct)
'{"id": 1, "title": "my title"}'
