I was trying to find a solution similar to Java's Jackson ObjectMapper that can serialize/deserialize Python objects to and from JSON, and found that
cattrs is the closest to what I need. But it cannot do attribute mapping, e.g. use firstName in the JSON but first_name on the deserialized object.
attrs-serde can do the attribute mapping but cannot do recursive deserialization.
The problem can be illustrated in this example:
import attr
import cattr
from attrs_serde import serde
name_path = ["contact", "personal", "Name"]
phone_path = ["contact", "Phone"]
@serde
@attr.s(auto_attribs=True, frozen=True)
class Name:
first: str
last: str
@serde
@attr.s(auto_attribs=True, frozen=True)
class Person:
name: Name = attr.ib(metadata={"to": name_path, "from": name_path})
phone: str = attr.ib(metadata={"to": phone_path, "from": phone_path})
person_json = {"contact": {"personal": {"Name": {"first": "John", "last": "Smith"}}, "Phone": "555-112233"}}
# XXX: to/from only works on serde
p = Person(name=Name(first="John", last="Smith"), phone="555-112233")
print(p.to_dict())
# {'contact': {'personal': {'Name': {'first': 'John', 'last': 'Smith'}}, 'Phone': '555-112233'}}
p1 = Person.from_dict(person_json)
print(f"p1={p1}")
# p1=Person(name={'first': 'John', 'last': 'Smith'}, phone='555-112233')
# XXX: nested only works on cattrs
person = {"Name": {"First": "John", "Last": "Smith"}, "Phone": "555-112233"}
converter = cattr.Converter()
converter.register_structure_hook(
Person, lambda d, _: Person(name=converter.structure(d["Name"], Name), phone=d.get("Phone"))
)
converter.register_structure_hook(Name, lambda d, _: Name(first=d["First"], last=d.get("Last")))
p2 = converter.structure(person, Person)
print(p2)
assert p == p2
print(converter.unstructure(p2))
# {'name': {'first': 'John', 'last': 'Smith'}, 'phone': '555-112233'}
# {"contact": {"personal": {"name": "John"}, "phone": "555-112233"}}
Is there a more elegant solution using cattrs?
You can use humps to do the case conversion:
import humps
import cattr
class CAttrConverter:
converter = cattr.Converter()
def __init__(self):
"""
structure hook for load
unstructure hook for dump
"""
def load(self, params, data_cls, camel_to_snake=True):
"""
:param params: params, mostly from front end
:param data_cls:
:param camel_to_snake: need to convert from camel style to snake style
"""
if camel_to_snake:
params = humps.depascalize(params)
return self.converter.structure(params, data_cls)
def dump(self, data, snake_to_camel=False):
"""
:param data:
:param snake_to_camel: dump as camel case
"""
result: dict = self.converter.unstructure(data)
if snake_to_camel:
result = humps.camelize(result)
return result
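For reference, a minimal usage sketch (the Name class and the payload below are illustrative, not part of the answer):
import attr

@attr.s(auto_attribs=True)
class Name:
    first_name: str
    last_name: str

conv = CAttrConverter()
name = conv.load({"firstName": "John", "lastName": "Smith"}, Name)
# Name(first_name='John', last_name='Smith')
print(conv.dump(name, snake_to_camel=True))
# {'firstName': 'John', 'lastName': 'Smith'}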
Posting this for those in the future.
Yeah, you can achieve this by overriding the Converter class's methods:
def unstructure_attrs_asdict(self, obj) -> Dict[str, Any]:
def structure_attrs_fromdict(
self, obj: Mapping[str, Any], cl: Type[T]
) -> T:
Or if you want tuples
def unstructure_attrs_astuple(self, obj) -> Tuple[Any, ...]:
def structure_attrs_fromtuple(
self, obj: Tuple[Any, ...], cl: Type[T]
) -> T:
A simple Converter class that uses the to and from fields from the metadata. I will leave handling nested fields to your imagination.
from typing import TypeVar, Dict, Any, Mapping, Type
from cattr import Converter
from cattr._compat import fields
T = TypeVar("T")
class ConverterWithMetaDataOverrides(Converter):
# Classes to Python primitives.
def unstructure_attrs_asdict(self, obj) -> Dict[str, Any]:
"""Our version of `attrs.asdict`, so we can call back to us."""
attrs = fields(obj.__class__)
dispatch = self._unstructure_func.dispatch
rv = self._dict_factory()
for a in attrs:
name = a.name
serialize_as = name
if 'to' in a.metadata:
serialize_as = a.metadata['to']
v = getattr(obj, name)
rv[serialize_as] = dispatch(a.type or v.__class__)(v)
return rv
def structure_attrs_fromdict(
self, obj: Mapping[str, Any], cl: Type[T]
) -> T:
"""Instantiate an attrs class from a mapping (dict)."""
# For public use.
conv_obj = {} # Start with a fresh dict, to ignore extra keys.
dispatch = self._structure_func.dispatch
for a in fields(cl): # type: ignore
# We detect the type by metadata.
type_ = a.type
name = a.name
serialize_from = name
if 'from' in a.metadata:
serialize_from = a.metadata['from']
try:
val = obj[serialize_from]
except KeyError:
continue
if name[0] == "_":
name = name[1:]
conv_obj[name] = (
dispatch(type_)(val, type_) if type_ is not None else val
)
return cl(**conv_obj) # type: ignore
converter = ConverterWithMetaDataOverrides()
Usage:
from attr import attrs, ib

@attrs(slots=True, frozen=True, auto_attribs=True)
class LevelTwo(object):
a: str = ib(metadata={'from': 'haha_not_a', 'to': 'haha_not_a'})
b: str
c: int
@attrs(slots=True, frozen=True, auto_attribs=True)
class LevelOne(object):
leveltwo: LevelTwo = ib(metadata={'from': 'level_two', 'to': 'level_two'})
@attrs(slots=True, frozen=True, auto_attribs=True)
class Root(object):
levelone: LevelOne = ib(metadata={'from': 'levelOne', 'to': 'levelOne'})
converter.structure(converter.unstructure(Root(levelone=LevelOne(leveltwo=LevelTwo(a='here', b='here_again', c=42)))),
Root)
>>> converter.unstructure(Root(levelone=LevelOne(leveltwo=LevelTwo(a='here', b='here_again', c=42))))
>>> {'levelOne': {'level_two': {'haha_not_a': 'here', 'b': 'here_again', 'c': 42}}}
So I'm working with some JSON data in Python. It's basically a wrapper for an API, but I want dot access to my values, like data.size. I've done a bit of research but couldn't find the desired results.
I was using json.loads to parse my data, so I tried object hooks, but that isn't what I want.
Here's example Go code of what I want to replicate:
type dat struct {
ResponseTime int
Body body
}
type body struct {
Day int
Month int
Year int
}
var h dat
// e here is my json
err := json.Unmarshal(e, &h)
My results in Python were similar, but they were all instances of the same class.
My aim is to be able to parse nested dicts and to define which dict maps to which object; the Go code above shows what I mean.
Using dataclass and dacite
from dataclasses import dataclass
import dacite
@dataclass
class Body:
day:int
month:int
year:int
@dataclass
class Dat:
response_time: int
body: Body
data = {'response_time':12, 'body':{'day':1,'month':2,'year':3}}
dat: Dat = dacite.from_dict(Dat,data)
print(dat)
output
Dat(response_time=12, body=Body(day=1, month=2, year=3))
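If you also need the reverse direction, the standard library's dataclasses.asdict turns the instance back into a plain dict (stdlib behaviour, not part of dacite):
from dataclasses import asdict
asdict(dat)
# {'response_time': 12, 'body': {'day': 1, 'month': 2, 'year': 3}}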
Using pymarshaler (which is closer to the Go approach):
import json
from pymarshaler.marshal import Marshal
class Body:
def __init__(self, day: int, month: int, year: int):
self.day = day
self.month = month
self.year = year
class Dat:
def __init__(self, response_time: int, body: Body):
self.response_time = response_time
self.body = body
marshal = Marshal()
dat_test = Dat(3, Body(1, 2, 3))
dat_json = marshal.marshal(dat_test)
print(dat_json)
result = marshal.unmarshal(Dat, json.loads(dat_json))
print(result.response_time)
see https://pythonawesome.com/marshall-python-objects-to-and-from-json/
So it turns out it wasn't that hard; I just didn't want to try.
For anyone with the same problem, here's the code:
class Typs(object):
def __init__(self):
self.type = int
self.breed = str
class Deets(object):
def __init__(self):
self.color = str
self.type = Typs()
class Base(object):
def __init__(self):
self.name = str
self.details = Deets()
d = {
"name": "Hello",
"details": {"color": "black", "type": {"type": 2, "breed": "Normal"}},
}
h = Base()
def unmarshal(d, o):
for k, v in d.items():
if hasattr(o, k):
if isinstance(v, dict):
unmarshal(v, getattr(o, k))
else:
setattr(o, k, v)
return o
x = unmarshal(d, h)
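The nested values are then reachable with plain attribute access:
print(x.name)                 # Hello
print(x.details.color)        # black
print(x.details.type.breed)   # Normal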
Problem
I am trying to map a response object into a Python class representation, but there are a number of keys within the response that are dynamic and therefore I am unable to map them explicitly to class members.
The response object:
{
'rows': [
{
'1900000084913': '222222',
'360018501198': '4003112',
'custom_fields': [
{'id': 360018501198, 'value': '4003112'},
{'id': 1900000084913, 'value': '222222'}
]
}
]
}
Within the object: '1900000084913' and '360018501198' are dynamically set. (At the moment I have added x and y as placeholders in the Row object)
Code:
from dataclasses import dataclass
from typing import List
@dataclass
class Serialisable:
@classmethod
def from_dict(cls, d):
if d is not None:
return cls(**d)
@dataclass
class CustomField(Serialisable):
id: int
value: str
@dataclass
class Row(Serialisable):
x: str # '1900000084913' - How do I map these?
y: str # '360018501198' -
custom_fields: List[CustomField]
@classmethod
def from_dict(cls, d):
if d is not None:
kwargs = dict(d)
custom_fields = kwargs.pop("custom_fields", None)
if custom_fields is not None:
kwargs["custom_fields"] = [
CustomField.from_dict(field) for field in custom_fields
]
return cls(**kwargs)
@dataclass
class ResponseObject(Serialisable):
rows: List[Row]
@classmethod
def from_dict(cls, d):
if d is not None:
kwargs = dict(d)
rows = kwargs.pop("rows", None)
if rows is not None:
kwargs["rows"] = [
Row.from_dict(row) for row in rows
]
return cls(**kwargs)
if __name__ == "__main__":
response = {
'rows': [
{
'1900000084913': '222222',
'360018501198': '4003112',
'custom_fields': [
{'id': 360018501198, 'value': '4003112'},
{'id': 1900000084913, 'value': '222222'}
]
}
]
}
response_obj = ResponseObject.from_dict(response)
print(response_obj)
If the keys are changed to x and y then this will map accordingly.
You have to create your own __init__ method and set the dynamic attributes with setattr. Writing your own __init__ also means you have to write a __repr__ for printing. Keep in mind that when you write your own __init__, some dataclass functionality might not work as intended.
Simplified sample:
data = {
'rows': [
{
'1900000084913': '222222',
'360018501198': '4003112',
'custom_fields': [
{'id': 360018501198, 'value': '4003112'},
{'id': 1900000084913, 'value': '222222'}
]
}
]
}
class Row:
def __init__(self, custom_fields, **kwargs):
self.custom_fields = custom_fields
for key, value in kwargs.items():
setattr(self, key, value)
def __repr__(self):
items = ("%s=%r" % (k, v) for k, v in self.__dict__.items())
return "%s(%s)" % (self.__class__.__name__, ', '.join(items))
r = Row(**data["rows"][0])
print(r)
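For the sample data above, print(r) produces roughly:
Row(custom_fields=[{'id': 360018501198, 'value': '4003112'}, {'id': 1900000084913, 'value': '222222'}], 1900000084913='222222', 360018501198='4003112')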
You might also want to consider subclassing dict if you want to have a dynamic structure.
In your case this will not work, since your dynamic attributes are integers (or start with a digit, even though they are represented as strings). You can't set attributes that start with an integer.
In any other case, maybe try something like the following:
@dataclass(init=False)
class Row(Serialisable):
def __init__(self, custom_fields: List[CustomField], **kwargs):
self.custom_fields = custom_fields
for key, value in kwargs.items():
setattr(self, key, value)
row = Row(custom_fields=[], dynamic_attribute_1="1", dynamic_attribute_2="2")
Accessing e.g. row.dynamic_attribute_1 will then work
>>> row.dynamic_attribute_1
'1'
Edit:
Never mind, it will also work in your case, at least for instantiating the object. However, you will not be able to access the instance's attributes via row.123123, since that raises a SyntaxError; you can only use getattr, i.e. getattr(row, "123123").
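For the dict-subclassing route mentioned above, here is a minimal sketch (AttrDict is a name I am introducing for illustration):
class AttrDict(dict):
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

row = AttrDict(data["rows"][0])
row.custom_fields        # attribute access works for normal keys
row["1900000084913"]     # numeric keys still need item access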
This works if you are able to make a few assumptions:
The dynamic field names will generally be numeric (can be cast to int if needed)
You want x to map to the first dynamic key that appears in the dictionary object, and y to map to the second dynamic key in the object.
Here is one solution that could work based on this:
from dataclasses import dataclass
from typing import List, Dict
d = {
'rows': [
{
'1900000084913': '222222',
'360018501198': '4003112',
'custom_fields': [
{'id': 360018501198, 'value': '4003112'},
{'id': 1900000084913, 'value': '222222'}
]
}
]
}
# @dataclass
class SerializableMixin:
@classmethod
def from_dict(cls, d: Dict):
if d is not None:
return cls(**d)
@dataclass
class CustomField(SerializableMixin):
id: int
value: str
@dataclass(init=False)
class Row:
x: str # '1900000084913' - How do I map these?
y: str # '360018501198' -
custom_fields: List[CustomField]
def __init__(self, custom_fields, **kwargs):
self.custom_fields = [CustomField.from_dict(cf) for cf in custom_fields]
placeholder_attrs = ['x', 'y']
for key, value in kwargs.items():
if key.isnumeric():
attr_to_set = placeholder_attrs.pop(0)
setattr(self, attr_to_set, value)
r = Row(**d["rows"][0])
print(r)
# prints:
# Row(x='222222', y='4003112', custom_fields=[CustomField(id=360018501198, value='4003112'), CustomField(id=1900000084913, value='222222')])
I have the following model, enum, and field:
from enum import Enum
from pydantic import BaseModel, Json
class SlotActionEnum(Enum):
NORMAL = 'normal'
REASK = 'reask'
class ChannelMessage(Json):
answerText: str
slot_action: SlotActionEnum = SlotActionEnum.NORMAL
class Request(BaseModel):
conversationId: str
channelMessage: ChannelMessage
o = Request(**{
"conversationId": "id10",
"channelMessage": "{\"answerText\": \"sadfg\", \"slot_action\": \"reask\"}"
})
There are two problems here:
slot_action cannot be accessed like this: o.channelMessage.slot_action
This causes an AttributeError:
AttributeError: 'dict' object has no attribute 'slot_action'
If I access the field via square brackets instead, PyCharm highlights it, and the value in slot_action is not a SlotActionEnum but a plain value like reask or normal.
slot_action can be an empty string. How could I replace an empty string with normal?
What should I do to handle these problems? Should I do something like this:
class ChannelMessage(BaseModel):
answerText: str
slot_action: SlotActionEnum = SlotActionEnum.NORMAL
class Request(BaseModel):
conversationId: str
channelMessage: ChannelMessage
def __init__(__pydantic_self__, **data: Any) -> None:
channel_message = json.loads(data.pop('channelMessage'))
if channel_message['slot_action'] == '':
channel_message['slot_action'] = SlotActionEnum.NORMAL.value
channel_message['slot_action'] = SlotActionEnum(channel_message['slot_action'])
super().__init__(**data, channelMessage=channel_message)
? Well that works but looks ugly.
from pydantic import BaseModel, validator
from enum import Enum
class SlotActionEnum(Enum):
NORMAL = 'normal'
REASK = 'reask'
class ChannelMessage(BaseModel):
answerText: str
slot_action: SlotActionEnum = SlotActionEnum.NORMAL
#validator("slot_action", pre=True)
def valid_slot_action(cls, v):
if v == "":
return SlotActionEnum.NORMAL
return v
class Request(BaseModel):
conversationId: str
channelMessage: ChannelMessage
#validator("channelMessage", pre=True)
def valid_channel_message(cls, v):
if isinstance(v, str):
return ChannelMessage.parse_raw(v)
return v
o = Request(**{
"conversationId": "id10",
"channelMessage": "{\"answerText\": \"sadfg\", \"slot_action\": \"reask\"}"
})
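With these validators in place, the nested field structures as the enum, and an empty string falls back to the default:
print(o.channelMessage.slot_action)   # SlotActionEnum.REASK
o2 = Request(conversationId="id11", channelMessage='{"answerText": "hi", "slot_action": ""}')
print(o2.channelMessage.slot_action)  # SlotActionEnum.NORMAL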
I have a model that relates to some others, and I want to build an instance with a model factory from the factory_boy package and send it as JSON to a REST API in Django REST framework.
UserFactory:
class UserFactory(factory.django.DjangoModelFactory):
class Meta:
model = User
first_name = factory.Faker('name')
username = factory.Faker('word')
language = factory.SubFactory(LanguageFactory)
LanguageFactory:
class LanguageFactory(factory.django.DjangoModelFactory):
class Meta:
model = Language
name = factory.Faker('language_name')
When I use:
factory.build(dict, FACTORY_CLASS=UserFactory)
it returns:
{'first_name': "Agent 001", 'username': 'john_doe', 'language': <Language: Catalan>}
where language is another model instance, but I need it as JSON for a POST or PATCH test.
How would I get something like this?
{'first_name': "Agent 001", 'username': 'john_doe', 'language': [{'name': 'English'}, {'name': 'French'}]}
As mentioned in the comments, factory_boy doesn't have a built-in dict factory for nested factories.
An answer on GitHub worked.
First, write a function that takes a factory class and converts it, including any nested factories, into a dict:
from functools import partial
from typing import Any, Dict
from factory import Factory
from factory.base import StubObject
def generate_dict_factory(factory: Factory):
def convert_dict_from_stub(stub: StubObject) -> Dict[str, Any]:
stub_dict = stub.__dict__
for key, value in stub_dict.items():
if isinstance(value, StubObject):
stub_dict[key] = convert_dict_from_stub(value)
return stub_dict
def dict_factory(factory, **kwargs):
stub = factory.stub(**kwargs)
stub_dict = convert_dict_from_stub(stub)
return stub_dict
return partial(dict_factory, factory)
then for usage:
# example of usage
UserDictFactory = generate_dict_factory(UserFactory)
and finally, calling UserDictFactory() returns what we need.
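The result is a plain dict with nested factories converted as well, e.g. (values come from Faker, so they will vary):
UserDictFactory()
# {'first_name': 'Megan Smith', 'username': 'paper', 'language': {'name': 'Catalan'}}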
I was using the answer from Mahmood, and I realized that it was not working properly for lists (when foos = factory.List(FooFactory)). In this case we would expect to get back a list, but with the original code we get back a dict where the keys are the list indexes as strings.
I made the following modification to fix it:
def generate_dict_factory(factory: factory.Factory):
def stub_is_list(stub: StubObject) -> bool:
try:
return all(k.isdigit() for k in stub.__dict__.keys())
except AttributeError:
return False
def convert_dict_from_stub(stub: StubObject) -> Dict[str, Any]:
stub_dict = stub.__dict__
for key, value in stub_dict.items():
if isinstance(value, StubObject):
stub_dict[key] = (
[convert_dict_from_stub(v) for v in value.__dict__.values()]
if stub_is_list(value)
else convert_dict_from_stub(value)
)
return stub_dict
def dict_factory(factory, **kwargs):
stub = factory.stub(**kwargs)
stub_dict = convert_dict_from_stub(stub)
return stub_dict
return partial(dict_factory, factory)
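A quick sketch of the list case (FooFactory and TeamFactory are hypothetical stand-ins, and this assumes import factory plus the helpers and imports from the first snippet):
class FooFactory(factory.Factory):
    class Meta:
        model = dict
    name = "foo"

class TeamFactory(factory.Factory):
    class Meta:
        model = dict
    foos = factory.List([factory.SubFactory(FooFactory) for _ in range(2)])

TeamDictFactory = generate_dict_factory(TeamFactory)
TeamDictFactory()
# with the fix: {'foos': [{'name': 'foo'}, {'name': 'foo'}]} instead of {'foos': {'0': {...}, '1': {...}}}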
I have nested JSON as below:
{
"product" : "name",
"protocol" : "scp",
"read_logs" : {
"log_type" : "failure",
"log_url" : "htttp:url"
}
}
I am trying to create a Python class object from it with the code below.
import json
class Config (object):
"""
Argument: JSON Object from the configuration file.
"""
def __init__(self, attrs):
if 'log_type' in attrs:
self.log_type = attrs['log_type']
self.log_url = attrs['log_url']
else:
self.product = attrs["product"]
self.protocol = attrs["protocol"]
def __str__(self):
return "%s;%s" %(self.product, self.log_type)
def get_product(self):
return self.product
def get_logurl(self):
return self.log_url
class ConfigLoader (object):
'''
Create a configuration loader which can read JSON config files
'''
def load_config (self, attrs):
with open (attrs) as data_file:
config = json.load(data_file, object_hook=load_json)
return config
def load_json (json_object):
return Config (json_object)
loader = ConfigLoader()
config = loader.load_config('../config/product_config.json')
print(config.get_product())
But the object_hook invokes load_json recursively, and the Config class's __init__ is called twice, so the final object I create does not contain the nested JSON data.
Is there any way to read the entire nested JSON object into a single Python class?
Thanks
A variation on Pankaj Singhal's idea, but using a "generic" namespace class instead of namedtuples:
import json
class Generic:
@classmethod
def from_dict(cls, dict):
obj = cls()
obj.__dict__.update(dict)
return obj
data = '{"product": "name", "read_logs": {"log_type": "failure", "log_url": "123"}}'
x = json.loads(data, object_hook=Generic.from_dict)
print(x.product, x.read_logs.log_type, x.read_logs.log_url)
namedtuple & object_hook can help create a one-liner:
# Create an object with attributes corresponding to JSON keys.
def json_to_obj(data): return json.loads(data, object_hook=lambda converted_dict: namedtuple('X', converted_dict.keys())(*converted_dict.values()))
OR Create a more readable function like below:
def _object_hook(converted_dict): return namedtuple('X', converted_dict.keys())(*converted_dict.values())
def json_to_obj(data): return json.loads(data, object_hook=_object_hook)
Below is the code snippet to use it:
import json
from collections import namedtuple
data = '{"product": "name", "read_logs": {"log_type": "failure", "log_url": htttp:url}}'
x = json_to_obj(data)
print(x.product, x.read_logs.log_type, x.read_logs.log_url)
NOTE: Check out namedtuple's rename parameter.
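For example, rename=True swaps out keys that are not valid Python identifiers for positional names (illustrative snippet, not from the original answer):
from collections import namedtuple
namedtuple('X', ['product', 'class', '123'], rename=True)._fields
# ('product', '_1', '_2')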
I wrote a simple DFS algorithm to do this job.
It converts a nested item into a flat dictionary. In my case, I joined the keys of the JSON item with a dash.
For example, the nested item { "a": [{"b": "c"}, {"d": "e"}] } will be transformed into {'a-0-b': 'c', 'a-1-d': 'e'}.
def DFS(item, headItem, heads, values):
if type(item) == type({}):
for k in item.keys():
DFS(item[k], headItem + [k], heads, values)
elif type(item) == type([]):
for i in range(len(item)):
DFS(item[i], headItem + [str(i)], heads, values)
else:
headItemStr = '-'.join(headItem)
heads.append(headItemStr)
values.append(item)
return
def reduce(jsonItem):
heads, values = [], []
DFS(jsonItem, [], heads, values)
return heads, values
def json2dict(jsonItem):
head, value = reduce(jsonItem)
dictHeadValue = { head[i] : value[i] for i in range(len(head))}
return dictHeadValue
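Usage, with the example item from above:
print(json2dict({"a": [{"b": "c"}, {"d": "e"}]}))
# {'a-0-b': 'c', 'a-1-d': 'e'}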