JSON serialize a class and change property casing with Python

I'd like to create a JSON representation of a class and change the property names automatically from snake_case to lowerCamelCase, as I'd like to comply with PEP8 in Python and also the JavaScript naming conventions (and maybe even more importantly, the backend I'm communicating to uses lowerCamelCase).
I prefer to use the standard json module, but I have nothing against using another, open source library (e.g. jsonpickle might solve my issue?).
>>> class HardwareProfile:
...     def __init__(self, vm_size):
...         self.vm_size = vm_size
...
>>> hp = HardwareProfile('Large')
>>> hp.vm_size
'Large'

What I want:

>>> magicjson.dumps(hp)
'{"vmSize": "Large"}'

What I have so far:

>>> json.dumps(hp, default=lambda o: o.__dict__)
'{"vm_size": "Large"}'

You just need to create a function to transform the snake_case keys to camelCase. You can easily do that using .split, .lower, and .title.
import json

class HardwareProfile:
    def __init__(self, vm_size):
        self.vm_size = vm_size
        self.some_other_thing = 42
        self.a = 'a'

def snake_to_camel(s):
    a = s.split('_')
    a[0] = a[0].lower()
    if len(a) > 1:
        a[1:] = [u.title() for u in a[1:]]
    return ''.join(a)

def serialise(obj):
    return {snake_to_camel(k): v for k, v in obj.__dict__.items()}

hp = HardwareProfile('Large')
print(json.dumps(serialise(hp), indent=4, default=serialise))
Output:

{
    "vmSize": "Large",
    "someOtherThing": 42,
    "a": "a"
}
You could put serialise in a lambda, but I think it's more readable to write it as a proper def function.
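If you would rather keep calling json.dumps directly on the object, a json.JSONEncoder subclass can do the same key conversion. This is just a sketch reusing the snake_to_camel function and the hp instance defined above:

import json

class CamelCaseEncoder(json.JSONEncoder):
    # Any object json cannot handle natively is serialised via its __dict__,
    # with the keys converted to lowerCamelCase on the way out.
    def default(self, o):
        return {snake_to_camel(k): v for k, v in o.__dict__.items()}

print(json.dumps(hp, cls=CamelCaseEncoder))   # {"vmSize": "Large", ...}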


pickle, dill and cloudpickle returning field as empty dict on custom class after process termination

I have an object of a custom class that I am trying to serialize and permanently store.
When I serialize it, store it, load it and use it in the same run, it works fine. It only messes up when I've ended the process and then try to load it again from the pickle file. This is the code that works fine:
first_model = NgramModel(3, name="debug")
for paragraph in text:
    first_model.train(paragraph_to_sentences(text))
    # paragraph_to_sentences just uses regex to do the equivalent of splitting by punctuation
print(first_model.context_options)
# context_options is a dict (counter)
first_model = NgramModel.load_existing_model("debug")
# load_existing_model loads the pickle file. Look in the class code
print(first_model.context_options)
However, when I run this alone, it prints an empty counter:
first_model = NgramModel.load_existing_model("debug")
print(first_model.context_options)
This is a shortened version of the class file (the only two methods that touch the pickle/dill are update_pickle_state and load_existing_model):
import os
import dill
from itertools import count
from collections import Counter
from os import path

class NgramModel:
    context_options: dict[tuple, set[str]] = {}
    ngram_count: Counter[tuple] = Counter()
    n = 0
    pickle_path: str = None
    num_paragraphs = 0
    num_sentences = 0

    def __init__(self, n: int, **kwargs):
        self.n = n
        self.pickle_path = NgramModel.pathify(kwargs.get('name', NgramModel.gen_pickle_name()))  # use name if exists else generate random name

    def train(self, paragraph_as_list: list[str]):
        '''really the central method that coordinates everything else. Takes a list of sentences, generates data (n-grams) from each, updates the fields, and saves the instance (self) to a pickle file'''
        self.num_paragraphs += 1
        for sentence in paragraph_as_list:
            self.num_sentences += 1
            generated = self.generate_Ngrams(sentence)
            self.ngram_count.update(generated)
            for ngram in generated:
                self.add_to_set(ngram)
        self.update_pickle_state()

    def update_pickle_state(self):
        '''saves instance to pickle file'''
        file = open(self.pickle_path, "wb")
        dill.dump(self, file)
        file.close()

    @staticmethod
    def load_existing_model(name: str):
        '''returns object from pickle file'''
        path = NgramModel.pathify(name)
        file = open(path, "rb")
        obj: NgramModel = dill.load(file)
        return obj

    def generate_Ngrams(self, string: str):
        '''ref: https://www.analyticsvidhya.com/blog/2021/09/what-are-n-grams-and-how-to-implement-them-in-python/'''
        words = string.split(" ")
        words = ["<start>"] * (self.n - 1) + words + ["<end>"] * (self.n - 1)
        list_of_tup = []
        for i in range(len(words) + 1 - self.n):
            list_of_tup.append((tuple(words[i + j] for j in range(self.n - 1)), words[i + self.n - 1]))
        return list_of_tup

    def add_to_set(self, ngram: tuple[tuple[str, ...], str]):
        if ngram[0] not in self.context_options:
            self.context_options[ngram[0]] = set()
        self.context_options[ngram[0]].add(ngram[1])

    @staticmethod
    def pathify(name):
        '''converts name to path'''
        return f"models/{name}.pickle"

    @staticmethod
    def gen_pickle_name():
        for i in count():
            new_name = f"unnamed-pickle-{i}"
            if not path.exists(NgramModel.pathify(new_name)):
                return new_name
All the other fields print properly and are complete and correct, except the two dicts.
The problem is that context_options is a mutable class member, not an instance member. If I had to guess, dill is only pickling instance members, since the class definition holds the class members. That would account for why you see a "filled-out" context_options when you're working in the same shell but not when you load fresh: in the former case you're using the dirtied class member.
It's for stuff like this that you generally don't want to use mutable class members (or, similarly, mutable default values in function signatures). More typical is to use something like context_options: dict[tuple, set[str]] = None and then check for None in __init__ to set it to a default value, e.g. an empty dict. Alternatively, you could use a @dataclass and provide a field initializer, i.e.
@dataclasses.dataclass
class NgramModel:
    context_options: dict[tuple, set[str]] = dataclasses.field(default_factory=dict)
    ...
You can observe what I mean about it being a mutable class member with, for instance...
if __name__ == '__main__':
    ng = NgramModel(3, name="debug")
    print(ng.context_options)   # {}
    ng.context_options[("foo", "bar")] = {"baz", "qux"}
    print(ng.context_options)   # {('foo', 'bar'): {'baz', 'qux'}}

    ng2 = NgramModel(3, name="debug")
    print(ng2.context_options)  # {('foo', 'bar'): {'baz', 'qux'}}
I would expect a brand new ng2 to have the same context that the brand new ng had - empty (or whatever an appropriate default is).
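For completeness, a minimal sketch of the first suggestion (initialising the two mutable members per instance in __init__ instead of on the class); the field names come from the question, everything else is illustrative:

from collections import Counter

class NgramModel:
    def __init__(self, n: int, **kwargs):
        self.n = n
        # instance members: every object (including a freshly unpickled one)
        # carries its own dict/Counter instead of sharing the class-level ones
        self.context_options: dict[tuple, set[str]] = {}
        self.ngram_count: Counter[tuple] = Counter()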

Object oriented programming with abstract class

I want to achieve the below:
def do_something(request):
    company_name = request.get("company_name", DEFAULT_COMPANY)
    data = request.get("data")
    response = transform_data_according_to(data, company_name)
    return response
I did the following for it:
from abc import ABC, abstractmethod

class Transform(ABC):
    def __init__(self, data):
        self.data = data

    @abstractmethod
    def transform(self):
        pass

class CompanyA(Transform):
    def transform(self):
        # do_transformation
        return transformed_data

def do_something(request):
    company_name = request.get("company_name", DEFAULT_COMPANY)
    data = request.get("data")
    if company_name == CompanyA:
        response = CompanyA.transform(data)
    return response
Instead, I would like to do something like this, using correct object-oriented principles:
def do_something(request):
    company_name = request.get("company_name", DEFAULT_COMPANY)
    data = request.get("data")
    response = Transform(data, company_name)
    return response
I want to know where I might be thinking wrong in terms of the desired approach versus the implemented approach. Is the implemented approach correct? The if/else checks can grow quite big in that case.
Thanks to teraflop
The simple, idiomatic way to do this in Python would be to look up the Transform subclass in a dictionary:
transform_classes = {
    "CompanyA": CompanyA,
    # ...
}

def do_something(request):
    company_name = request.get("company_name", DEFAULT_COMPANY)
    data = request.get("data")
    transformer = transform_classes[company_name](data)
    return transformer.transform()
If you prefer to be more rigorously object-oriented, you could wrap the dictionary in an object (e.g. TransformLookupByName) instead of accessing it directly.
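For what it's worth, here is a minimal sketch of what such a wrapper could look like; the class name comes from the answer, while the for_company method and the error handling are invented for illustration:

class TransformLookupByName:
    """Thin wrapper around the registry dictionary."""
    def __init__(self, classes):
        self._classes = dict(classes)

    def for_company(self, company_name):
        try:
            return self._classes[company_name]
        except KeyError:
            raise ValueError("no Transform registered for %r" % company_name)

# usage, assuming CompanyA and data as in the question
transforms = TransformLookupByName({"CompanyA": CompanyA})
response = transforms.for_company("CompanyA")(data).transform()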
There are also various kinds of metaprogramming magic you can use to build the dictionary automatically without having to name each subclass explicitly. For example, this will collect all of the Transform subclasses in the current source file:
transform_classes = {
    k: v for k, v in globals().items()
    if isinstance(v, type) and issubclass(v, Transform) and v != Transform
}
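Another way to build the registry automatically (not from the answer above, just a common alternative) is to let the base class register each subclass as it is defined, via __init_subclass__; a self-contained sketch:

from abc import ABC, abstractmethod

class Transform(ABC):
    registry = {}  # maps class name -> subclass

    def __init__(self, data):
        self.data = data

    def __init_subclass__(cls, **kwargs):
        # runs once for every subclass definition
        super().__init_subclass__(**kwargs)
        Transform.registry[cls.__name__] = cls

    @abstractmethod
    def transform(self):
        ...

class CompanyA(Transform):
    def transform(self):
        return self.data  # placeholder transformation

transformer = Transform.registry["CompanyA"]("some data")
print(transformer.transform())  # some data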

Easiest way to copy all fields from one dataclass instance to another?

Let's assume you have defined a Python dataclass:
from dataclasses import dataclass

@dataclass
class Marker:
    a: float
    b: float = 1.0
What's the easiest way to copy the values from an instance marker_a to another instance marker_b?
Here's an example of what I try to achieve:
marker_a = Marker(1.0, 2.0)
marker_b = Marker(11.0, 12.0)
# now some magic happens which you hopefully can fill in
print(marker_b)
# result: Marker(a=1.0, b=2.0)
As a boundary condition, I do not want to create and assign a new instance to marker_b.
OK, I could loop through all defined fields and copy the values one by one, but there has to be a simpler way, I guess.
The dataclasses.replace function returns a new copy of the object.
Without passing in any changes, it will return a copy with no modification:
>>> import dataclasses
>>> @dataclasses.dataclass
... class Dummy:
...     foo: int
...     bar: int
...
>>> dummy = Dummy(1, 2)
>>> dummy_copy = dataclasses.replace(dummy)
>>> dummy_copy.foo = 5
>>> dummy
Dummy(foo=1, bar=2)
>>> dummy_copy
Dummy(foo=5, bar=2)
Note that this is a shallow copy.
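As a small illustration of what "shallow" means here (using a made-up Holder class), the copy produced by replace shares any mutable field values with the original:

>>> @dataclasses.dataclass
... class Holder:
...     items: list
...
>>> a = Holder([1, 2])
>>> b = dataclasses.replace(a)
>>> b.items.append(3)
>>> a.items   # the same list object is shared by both instances
[1, 2, 3]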
Edit to address comments:
If a copy is undesirable, I would probably go with the following:
for key, value in dataclasses.asdict(dummy).items():
    setattr(some_obj, key, value)
I think that looping over the fields probably is the easiest way. All the other options I can think of involve creating a new object.
from dataclasses import fields

marker_a = Marker(5)
marker_b = Marker(0, 99)

for field in fields(Marker):
    setattr(marker_b, field.name, getattr(marker_a, field.name))

print(marker_b)  # Marker(a=5, b=1.0)
@dataclass
class Marker:
    a: float
    b: float = 1.0

marker_a = Marker(0.5)
marker_b = Marker(**marker_a.__dict__)

marker_b
# Marker(a=0.5, b=1.0)
If you didn't want to create a new instance, try this:
marker_a = Marker(1.0, 2.0)
marker_b = Marker(11.0, 12.0)
marker_b.__dict__ = marker_a.__dict__.copy()
# result: Marker(a=1.0, b=2.0)
Not sure whether that's considered a bad hack though...
Another option which may be more elegant:
import dataclasses
marker_a = Marker(1.0, 2.0)
marker_b = Marker(**dataclasses.asdict(marker_a))
Here's a version that also lets you choose the result dataclass type and override attributes:
dataclassWith(Y(x=2, z=5), y=3)        # -> Y(x=2, z=5, y=3)
dataclassWith(Y(x=2, z=5), X, x=99)    # -> X(x=99, z=5)   (X has no y field)
MISSING = object()

def dataclassWith(other, clz=None, **kw):
    if clz is None:
        clz = other.__class__
    values = other.__dict__.copy()
    values.update(kw)
    # keep only the keys that the target dataclass actually defines
    return clz(**{k: v for k, v in values.items()
                  if getattr(clz, k, MISSING) is not MISSING})
import dataclasses
import unittest

class TestDataclassUtil(unittest.TestCase):
    def test_dataclassWith(self):
        @dataclasses.dataclass
        class X:
            x: int = 1
            z: int = 99

        @dataclasses.dataclass
        class Y(X):
            y: int = 2

        r = dataclassWith(Y(x=2), y=3)
        self.assertTrue(isinstance(r, Y))
        self.assertTrue(r.x == 2)
        self.assertTrue(r.y == 3)
        self.assertTrue(r.z == 99)

        r = dataclassWith(Y(x=2), X, z=100)
        self.assertTrue(isinstance(r, X))
        self.assertTrue(r.x == 2)
        self.assertTrue(r.z == 100)

How to save frequently used physical constants in Python

I would like to have a place for my physical constants.
The following answer is already a starting point:
How-to import constants in many files
So I have a separate file called constants.py which I import into my projects.
Now, I would like to save and access additional information:
units
documentation
The resulting interface should be like:
import constants as c
print c.R
>>> 287.102
print c.R.units
>>> J/(kg K)
print c.R.doc
>>> ideal gas constant
Calculations should use c.R to access the value.
It is basically a class, which behaves like the float class
but holds two additional strings: units and documentation.
How can this be designed?
Inheriting from float, you have to override the __new__ method:

class Constant(float):
    def __new__(cls, value, units, doc):
        self = float.__new__(cls, value)
        self.units = units
        self.doc = doc
        return self

R = Constant(287.102, "J/(kg K)", "ideal gas constant")

print R, R * 2
>>> 287.102 574.204
print R.units
>>> J/(kg K)
print R.doc
>>> ideal gas constant
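One caveat worth adding (my observation, not part of the answer above): arithmetic on a Constant falls back to plain float, so the extra attributes do not survive calculations. In the same Python 2 style:

R = Constant(287.102, "J/(kg K)", "ideal gas constant")
doubled = R * 2                   # float.__mul__ returns a plain float, not a Constant
print type(doubled)               # <type 'float'>
print hasattr(doubled, "units")   # False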
I recommend using the json library, which will allow you to store your constant values in a readable and modifiable format.
Using @Daniel's Constant class, which inherits from float and adds your custom attributes, you can load all your constants at once into a new Constants object.
You can then access the value as c.R and the extra information as c.R.units and c.R.doc.
Complete file:
#!/usr/bin/env python
import json

class Constant(float):
    def __new__(cls, value):
        self = float.__new__(cls, value["value"])  # KeyError if missing "value"
        self.units = value.get("units", None)
        self.doc = value.get("doc", None)
        return self

class Constants():
    # load the json file into a dictionary of Constant objects
    def __init__(self):
        with open("constants.json") as fh:
            json_object = json.load(fh)
        # create a new dictionary
        self.constants_dict = {}
        for constant in json_object.keys():
            # put each Constant into it
            self.constants_dict[constant] = Constant(json_object[constant])

    # try to get the requested attribute
    def __getattr__(self, name):
        # missing keys are returned None, use self.constants_dict[name]
        # if you want to raise a KeyError instead
        return self.constants_dict.get(name, None)

c = Constants()

print c.R          # 287.102
print c.R.doc      # ideal gas constant
print c.R + 5      # 292.102
print c.F.units    # C mol-1
print c.missing    # None
Example constants.json:
{
    "R": {
        "value": 287.102,
        "units": "J/(kg K)",
        "doc": "ideal gas constant"
    },
    "F": {
        "value": 96485.33,
        "units": "C mol-1",
        "doc": "Faraday constant"
    }
}

Python / YAML: How to initialize additional objects not just from the YAML file, within loadConfig?

I have what I think is a small misconception with loading some YAML objects. I defined the class below.
What I want to do is load some objects with the overridden loadConfig function for YAMLObjects. Some of these come from my .yaml file, but others should be built out of objects loaded from the YAML file.
For instance, in the class below, I load a member object named "keep" which is a string naming some items to keep in the region. But I want to also parse this into a list and have the list stored as a member object too. And I don't want the user to have to give both the string and list version of this parameter in the YAML.
My current work around has been to override the __getattr__ function inside Region and make it create the defaults if it looks and doesn't find them. But this is clunky and more complicated than needed for just initializing objects.
What convention am I misunderstanding here? Why doesn't the loadConfig method create the additional members that are not found in the YAML?
import yaml, pdb

class Region(yaml.YAMLObject):
    yaml_tag = u'!Region'

    def __init__(self, name, keep, drop):
        self.name = name
        self.keep = keep
        self.drop = drop
        self.keep_list = self.keep.split("+")
        self.drop_list = self.drop.split("+")
        self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)
    ###

    def loadConfig(self, yamlConfig):
        yml = yaml.load_all(file(yamlConfig))
        for data in yml:
            # These get created fine
            self.name = data["name"]
            self.keep = data["keep"]
            self.drop = data["drop"]
            # These do not get created.
            self.keep_list = self.keep.split("+")
            self.drop_list = self.drop.split("+")
            self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)
    ###
### End Region

if __name__ == "__main__":
    my_yaml = "/home/path/to/test.yaml"
    region_iterator = yaml.load_all(file(my_yaml))
    # Set a debug breakpoint to play with region_iterator and
    # confirm the extra stuff isn't created.
    pdb.set_trace()
And here is test.yaml so you can run all of this and see what I mean:
Regions:
    # Note: the string conventions below are for an
    # existing system. This is a shortened, representative
    # example.
    Market1:
        !Region
        name: USAndGB
        keep: US+GB
        drop: !!null
    Market2:
        !Region
        name: CanadaAndAustralia
        keep: CA+AU
        drop: !!null
And here, for example, is what it looks like for me when I run this in an IPython shell and explore the loaded object:
In [57]: %run "/home/espears/testWorkspace/testRegions.py"
--Return--
> /home/espears/testWorkspace/testRegions.py(38)<module>()->None
-> pdb.set_trace()
(Pdb) region_iterator
<generator object load_all at 0x1139d820>
(Pdb) tmp = region_iterator.next()
(Pdb) tmp
{'Regions': {'Market2': <__main__.Region object at 0x1f858550>, 'Market1': <__main__.Region object at 0x11a91e50>}}
(Pdb) us = tmp['Regions']['Market1']
(Pdb) us
<__main__.Region object at 0x11a91e50>
(Pdb) us.name
'USAndGB'
(Pdb) us.keep
'US+GB'
(Pdb) us.keep_list
*** AttributeError: 'Region' object has no attribute 'keep_list'
A pattern I have found useful when working with YAML for classes that are basically storage is to have the loader call the constructor, so that objects are created in the same way as when you make them normally. If I understand what you are attempting to do correctly, this kind of structure might be useful:
import inspect
import yaml
import numpy as np
from collections import OrderedDict

class Serializable(yaml.YAMLObject):
    __metaclass__ = yaml.YAMLObjectMetaclass

    @property
    def _dict(self):
        dump_dict = OrderedDict()
        for var in inspect.getargspec(self.__init__).args[1:]:
            if getattr(self, var, None) is not None:
                item = getattr(self, var)
                if isinstance(item, np.ndarray) and item.ndim == 1:
                    item = list(item)
                dump_dict[var] = item
        return dump_dict

    @classmethod
    def to_yaml(cls, dumper, data):
        return ordered_dump(dumper, '!{0}'.format(data.__class__.__name__),
                            data._dict)

    @classmethod
    def from_yaml(cls, loader, node):
        fields = loader.construct_mapping(node, deep=True)
        return cls(**fields)

def ordered_dump(dumper, tag, data):
    value = []
    node = yaml.nodes.MappingNode(tag, value)
    for key, item in data.iteritems():
        node_key = dumper.represent_data(key)
        node_value = dumper.represent_data(item)
        value.append((node_key, node_value))
    return node
You would then want to have your Region class inherit from Serializable, and remove the loadConfig stuff. The code I posted inspects the constructor to see what data to save to the yaml file, and then when loading a yaml file calls the constructor with that same set of data. That way you just have to get the logic right in your constructor and the yaml loading should get it for free.
That code was ripped from one of my projects, apologies in advance if it doesn't quite work. It is also slightly more complicated than it needs to be because I wanted to control the order of output by using OrderedDict. You could replace my ordered_dump function with a call to dumper.represent_dict.
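To make the last point concrete, here is a hypothetical, untested sketch of the Region class rewritten to inherit from Serializable. Because the derived members are rebuilt inside __init__, from_yaml (which calls the constructor with the mapping taken from the YAML node) recreates them automatically:

class Region(Serializable):
    yaml_tag = u'!Region'

    def __init__(self, name, keep, drop):
        self.name = name
        self.keep = keep
        self.drop = drop
        # derived members, recomputed on every construction
        # (guarded, since drop is !!null in the example YAML)
        self.keep_list = self.keep.split("+") if self.keep else []
        self.drop_list = self.drop.split("+") if self.drop else []
        self.pattern = "+".join(self.keep_list) + "-" + "-".join(self.drop_list)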
