Serialisation of dynamic Python dictionary keys

Problem
I am trying to map a response object into a Python class representation, but there are a number of keys within the response that are dynamic and therefore I am unable to map them explicitly to class members.
The response object:
{
    'rows': [
        {
            '1900000084913': '222222',
            '360018501198': '4003112',
            'custom_fields': [
                {'id': 360018501198, 'value': '4003112'},
                {'id': 1900000084913, 'value': '222222'}
            ]
        }
    ]
}
Within the object: '1900000084913' and '360018501198' are dynamically set. (At the moment I have added x and y as placeholders in the Row object)
Code:
from dataclasses import dataclass
from typing import List


@dataclass
class Serialisable:
    @classmethod
    def from_dict(cls, d):
        if d is not None:
            return cls(**d)


@dataclass
class CustomField(Serialisable):
    id: int
    value: str


@dataclass
class Row(Serialisable):
    x: str  # '1900000084913' - How do I map these?
    y: str  # '360018501198' -
    custom_fields: List[CustomField]

    @classmethod
    def from_dict(cls, d):
        if d is not None:
            kwargs = dict(d)
            custom_fields = kwargs.pop("custom_fields", None)
            if custom_fields is not None:
                kwargs["custom_fields"] = [
                    CustomField.from_dict(field) for field in custom_fields
                ]
            return cls(**kwargs)


@dataclass
class ResponseObject(Serialisable):
    rows: List[Row]

    @classmethod
    def from_dict(cls, d):
        if d is not None:
            kwargs = dict(d)
            rows = kwargs.pop("rows", None)
            if rows is not None:
                kwargs["rows"] = [
                    Row.from_dict(row) for row in rows
                ]
            return cls(**kwargs)
if __name__ == "__main__":
    response = {
        'rows': [
            {
                '1900000084913': '222222',
                '360018501198': '4003112',
                'custom_fields': [
                    {'id': 360018501198, 'value': '4003112'},
                    {'id': 1900000084913, 'value': '222222'}
                ]
            }
        ]
    }
    response_obj = ResponseObject.from_dict(response)
    print(response_obj)
If the keys are changed to x and y then this will map accordingly.

You have to create your own __init__ method and set the attributes dynamically using setattr. Writing your own __init__ also means you have to write a __repr__ for printing as well. Keep in mind that when you write your own __init__, some dataclass functionality might not work as intended.
Simplified sample:
data = {
    'rows': [
        {
            '1900000084913': '222222',
            '360018501198': '4003112',
            'custom_fields': [
                {'id': 360018501198, 'value': '4003112'},
                {'id': 1900000084913, 'value': '222222'}
            ]
        }
    ]
}


class Row:
    def __init__(self, custom_fields, **kwargs):
        self.custom_fields = custom_fields
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __repr__(self):
        items = ("%s=%r" % (k, v) for k, v in self.__dict__.items())
        return "%s(%s)" % (self.__class__.__name__, ', '.join(items))


r = Row(**data["rows"][0])
print(r)
You might also want to consider subclassing dict if you want to have a dynamic structure.
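A minimal sketch of that dict-subclass idea (my own illustration, reusing the CustomField dataclass from the question and the data dict above):

class DictRow(dict):
    """Rows keep dict semantics, so the dynamic keys stay ordinary keys."""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # post-process the one key whose structure we do know
        self["custom_fields"] = [
            CustomField.from_dict(f) for f in self.get("custom_fields", [])
        ]


row = DictRow(data["rows"][0])
print(row["1900000084913"])  # '222222'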

In your case this will not work since your dynamic attributes are numeric (or start with a digit, even though represented as strings). You can't name attributes that start with a digit.
In any other case, maybe try something like the following:
@dataclass(init=False)
class Row(Serialisable):
    def __init__(self, custom_fields: List[CustomField], **kwargs):
        self.custom_fields = custom_fields
        for key, value in kwargs.items():
            setattr(self, key, value)


row = Row(custom_fields=[], dynamic_attribute_1="1", dynamic_attribute_2="2")
Accessing e.g. row.dynamic_attribute_1 will then work
>>> row.dynamic_attribute_1
'1'
Edit:
Nevermind, it will also work in your case, at least for the instantiation of the object. However, you will not be able to access the instance's attributes via row.123123, since that raises a SyntaxError; you can only use getattr, i.e. getattr(row, "123123").
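For example, with the Row above (a tiny demo of the getattr access):

row = Row(custom_fields=[], **{"1900000084913": "222222"})
print(getattr(row, "1900000084913"))  # '222222'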

This works if you are able to make a few assumptions:
The dynamic field names will generally be numeric (can be cast to int if needed)
You want x to map to the first dynamic key that appears in the dictionary object, and y to map to the second dynamic key in the object.
Here is one solution that could work based on this:
from dataclasses import dataclass
from typing import List, Dict

d = {
    'rows': [
        {
            '1900000084913': '222222',
            '360018501198': '4003112',
            'custom_fields': [
                {'id': 360018501198, 'value': '4003112'},
                {'id': 1900000084913, 'value': '222222'}
            ]
        }
    ]
}


# @dataclass
class SerializableMixin:
    @classmethod
    def from_dict(cls, d: Dict):
        if d is not None:
            return cls(**d)


@dataclass
class CustomField(SerializableMixin):
    id: int
    value: str


@dataclass(init=False)
class Row:
    x: str  # '1900000084913' - How do I map these?
    y: str  # '360018501198' -
    custom_fields: List[CustomField]

    def __init__(self, custom_fields, **kwargs):
        self.custom_fields = [CustomField.from_dict(cf) for cf in custom_fields]
        placeholder_attrs = ['x', 'y']
        for key, value in kwargs.items():
            if key.isnumeric():
                attr_to_set = placeholder_attrs.pop(0)
                setattr(self, attr_to_set, value)


r = Row(**d["rows"][0])
print(r)
# prints:
# Row(x='222222', y='4003112', custom_fields=[CustomField(id=360018501198, value='4003112'), CustomField(id=1900000084913, value='222222')])

Related

Use Variable As Dictionary Key Set

I parse a JSON file to a dictionary; example JSON data below:
{
    "environmental": {
        "temprature": {
            "test": "temprature",
            "unit": "c",
            "now": 12.65,
            "now_timestamp": "10-06-2019 08:02:18",
            "min": "12.5",
            "min_timestamp": "03-06-2019 07:40:02",
            "max": "32.84",
            "max_timestamp": "03-06-2019 04:30:03"
        }
    }
}
I would like to either retrieve a value or set a value using a list, tuple, or string as the dictionary key.
var_lst_key = ["environmental", "temprature", "now"]
var_dict_x[var_lst_key] = "x"
or
print(var_dict_x[var_lst_key])
Part 1: Doing it the easy way: using functions
A nested lookup is pretty easy to do. You iterate over the keys, and keep replacing the object you're looking into with the value at the key you're currently looking at:
def nested_get(obj, keys):
    for key in keys:
        obj = obj[key]
    return obj


def nested_set(obj, keys, value):
    # Drill down until the penultimate key
    for key in keys[:-1]:
        obj = obj[key]
    # Set value of last key
    obj[keys[-1]] = value
To run this:
import json

jstr = """{
    "environmental": {
        "temprature": {
            "test": "temprature",
            "unit": "c",
            "now": 12.65,
            "now_timestamp": "10-06-2019 08:02:18",
            "min": "12.5",
            "min_timestamp": "03-06-2019 07:40:02",
            "max": "32.84",
            "max_timestamp": "03-06-2019 04:30:03"
        }
    }
}"""
jobj = json.loads(jstr)

var_lst_key = ["environmental", "temprature", "now"]
nested_get(jobj, var_lst_key)     # Returns 12.65

invalid_keys = ["environmental", "temprature", "hello"]
nested_get(jobj, invalid_keys)    # Throws KeyError: 'hello'

nested_set(jobj, var_lst_key, "HELLO!")
nested_get(jobj, var_lst_key)     # Returns 'HELLO!'
Part 2: Doing it the fancy way: using a derived class
Now if you really want to use the dict[key] = value syntax, you're going to have to extend the dict class to override its __getitem__() and __setitem__() methods.
class NestedLookupDict(dict):
    def __init__(self, *args, **kwargs):
        super(type(self), self).__init__(*args, **kwargs)
        self.insert_missing_keys = True

    def __getitem__(self, indices):
        # Get the super dictionary for easy lookup
        obj = self
        for i in indices:
            obj = dict.__getitem__(obj, i)
        return obj

    def __setitem__(self, indices, value):
        # Get the base dictionary
        obj = self
        # Drill down until the penultimate key
        for i in indices[:-1]:
            # Insert a new dict if a key is missing
            if self.insert_missing_keys and not dict.__contains__(obj, i):
                dict.__setitem__(obj, i, dict())
            obj = dict.__getitem__(obj, i)
        # Set the value at the final key
        dict.__setitem__(obj, indices[-1], value)
To use this, let's use the json object parsed from the string like before:
# jobj = {... something ...}
nested_dict = NestedLookupDict(jobj)
print(nested_dict[var_lst_key])  # Prints 12.65
nested_dict[var_lst_key] = "HELLO!"
print(nested_dict[var_lst_key])  # Prints HELLO!
When nested_dict.insert_missing_keys is set to True (by default), the __setitem__() method adds missing dictionaries if required.
newkey = ["environmental", "temprature", "newkey"]
nested_dict[newkey] = "NEWKEY!!!"
print(nested_dict[newkey])    # Prints NEWKEY!!!

newkey2 = ["environmental", "temprature", "nested", "newkey"]
nested_dict[newkey2] = "NESTEDNEWKEY!!!"
print(nested_dict[newkey2])   # Prints NESTEDNEWKEY!!!
At the end of all this, you can dump the object to json to see what it looks like:
print(json.dumps(nested_dict))
# Output:
{
    "environmental": {
        "temprature": {
            "test": "temprature",
            "unit": "c",
            "now": "HELLO!",
            "now_timestamp": "10-06-2019 08:02:18",
            "min": "12.5",
            "min_timestamp": "03-06-2019 07:40:02",
            "max": "32.84",
            "max_timestamp": "03-06-2019 04:30:03",
            "newkey": "NEWKEY!!!",
            "nested": {
                "newkey": "NESTEDNEWKEY!!!"
            }
        }
    }
}
Part 3: Way overkill, but oh! so much fun to code: Souped up NestedLookupDict
Additional features so you can use it almost like dict:
Delete keys using del nested_dict[key]
Check if keys exist with key in nested_dict
nested_dict.get(key, default) absorbs the KeyError if key doesn't exist, and returns default
Implemented a type check on keys: they must be list or tuple now
Quirks:
Because of the way __getitem__() is implemented, nested_dict[empty_list] returns a reference to nested_dict (itself). If this is a bad thing, a check for empty keys could be added. However, I don't see any problems coming from leaving it this way. Some consequences of this quirk:
To keep this behavior consistent with how __contains__() works, the __contains__() function returns True for the empty key: [] in nested_dict evaluates to True.
You cannot, by definition, set nested_dict[[]]. That throws a ValueError.
class NestedLookupDict(dict):
    def __init__(self, *args, **kwargs):
        super(type(self), self).__init__(*args, **kwargs)
        self.insert_missing_keys = True

    def check_keys(self, keys):
        if not isinstance(keys, (list, tuple)):
            raise TypeError("keys must be of type list or tuple")

    def get(self, keys, default=None):
        self.check_keys(keys)
        try:
            return self.__getitem__(keys)
        except KeyError:
            return default

    def __contains__(self, keys):
        self.check_keys(keys)
        if not keys:
            return True  # nested_dict contains the key [] = itself
        if len(keys) > 1:
            return self.__getitem__(keys[:-1]).__contains__(keys[-1])
        else:
            return dict.__contains__(self, keys[0])

    def __delitem__(self, keys):
        self.check_keys(keys)
        obj = self
        for i in keys[:-1]:
            obj = dict.__getitem__(obj, i)
        dict.__delitem__(obj, keys[-1])

    def __getitem__(self, keys):
        self.check_keys(keys)
        # Get the super dictionary for easy lookup
        obj = self
        for i in keys:
            obj = dict.__getitem__(obj, i)
        return obj

    def __setitem__(self, keys, value):
        self.check_keys(keys)
        if not keys:
            raise ValueError("keys cannot be empty")
        # Get the base dictionary
        obj = self
        # Drill down until the penultimate key
        for i in keys[:-1]:
            # Insert a new dict if a key is missing
            if self.insert_missing_keys and not dict.__contains__(obj, i):
                dict.__setitem__(obj, i, dict())
            obj = dict.__getitem__(obj, i)
        # Set the value at the final key
        dict.__setitem__(obj, keys[-1], value)
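A quick exercise of the extra features (my own demo, assuming the json import and jstr from Part 1):

nd = NestedLookupDict(json.loads(jstr))
print(["environmental", "temprature", "now"] in nd)   # True
print(nd.get(["environmental", "nope"], "fallback"))  # fallback
del nd[["environmental", "temprature", "now"]]
print(["environmental", "temprature", "now"] in nd)   # False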
You can use the json module and load the file as a dict as follows:
# importing the module
import json

# Opening JSON file
with open('data.json') as json_file:
    data = json.load(json_file)

# Print the type of data variable
print("Type:", type(data))

# Print the data of dictionary
print("\nPeople1:", data['people1'])
print("\nPeople2:", data['people2'])
You can then output each element from the dictionary using its keys.
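A minimal version of that loop (my own sketch, reusing the data dict loaded above):

for key, value in data.items():
    print(key, "->", value)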

Python Refactor JSON into different JSON Structure

I have a bunch of JSON data that I did mostly by hand. Several thousand lines. I need to refactor it into a totally different format using Python.
An overview of my 'stuff':
Column: The basic 'unit' of my data. Each Column has attributes. Don't worry about the meaning of the attributes, but the attributes need to be retained for each Column if they exist.
Folder: Folders group Columns and other Folders together. The folders currently have no attributes, they (currently) only contain other Folder and Column objects (Object does not necessarily refer to JSON objects here... more of an 'entity')
Universe: Universes group everything into big chunks which, in the larger scope of my project, are unable to interact with each other. That is not important here, but that's what they do.
Some limitations:
Columns cannot contain other Column objects, Folder objects, or Universe objects.
Folders cannot contain Universe objects.
Universes cannot contain other Universe objects.
Currently, I have Columns in this form:
"Column0Name": {
"type": "a type",
"dtype": "data type",
"description": "abcdefg"
}
and I need it to go to:
{
"name": "Column0Name",
"type": "a type",
"dtype": "data type",
"description": "abcdefg"
}
Essentially I need to convert the Column key-value things to an array of things (I am new to JSON, don't know the terminology). I also need each Folder to end up with two new JSON arrays (in addition to the "name": "FolderName" key-value pair). It needs a "folders": [] and "columns": [] to be added. So I have this for folders:
"Folder0Name": {
"Column0Name": {
"type": "a",
"dtype": "b",
"description": "c"
},
"Column1Name": {
"type": "d",
"dtype": "e",
"description": "f"
}
}
and need to go to this:
{
"name": "Folder0Name",
"folders": [],
"columns": [
{"name": "Column0Name", "type": "a", "dtype": "b", "description": "c"},
{"name": "Column1Name", "type": "d", "dtype": "e", "description": "f"}
]
}
The folders will also end up in an array inside its parent Universe. Likewise, each Universe will end up with "name", "folders", and "columns" things. As such:
{
    "name": "Universe0",
    "folders": [a bunch of folders in a JSON array],
    "columns": [occasionally some columns in a JSON array]
}
Bottom line:
I'm going to guess that I need a recursive function to iterate though all the nested dictionaries after I import the JSON data with the json Python module.
I'm thinking some sort of usage of yield might help but I'm not super familiar yet with it.
Would it be easier to update the dicts as I go, or to destroy each key-value pair and construct an entirely new dict as I go?
Here is what I have so far. I'm stuck on getting the generator to return actual dictionaries instead of a generator object.
import json


class AllUniverses:
    """Container to hold all the Universes found in the json file"""
    def __init__(self, filename):
        self._fn = filename
        self.data = {}
        self.read_data()

    def read_data(self):
        with open(self._fn, 'r') as fin:
            self.data = json.load(fin)
        return self

    def universe_key(self):
        """Get the next universe key from the dict of all universes

        The key will be used as the name for the universe.
        """
        yield from self.data


class Universe:
    def __init__(self, json_filename):
        self._au = AllUniverses(filename=json_filename)
        self.uni_key = self._au.universe_key()
        self._universe_data = self._au.data.copy()
        self._col_attrs = ['type', 'dtype', 'description', 'aggregation']
        self._folders_list = []
        self._columns_list = []
        self._type = "Universe"
        self._name = ""
        self.uni = dict()
        self.is_folder = False
        self.is_column = False

    def output(self):
        # TODO: Pass this to json.dump?
        # TODO: Still need to get the actual folder and column dictionaries
        #       from the generators
        out = {
            "name": self._name,
            "type": "Universe",
            "folder": [f.me for f in self._folders_list],
            "columns": [c.me for c in self._columns_list]}
        return out

    def update_universe(self):
        """Get the next universe"""
        universe_k = next(self.uni_key)
        self._name = str(universe_k)
        self.uni = self._universe_data.pop(universe_k)
        return self

    def parse_nodes(self):
        """Process all child nodes"""
        nodes = [_ for _ in self.uni.keys()]
        for k in nodes:
            v = self.uni.pop(k)
            self._is_column(val=v)
            if self.is_column:
                fc = Column(data=v, key_name=k)
                self._columns_list.append(fc)
            else:
                fc = Folder(data=v, key_name=k)
                self._folders_list.append(fc)
        return self

    def _is_column(self, val):
        """Determine if val is a Column or Folder object"""
        self.is_folder = False
        self._column = False
        if isinstance(val, dict) and not val:
            self.is_folder = True
        elif not isinstance(val, dict):
            raise TypeError('Cannot handle inputs not of type dict')
        elif any([i in val.keys() for i in self._col_attrs]):
            self._column = True
        else:
            self.is_folder = True
        return self

    def parse_children(self):
        for folder in self._folders_list:
            assert isinstance(folder, Folder), 'bletch idk what happened'
            folder.parse_nodes()


class Folder:
    def __init__(self, data, key_name):
        self._data = data.copy()
        self._name = str(key_name)
        self._node_keys = [_ for _ in self._data.keys()]
        self._folders = []
        self._columns = []
        self._col_attrs = ['type', 'dtype', 'description', 'aggregation']

    @property
    def me(self):
        # maybe this should force the code to parse all children of this
        # Folder? Need to convert the generator into actual dictionaries
        return {"name": self._name, "type": "Folder",
                "columns": [(c.me for c in self._columns)],
                "folders": [(f.me for f in self._folders)]}

    def parse_nodes(self):
        """Parse all the children of this Folder

        Parse through all the node names. If it is detected to be a Folder
        then create a Folder obj. from it and add to the list of Folder
        objects. Else create a Column obj. from it and append to the list
        of Column obj.
        This should be appending dictionaries
        """
        for key in self._node_keys:
            _folder = False
            _column = False
            values = self._data.copy()[key]
            if isinstance(values, dict) and not values:
                _folder = True
            elif not isinstance(values, dict):
                raise TypeError('Cannot handle inputs not of type dict')
            elif any([i in values.keys() for i in self._col_attrs]):
                _column = True
            else:
                _folder = True
            if _folder:
                f = Folder(data=values, key_name=key)
                self._folders.append(f.me)
            else:
                c = Column(data=values, key_name=key)
                self._columns.append(c.me)
        return self


class Column:
    def __init__(self, data, key_name):
        self._data = data.copy()
        self._stupid_check()
        self._me = {
            'name': str(key_name),
            'type': 'Column',
            'ctype': self._data.pop('type'),
            'dtype': self._data.pop('dtype'),
            'description': self._data.pop('description'),
            'aggregation': self._data.pop('aggregation')}

    def __str__(self):
        # TODO: pretty sure this isn't correct
        return str(self.me)

    @property
    def me(self):
        return self._me

    def to_json(self):
        # This seems to be working? I think?
        return json.dumps(self, default=lambda o: str(self.me))  # o.__dict__)

    def _stupid_check(self):
        """If the key isn't in the dictionary, add it"""
        keys = [_ for _ in self._data.keys()]
        keys_defining_a_column = ['type', 'dtype', 'description', 'aggregation']
        for json_key in keys_defining_a_column:
            if json_key not in keys:
                self._data[json_key] = ""
        return self


if __name__ == "__main__":
    file = r"dummy_json_data.json"
    u = Universe(json_filename=file)
    u.update_universe()
    u.parse_nodes()
    u.parse_children()
    print('check me')
And it gives me this:
{
    "name": "UniverseName",
    "type": "Universe",
    "folder": [
        {"name": "Folder0Name",
         "type": "Folder",
         "columns": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB0B0>],
         "folders": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB190>]},
        {"name": "Folder2Name",
         "type": "Folder",
         "columns": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB040>],
         "folders": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB120>]},
        {"name": "Folder4Name",
         "type": "Folder",
         "columns": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB270>],
         "folders": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB200>]},
        {"name": "Folder6Name",
         "type": "Folder",
         "columns": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB2E0>],
         "folders": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB350>]},
        {"name": "Folder8Name",
         "type": "Folder",
         "columns": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB3C0>],
         "folders": [<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB430>]}
    ],
    "columns": []
}
If there is an existing tool for this kind of transformation so that I don't have to write Python code, that would be an attractive alternative, too.
Let's create the 3 classes needed to represent Columns, Folders and Universes. Before starting, there are some topics I wanna talk about; I give a short description of them here, and if any of them is new to you I can go deeper:
I will use type annotations to make clear what type each variable is.
I am gonna use __slots__. By telling the Column class that its instances are gonna have name, ctype, dtype, description and aggregation attributes, each instance of Column will require less memory space. The downside is that it will not accept any other attribute not listed there. That is, it saves memory but loses flexibility. As we are going to have several (maybe hundreds or thousands) of instances, a reduced memory footprint seems more important than the flexibility of being able to add any attribute.
Each class will have the standard constructor where every argument has a default value except name, which is mandatory.
Each class will have another constructor called from_old_syntax. It is going to be a class method that receives the string corresponding to the name and a dict corresponding to the data as its arguments and outputs the corresponding instance (Column, Folder or Universe).
Universes are basically the same as Folders with different names (for now) so it will basically inherit it (class Universe(Folder): pass).
from typing import List


class Column:
    __slots__ = 'name', 'ctype', 'dtype', 'description', 'aggregation'

    def __init__(
        self,
        name: str,
        ctype: str = '',
        dtype: str = '',
        description: str = '',
        aggregation: str = '',
    ) -> None:
        self.name = name
        self.ctype = ctype
        self.dtype = dtype
        self.description = description
        self.aggregation = aggregation

    @classmethod
    def from_old_syntax(cls, name: str, data: dict) -> "Column":
        column = cls(name)
        for key, value in data.items():
            # The old syntax used type for column type but in the new syntax it
            # will have another meaning so we use ctype instead
            if key == 'type':
                key = 'ctype'
            try:
                setattr(column, key, value)
            except AttributeError as e:
                raise AttributeError(f"Unexpected key {key} for Column") from e
        return column


class Folder:
    __slots__ = 'name', 'folders', 'columns'

    def __init__(
        self,
        name: str,
        columns: List[Column] = None,
        folders: List["Folder"] = None,
    ) -> None:
        self.name = name
        if columns is None:
            self.columns = []
        else:
            self.columns = [column for column in columns]
        if folders is None:
            self.folders = []
        else:
            self.folders = [folder for folder in folders]

    @classmethod
    def from_old_syntax(cls, name: str, data: dict) -> "Folder":
        columns = []  # type: List[Column]
        folders = []  # type: List["Folder"]
        for key, value in data.items():
            # Determine if it is a Column or a Folder
            if 'type' in value and 'dtype' in value:
                columns.append(Column.from_old_syntax(key, value))
            else:
                folders.append(Folder.from_old_syntax(key, value))
        return cls(name, columns, folders)


class Universe(Folder):
    pass
As you can see the constructors are pretty trivial: assign the arguments to the attributes and done. In the case of Folders (and thus Universes too), two arguments are lists of columns and folders. The default value is None (in which case we initialize an empty list) because using mutable objects as default values has some issues, so it is good practice to use None as the default value for mutable arguments (such as lists).
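For readers who have not hit that pitfall before, here is a minimal illustration of why mutable defaults are dangerous (my own aside, not part of the refactoring code):

def broken(items=[]):
    # the default list is created once, at function definition time
    items.append(1)
    return items


print(broken())  # [1]
print(broken())  # [1, 1]  <- state leaked between calls


def fixed(items=None):
    if items is None:
        items = []  # a fresh list on every call
    items.append(1)
    return items


print(fixed())  # [1]
print(fixed())  # [1]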
Column's from_old_syntax class method creates an empty Column with the provided name. Afterwards we iterate over the data dict that was also provided and assign each key-value pair to its corresponding attribute. There is a special case where the "type" key is converted to "ctype", as "type" is going to be used for a different purpose with the new syntax. The assignment itself is done by setattr(column, key, value). We have included it inside a try ... except ... clause because, as we said above, only the items in __slots__ can be used as attributes, so if there is an attribute that you forgot you will get an exception saying "AttributeError: Unexpected key 'NAME'" and you will only have to add that "NAME" to __slots__.
Folder's (and thus Universe's) from_old_syntax class method is even simpler. Create a list of columns and folders, iterate over the data checking whether each entry is a folder or a column, and use the appropriate from_old_syntax class method. Then use those two lists and the provided name to return the instance. Notice that the Folder.from_old_syntax notation is used to create the folders instead of cls.from_old_syntax because cls may be Universe. However, to create the instance we do use cls(...), as here we do want Universe or Folder.
Now you could do universes = [Universe.from_old_syntax(name, data) for name, data in json.load(f).items()] where f is the file, and you will get all your Universes, Folders and Columns in memory. So now we need to encode them back to JSON. For this we are gonna extend json.JSONEncoder so that it knows how to parse our classes into dictionaries that it can encode normally. To do so, you just need to override the default method, check if the object passed is one of our classes and return a dict that will be encoded. If it is not one of our classes we will let the parent default method take care of it.
import json

# JSON fields with these values will be omitted
EMPTY_VALUES = "", [], {}


class CustomEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, (Column, Folder, Universe)):
            # Make a dict with every item in their respective __slots__
            data = {
                attr: getattr(obj, attr) for attr in obj.__slots__
                if getattr(obj, attr) not in EMPTY_VALUES
            }
            # Add the type field with the class name
            data['type'] = obj.__class__.__name__
            return data
        # Use the parent class function for any object not handled explicitly
        return super().default(obj)
Converting the classes to dictionaries is basically taking what is in __slots__ as the key and the attribute's value as the value. We will filter those values that are an empty string, an empty list or an empty dict as we do not need to write them to JSON. We finally add the "type" key to the dict by reading the objects class name (Column, Folder and Universe).
To use it you have to pass the CustomEncoder as the cls argument to json.dump.
So the code will look like this (omitting the class definitions to keep it short):
import json
from typing import List

# JSON fields with these values will be omitted
EMPTY_VALUES = "", [], {}


class Column:
    # ...

class Folder:
    # ...

class Universe(Folder):
    pass

class CustomEncoder(json.JSONEncoder):
    # ...

if __name__ == '__main__':
    with open('dummy_json_data.json', 'r') as f_in, open('output.json', 'w') as f_out:
        universes = [Universe.from_old_syntax(name, data)
                     for name, data in json.load(f_in).items()]
        json.dump(universes, f_out, cls=CustomEncoder, indent=4)

Any cattrs solution to serialize an attribute using a different name?

I was trying to find a solution similar to Java Jackson's ObjectMapper that can do serialization/deserialization of Python objects to JSON, and found that cattrs is closest to what I need. But it cannot do attribute mapping, like using firstName in JSON but first_name in the deserialized object.
attrs-serde can do the attribute mapping but cannot do recursive deserialization.
The problem can be illustrated in this example:
import attr
import cattr
from attrs_serde import serde

name_path = ["contact", "personal", "Name"]
phone_path = ["contact", "Phone"]


@serde
@attr.s(auto_attribs=True, frozen=True)
class Name:
    first: str
    last: str


@serde
@attr.s(auto_attribs=True, frozen=True)
class Person:
    name: Name = attr.ib(metadata={"to": name_path, "from": name_path})
    phone: str = attr.ib(metadata={"to": phone_path, "from": phone_path})


person_json = {"contact": {"personal": {"Name": {"first": "John", "last": "Smith"}}, "Phone": "555-112233"}}

# XXX: to/from only works on serde
p = Person(name=Name(first="John", last="Smith"), phone="555-112233")
print(p.to_dict())
# {'contact': {'personal': {'Name': {'first': 'John', 'last': 'Smith'}}, 'Phone': '555-112233'}}

p1 = Person.from_dict(person_json)
print(f"p1={p1}")
# p1=Person(name={'first': 'John', 'last': 'Smith'}, phone='555-112233')

# XXX: nested only works on cattrs
person = {"Name": {"First": "John", "Last": "Smith"}, "Phone": "555-112233"}
converter = cattr.Converter()
converter.register_structure_hook(
    Person, lambda d, _: Person(name=converter.structure(d["Name"], Name), phone=d.get("Phone"))
)
converter.register_structure_hook(Name, lambda d, _: Name(first=d["First"], last=d.get("Last")))

p2 = converter.structure(person, Person)
print(p2)
assert p == p2
print(converter.unstructure(p2))
# {'name': {'first': 'John', 'last': 'Smith'}, 'phone': '555-112233'}
# {"contact": {"personal": {"name": "John"}, "phone": "555-112233"}}
Any more elegant solution using cattr?
You can use humps to do the case conversion:
import humps
import cattr


class CAttrConverter:
    converter = cattr.Converter()

    def __init__(self):
        """
        structure hook for load
        unstructure hook for dump
        """

    def load(self, params, data_cls, camel_to_snake=True):
        """
        :param params: params, mostly from the front end
        :param data_cls:
        :param camel_to_snake: need to convert from camel style to snake style
        """
        if camel_to_snake:
            params = humps.depascalize(params)
        return self.converter.structure(params, data_cls)

    def dump(self, data, snake_to_camel=False):
        """
        :param data:
        :param snake_to_camel: dump as camel case
        """
        result: dict = self.converter.unstructure(data)
        if snake_to_camel:
            result = humps.camelize(result)
        return result
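A hedged usage sketch (my own, assuming pyhumps and cattrs are installed, with a throwaway attrs class named Person): humps.depascalize turns the PascalCase keys into snake_case before cattr structures them.

import attr


@attr.s(auto_attribs=True)
class Person:
    first_name: str
    last_name: str


conv = CAttrConverter()
p = conv.load({"FirstName": "John", "LastName": "Smith"}, Person)
print(p)  # Person(first_name='John', last_name='Smith')
print(conv.dump(p, snake_to_camel=True))  # {'firstName': 'John', 'lastName': 'Smith'}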
Posting this for those in the future. Yeah, you can achieve this by overloading the converter class's methods:
def unstructure_attrs_asdict(self, obj) -> Dict[str, Any]: ...

def structure_attrs_fromdict(
    self, obj: Mapping[str, Any], cl: Type[T]
) -> T: ...

Or, if you want tuples:

def unstructure_attrs_astuple(self, obj) -> Tuple[Any, ...]: ...

def structure_attrs_fromtuple(
    self, obj: Tuple[Any, ...], cl: Type[T]
) -> T: ...
Here is a simple Converter class that uses the to and from fields from metadata. I will leave handling nested fields to your imagination.
from typing import TypeVar, Dict, Any, Mapping, Type

from cattr import Converter
from cattr._compat import fields

T = TypeVar("T")


class ConverterWithMetaDataOverrides(Converter):
    # Classes to Python primitives.
    def unstructure_attrs_asdict(self, obj) -> Dict[str, Any]:
        """Our version of `attrs.asdict`, so we can call back to us."""
        attrs = fields(obj.__class__)
        dispatch = self._unstructure_func.dispatch
        rv = self._dict_factory()
        for a in attrs:
            name = a.name
            serialize_as = name
            if 'to' in a.metadata:
                serialize_as = a.metadata['to']
            v = getattr(obj, name)
            rv[serialize_as] = dispatch(a.type or v.__class__)(v)
        return rv

    def structure_attrs_fromdict(
        self, obj: Mapping[str, Any], cl: Type[T]
    ) -> T:
        """Instantiate an attrs class from a mapping (dict)."""
        # For public use.
        conv_obj = {}  # Start with a fresh dict, to ignore extra keys.
        dispatch = self._structure_func.dispatch
        for a in fields(cl):  # type: ignore
            # We detect the type by metadata.
            type_ = a.type
            name = a.name
            serialize_from = name
            if 'from' in a.metadata:
                serialize_from = a.metadata['from']
            try:
                val = obj[serialize_from]
            except KeyError:
                continue
            if name[0] == "_":
                name = name[1:]
            conv_obj[name] = (
                dispatch(type_)(val, type_) if type_ is not None else val
            )
        return cl(**conv_obj)  # type: ignore


converter = ConverterWithMetaDataOverrides()
Usage:
from attr import attrs, ib


@attrs(slots=True, frozen=True, auto_attribs=True)
class LevelTwo(object):
    a: str = ib(metadata={'from': 'haha_not_a', 'to': 'haha_not_a'})
    b: str
    c: int


@attrs(slots=True, frozen=True, auto_attribs=True)
class LevelOne(object):
    leveltwo: LevelTwo = ib(metadata={'from': 'level_two', 'to': 'level_two'})


@attrs(slots=True, frozen=True, auto_attribs=True)
class Root(object):
    levelone: LevelOne = ib(metadata={'from': 'levelOne', 'to': 'levelOne'})


converter.structure(
    converter.unstructure(Root(levelone=LevelOne(leveltwo=LevelTwo(a='here', b='here_again', c=42)))),
    Root,
)

>>> converter.unstructure(Root(levelone=LevelOne(leveltwo=LevelTwo(a='here', b='here_again', c=42))))
>>> {'levelOne': {'level_two': {'haha_not_a': 'here', 'b': 'here_again', 'c': 42}}}

Doing Class.objects.filter(...) pattern in python

I am looking to use the pattern used in django models of Model.objects.filter(...) to build filters across data. This would probably be a good use case of pandas, but I'm more interested in improving my python (first) before trying that out.
If I have the following data:
DATA = [
    {'id': 1, 'name': 'brad', 'color': 'red'},
    {'id': 2, 'name': 'sylvia', 'color': 'blue'},
]
I would like to build something similar to the following:
class MyData:
    objects = <something>
And set the objects equivalent to a "ModelManager" and then do the filtering from there so that I can call:
MyData.objects.filter(id>1)
And get:
[
    {'id': 2, 'name': 'sylvia', 'color': 'blue'}
]
Of course I can do something as simple as:
res = [_ for _ in DATA if _['id'] > 1]
But I'm more interested in designing the pattern itself -- the trivial nature of the example is just meant to show what I'm looking to accomplish.
What would be a good, basic way to do this properly? Here's the relevant class in django for it: https://github.com/django/django/blob/master/django/db/models/query.py#L185.
The OP wants to do this MyData.objects.filter(id>1).
Let's face it.
The problem is Python is greedy (eagerly evaluates expressions), not lazy like Haskell.
Watch David Beazley - Lambda Calculus from the Ground Up - PyCon 2019 for mind-bending λ thing.
Python evaluates id > 1 before calling filter. If we can stop the evaluation for now, we can pass the expression unevaluated to the filter function.
But we can delay expression evaluation until required if we enclose the expression in a function. That's the idea.
The function interface would be filter(lambda: id > 1) if we could implement it.
This interface will be super versatile because any Python expression can be passed and abused.
The implementation:
if we invoke the lambda or any other function with the expression id > 1, Python looks up the name id in the local, enclosing, global scope or builtins depending on the context where the function is invoked.
If we can introduce an object with the name id somewhere in the look-up path before Python finds id in the builtins we can redefine the semantics of the expression.
I'm gonna do it with eval which evaluates expressions in the given context.
DATA = [
    {'id': 1, 'name': 'brad', 'color': 'red'},
    {'id': 2, 'name': 'sylvia', 'color': 'blue'},
]


def myfilter(a_lambda):
    return filter(lambda obj: eval(a_lambda.__code__, obj.copy()),
                  DATA)
I pass a copy of the dict to eval because eval modifies its globals object.
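You can watch that mutation happen (a tiny demo of my own, separate from the answer's code):

env = {"id": 5}
eval("id > 1", env)
print("__builtins__" in env)  # True: eval inserted the builtins into our mapping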
See it in action in the context of the Model class:
In [1]: class Data(Model):
   ...:     name = str()
   ...:     id = int()
   ...:     color = str()
   ...:

In [2]: Data.objects.create(**{"id": 1, "name": "brad", "color": "red"})

In [3]: Data.objects.create(**{"id": 2, "name": "sylvia", "color": "blue"})

In [4]: Data.objects.create(**{"id": 3, "name": "paul", "color": "red"})

In [5]: Data.objects.create(**{"id": 4, "name": "brandon", "color": "yello"})

In [6]: Data.objects.create(**{"id": 5, "name": "martin", "color": "green"})

In [7]: Data.objects.create(**{"id": 6, "name": "annie", "color": "gray"})

In [8]: pprint([vars(obj) for obj in Data.objects.filter(lambda: id == 1)])
[{'color': 'red', 'id': 1, 'name': 'brad'}]

In [9]: pprint([vars(obj) for obj in Data.objects.filter(lambda: 1 <= id <= 2)])
[{'color': 'red', 'id': 1, 'name': 'brad'},
 {'color': 'blue', 'id': 2, 'name': 'sylvia'}]

In [10]: pprint([vars(obj) for obj in Data.objects.filter(lambda: color == "blue")])
[{'color': 'blue', 'id': 2, 'name': 'sylvia'}]

In [11]: pprint([vars(obj) for obj in Data.objects.filter(lambda: "e" in color and (name is "brad" or name is "sylvia"))])
[{'color': 'red', 'id': 1, 'name': 'brad'},
 {'color': 'blue', 'id': 2, 'name': 'sylvia'}]

In [12]: pprint([vars(obj) for obj in Data.objects.filter(lambda: id % 2 == 1)])
[{'color': 'red', 'id': 1, 'name': 'brad'},
 {'color': 'red', 'id': 3, 'name': 'paul'},
 {'color': 'green', 'id': 5, 'name': 'martin'}]
The Data class inherits from Model. Model gives Data the __init__ method and a class attribute named objects that points to a MetaManager instance, which is a descriptor.
The MetaManager returns a Manager instance to subclasses of Model upon access of the objects attribute from the subclass. The MetaManager identifies the accessing class and passes it to the Manager instance.
The Manager handles object creation, persistence and fetch.
The db is implemented as a class attribute of Manager for simplicity.
To prevent abuse with global objects via ordinary functions, the filter function raises an exception if anything other than a lambda is passed.
from collections import defaultdict
from collections.abc import Callable


class MetaManager:
    def __get__(self, obj, objtype):
        if obj is None:
            return Manager(objtype)
        else:
            raise AttributeError(
                "Manager isn't accessible via {} instances".format(objtype)
            )


class Manager:
    _store = defaultdict(list)

    def __init__(self, client):
        self._client = client
        self._client_name = "{}.{}".format(client.__module__, client.__qualname__)

    def create(self, **kwargs):
        self._store[self._client_name].append(self._client(**kwargs))

    def all(self):
        return (obj for obj in self._store[self._client_name])

    def filter(self, a_lambda):
        if a_lambda.__code__.co_name != "<lambda>":
            raise ValueError("a lambda required")
        return (
            obj
            for obj in self._store[self._client_name]
            if eval(a_lambda.__code__, vars(obj).copy())
        )


class Model:
    objects = MetaManager()

    def __init__(self, **kwargs):
        if type(self) is Model:
            raise NotImplementedError
        class_attrs = self.__get_class_attributes(type(self))
        self.__init_instance(class_attrs, kwargs)

    def __get_class_attributes(self, cls):
        attrs = vars(cls)
        if "objects" in attrs:
            raise AttributeError(
                'class {} has an attribute named "objects" of type "{}"'.format(
                    type(self), type(attrs["objects"])
                )
            )
        attrs = {
            attr: obj
            for attr, obj in vars(cls).items()
            if not attr.startswith("_") and not isinstance(obj, Callable)
        }
        return attrs

    def __init_instance(self, attrs, kwargs_dict):
        for key, item in kwargs_dict.items():
            if key not in attrs:
                raise TypeError('Got an unexpected keyword argument "{}"'.format(key))
            if isinstance(item, type(attrs[key])):
                setattr(self, key, item)
            else:
                raise TypeError(
                    "Expected type {}, got {}".format(type(attrs[key]), type(item))
                )


if __name__ == "__main__":
    from pprint import pprint

    class Data(Model):
        name = str()
        id = int()
        color = str()

    Data.objects.create(**{"id": 1, "name": "brad", "color": "red"})
    Data.objects.create(**{"id": 2, "name": "sylvia", "color": "blue"})
    Data.objects.create(**{"id": 3, "name": "paul", "color": "red"})
    Data.objects.create(**{"id": 4, "name": "brandon", "color": "yello"})
    Data.objects.create(**{"id": 5, "name": "martin", "color": "green"})
    Data.objects.create(**{"id": 6, "name": "annie", "color": "gray"})

    pprint([vars(obj) for obj in Data.objects.filter(lambda: id == 1)])
    pprint([vars(obj) for obj in Data.objects.filter(lambda: 1 <= id <= 2)])
    pprint([vars(obj) for obj in Data.objects.filter(lambda: color == "blue")])
    pprint(
        [
            vars(obj)
            for obj in Data.objects.filter(
                lambda: "e" in color and (name is "brad" or name is "sylvia")
            )
        ]
    )
    pprint([vars(obj) for obj in Data.objects.filter(lambda: id % 2 == 1)])
If you want the full django Model experience, i.e.:
create a new feature vector or data entry with datapoint = MyData(name='johndoe', color='green', ...) just like in django: e.g. new_user = User(username='johndoe', email='jd@jd.com');
use the MyData.objects for object management, like MyData.objects.filter(color__eq='yellow');
here is an approach on how the logic could look like.
First you need basically a naive ObjectManager class:
import collections
import operator
import inspect
class ObjectManager(collections.MutableSet):
def __init__(self):
# this will hold a list of all attributes from your custom class, once
# initiated
self._object_attributes = None
self._theset = set()
def add(self, item):
self._theset.add(item)
def discard(self, item):
self._theset.discard(item)
def __iter__(self):
return iter(self._theset)
def __len__(self):
return len(self._theset)
def __contains__(self, item):
try:
return item in self._theset
except AttributeError:
return False
def set_attributes(self, an_object):
self._object_attributes = [
a[0] for a in inspect.getmembers(
an_object, lambda a:not(inspect.isroutine(a))
) if not(a[0].startswith('__') and a[0].endswith('__'))
]
def filter(self, **kwargs):
"""Filters your objects according to one or several conditions
If several filtering conditions are present you can set the
combination mode to either 'and' or 'or'.
"""
mode = kwargs.pop('mode', 'or')
ok_objects = set()
for kw in kwargs:
if '__' in kw:
_kw, op = kw.split('__')
# only allow valid operators
assert op in ('lt', 'le', 'eq', 'ne', 'ge', 'gt')
else:
op = 'eq'
_kw = kw
_oper = getattr(operator, op)
# only allow access to valid object attributes
assert _kw in self._object_attributes
n_objects = (
obj for obj in self
if _oper(getattr(obj, _kw), kwargs[kw])
)
if mode == 'and':
if n_objects:
ok_objects = ok_objects.intersection(n_objects)\
if ok_objects else set(n_objects)
else:
return set()
else:
ok_objects.update(n_objects)
return ok_objects
# feel free to add a `get_or_create`, `create`, etc.
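As a sketch of that last comment (my own assumption about the API, relying on instances registering themselves in __init__ as shown in the next snippet), a get_or_create method could look like this:

def get_or_create(self, cls, **kwargs):
    """Return (instance, created); kwargs are exact-match filters.

    Assumes cls.__init__ registers new instances with this manager,
    as MyData below does.
    """
    matches = self.filter(mode='and', **kwargs)
    if matches:
        return next(iter(matches)), False
    return cls(**kwargs), True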
Now you attach an instance of this class as an attribute to your MyData class and make sure all new objects are added to it:
class MyData:
    # initiate the object manager
    objects = ObjectManager()

    def __init__(self, uid, name, color):
        self.uid = uid
        self.name = name
        self.color = color
        # populate the list of query-able attributes on creation
        # of the first instance
        if not len(self.objects):
            self.objects.set_attributes(self)
        # add any new instance to the object manager
        self.objects.add(self)
Now you can import your feature vector:
DATA = [
    {'uid': 1, 'name': 'brad', 'color': 'red'},
    {'uid': 2, 'name': 'sylvia', 'color': 'blue'},
]

for dat in DATA:
    MyData(**dat)
or create new instances:
d1 = MyData(uid=10, name='john', color='yellow')
and make use of the manager to filter your objects:
print([md.name for md in MyData.objects.filter(uid__ge=10)])
# > ['john']
print([md.name for md in MyData.objects.filter(mode='and', uid__ge=1, name__eq='john')])
# > ['john']
print([md.name for md in MyData.objects.filter(mode='or', uid__le=4, name__eq='john')])
# > ['john', 'brad', 'sylvia']
If you cannot or don't want to change the class you want an object manager for, and you are willing to monkey patch around (note that I'm not advertising this!) you can even create a ObjectManager that can be hooked to an arbitrary class (built-in types won't work though) after definition or even initiation of some instances.
The idea is to monkey patch __init__ of the target class and add the objects attribute upon init of an instance of your ObjectManager:
import gc
import inspect
import collections.abc
import operator

import wrapt  # not standard lib > pip install wrapt


class ObjectManager(collections.abc.MutableSet):
    def __init__(self, attach_to):
        self._object_attributes = None
        # add self as class attribute
        attach_to.objects = self

        # monkey patch __init__ of your target class
        @wrapt.patch_function_wrapper(attach_to, '__init__')
        def n_init(wrapped, instance, args, kwargs):
            wrapped(*args, **kwargs)
            c_objects = instance.__class__.objects
            if not c_objects:
                c_objects.set_attributes(instance)
            c_objects.add(instance)

        # make sure to be up to date with the existing instances
        self._theset = set(obj for obj in gc.get_objects() if isinstance(obj, attach_to))
        # already fetch the attributes if instances exist
        if self._theset:
            self.set_attributes(next(iter(self._theset)))

    ...
    # the rest is identical to the version above
So now this is how you would use it:
class MyData:
    def __init__(self, uid, name, color):
        self.uid = uid
        self.name = name
        self.color = color


# create some instances
DATA = [
    {'uid': 1, 'name': 'brad', 'color': 'red'},
    {'uid': 2, 'name': 'sylvia', 'color': 'blue'},
]

my_datas = []
for dat in DATA:
    my_datas.append(MyData(**dat))  # appending them just to have a reference

# say that ONLY NOW you decide you want to use an object manager
# Simply do:
ObjectManager(MyData)

# and you are done:
print([md.name for md in MyData.objects.filter(mode='or', uid__le=4, name__eq='john')])
# > ['brad', 'sylvia']

# also any object you create from now on is included:
d1 = MyData(uid=10, name='john', color='yellow')
print([md.name for md in MyData.objects.filter(mode='or', uid__le=4, name__eq='john')])
# > ['brad', 'sylvia', 'john']
The following is an example where I'm creating a new NoteQuerySet class which inherits from django.db.models.QuerySet. After that, I'm taking advantage of the as_manager method; by doing so, the objects manager is overridden, preserving all the operations a manager is supposed to have.
So, in order to get the results you want, I've created a new custom_filter
method, which operates over the NoteQuerySet.data and uses a dictionary for
tracking and making it easy to add new filters.
As you can see, I'm creating a new custom_filter rather than overriding the
objects.filter; this is intentional so you don't lose the native filtering.
Also notice the operator built-in module, for easily mapping strings to operations.
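For reference, the operator module exposes the comparison operators as plain functions, which is what makes the string-to-operation mapping so compact (a two-line illustration):

import operator

allowed = {'gt': operator.gt, 'lt': operator.lt, 'eq': operator.eq}
print(allowed['gt'](3, 1))  # True, equivalent to 3 > 1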
models.py
import operator
from collections import namedtuple

from django.db import models


class NoteQuerySet(models.QuerySet):
    data = [
        {'id': 1, 'name': 'brad', 'color': 'red'},
        {'id': 2, 'name': 'sylvia', 'color': 'blue'},
        {'id': 3, 'name': 'sylwia', 'color': 'green'},
        {'id': 4, 'name': 'shane', 'color': 'red'},
    ]
    allowed_operations = {'gt': operator.gt, 'lt': operator.lt, 'eq': operator.eq}

    def custom_filter(self, **kwargs):
        """
        >>> kwargs = {'name': 'sylwia', 'id__gt': 1}
        >>> kwargs.items()
        dict_items([('name', 'sylwia'), ('id__gt', 1)])
        """
        operation = namedtuple('Q', 'op key value')

        def parse_filter(item):
            """item is expected to be a tuple with exactly two elements

            >>> parse_filter(('id__gt', 2))
            Q(op=<built-in function gt>, key='id', value=2)
            """
            key, *op = item[0].split('__')
            # no value after __ means an exact value query, e.g. name='sylvia'
            op = op or ['eq']
            return operation(self.allowed_operations[op[0]], key, item[1])

        filtered_data = self.data.copy()
        for item in map(parse_filter, kwargs.items()):
            filtered_data = [
                entry for entry in filtered_data if item.op(entry[item.key], item.value)
            ]
        return filtered_data


class Note(models.Model):
    text = models.CharField(max_length=250)

    objects = NoteQuerySet.as_manager()
All the logic till now is implemented in the models module. Next, a possible use case is shown in a ListView.
views.py
from django.views.generic import ListView
from .models import Note
class ResultsApplicationView(ListView):
model = Note
template_name = 'results.html'
def get_context_data(self, **kwargs):
kwargs = super().get_context_data(**kwargs)
if 'extra' not in kwargs:
kwargs['extra'] = self.model.objects.custom_filter(id__lt=3, color='red')
return kwargs
results.html
<h1>Notes</h1>
{% for note in object_list %}
{{note}}
{% endfor %}
{{ extra }}
UPDATE: Non-django implementation:

import operator
from collections import namedtuple


class DataQuerySet:
    allowed_operations = {
        'gt': operator.gt,
        'lt': operator.lt,
        'eq': operator.eq,
        'in': operator.contains,
    }

    def __init__(self, data):
        self.data = data

    def filter(self, **kwargs):
        """
        >>> kwargs = {'name': 'sylwia', 'id__gt': 1}
        >>> DataQuerySet(data).filter(**kwargs)
        [{'id': 3, 'name': 'sylwia', 'color': 'green'}]
        """
        operation = namedtuple('Q', 'op key value')

        def parse_filter(item):
            """item is expected to be a tuple with exactly two elements

            >>> parse_filter(('id__gt', 2))
            Q(op=<built-in function gt>, key='id', value=2)
            >>> parse_filter(('id__ ', 2))
            Q(op=<built-in function eq>, key='id', value=2)
            >>> parse_filter(('color__bad', 'red'))
            Traceback (most recent call last):
            ...
            AssertionError: 'bad' operation is not allowed
            """
            key, *op = item[0].split('__')
            # no value after __ means an exact value query, e.g. name='sylvia'
            op = ''.join(op).strip() or 'eq'
            assert op in self.allowed_operations, f'{repr(op)} operation is not allowed'
            return operation(self.allowed_operations[op], key, item[1])

        filtered_data = self.data.copy()
        results = []
        for item in map(parse_filter, kwargs.items()):
            for entry in filtered_data:
                if item.op == operator.contains and all(item.op(entry[item.key], v) for v in item.value):
                    results.append(entry)
                elif item.op(entry[item.key], item.value):
                    results.append(entry)
        return results
class Data:
    def __init__(self, data):
        self._data = DataQuerySet(data)

    @property
    def objects(self):
        return self._data
if __name__ == '__main__':
    data = [
        {'id': 1, 'name': 'brad', 'color': 'red', 'tags': ['c++', 'javascript']},
        {'id': 2, 'name': 'sylvia', 'color': 'blue', 'tags': ['c++']},
        {'id': 3, 'name': 'sylwia', 'color': 'green', 'tags': ['c++', 'javascript', 'python']},
        {'id': 4, 'name': 'shane', 'color': 'red', 'tags': ['c++', 'javascript', 'python']},
    ]
    d = Data(data)
    print('Entries with id greater than 2:', d.objects.filter(id__gt=2))
    print('Entries with color="green":', d.objects.filter(color='green'))
    print('Entries with "python" in tags:', d.objects.filter(tags__in=['python']))
The __in operation accepts a list of values. This code assumes you want all of them to be present in the tags (that's why we use all(item.op(entry[item.key], v) for v in item.value)).
Is this what you mean?
This solution depends on no external library and uses **kwargs, generators / closures and the @property decorator. So from a learning point of view it might be interesting.
If you manage to use Django to read the data that is in your list, then that would probably be much better concerning Django compatibility than my code.
It all depends on what your goal is: a perfect imitation of django filters, or learning how to do a not-so-perfect imitation while having the whole source code without dependencies.
DATA = [
    {'id': 1, 'name': 'brad', 'color': 'red'},
    {'id': 2, 'name': 'sylvia', 'color': 'blue'},
    {'id': 3, 'name': 'paul', 'color': 'red'},
    {'id': 4, 'name': 'brandon', 'color': 'yello'},
    {'id': 5, 'name': 'martin', 'color': 'green'},
    {'id': 6, 'name': 'annie', 'color': 'gray'},
]


class UnknownOperator(Exception):
    """ custom exception """


class FilterData:
    def __init__(self, data):
        self.data = data

    def _filter_step(self, key, value, data):
        if "__" not in key:
            return (entry for entry in data if entry[key] == value)
        else:
            key, operator = key.split("__")
            if operator == "gt":  # greater than
                return (entry for entry in data if entry[key] > value)
            elif operator == "lt":  # less than
                return (entry for entry in data if entry[key] < value)
            elif operator == "startswith":  # starts with
                return (entry for entry in data if entry[key].startswith(value))
            elif operator == "in":  # membership
                return (entry for entry in data if entry[key] in value)
            else:
                raise UnknownOperator("operator %s is unknown" % operator)

    def _exclude_step(self, key, value, data):
        if "__" not in key:
            return (entry for entry in data if entry[key] != value)
        else:
            key, operator = key.split("__")
            if operator == "gt":  # greater than
                return (entry for entry in data if entry[key] <= value)
            elif operator == "lt":  # less than
                return (entry for entry in data if entry[key] >= value)
            elif operator == "startswith":  # starts with
                return (entry for entry in data if not entry[key].startswith(value))
            elif operator == "in":  # membership
                return (entry for entry in data if entry[key] not in value)
            else:
                raise UnknownOperator("operator %s is unknown" % operator)

    def filter(self, **kwargs):
        data = (entry for entry in self.data)
        for key, value in kwargs.items():
            data = self._filter_step(key, value, data)
        return FilterData(data)

    def exclude(self, **kwargs):
        data = (entry for entry in self.data)
        for key, value in kwargs.items():
            data = self._exclude_step(key, value, data)
        return FilterData(data)

    def all(self):
        return FilterData(self.data)

    def count(self):
        cnt = 0
        for cnt, entry in enumerate(self.data, 1):
            pass
        return cnt

    def __iter__(self):
        for entry in self.data:
            yield entry
# make it look even more like django managers / filters
class DataManager:
    def __init__(self, data):
        self.data = data

    @property
    def objects(self):
        return FilterData(self.data)


fdata = FilterData(DATA)
assert [v["id"] for v in fdata.filter(name="paul")] == [3]
assert [v["id"] for v in fdata.filter(color="red")] == [1, 3]
assert [v["id"] for v in fdata.filter(id__gt=2)] == [3, 4, 5, 6]
assert [v["id"] for v in fdata.filter(color__startswith="gr")] == [5, 6]
fmgr = DataManager(DATA)
assert [v["id"] for v in fmgr.objects.filter(name="paul")] == [3]
assert [v["id"] for v in fmgr.objects.filter(color="red")] == [1, 3]
assert [v["id"] for v in fmgr.objects.filter(id__gt=2)] == [3, 4, 5, 6]
assert [v["id"] for v in fmgr.objects.filter(color__startswith="gr")] == [5, 6]
assert [v["id"] for v in fmgr.objects.filter(color__startswith="gr", id__lt=6)] == [5]
assert [v["id"] for v in fmgr.objects.filter(color__startswith="gr", id__lt=6)] == [5]
assert [v["id"] for v in fmgr.objects.filter(color__startswith="gr").filter(id__lt=6)] == [5]
assert fmgr.objects.filter(color__startswith="gr").filter(id__lt=6).count() == 1
assert fmgr.objects.filter(id__gt=2).count() == 4
assert fmgr.objects.count() == 6
assert [v["id"] for v in fmgr.objects.all()] == list(range(1, 7))

Converting Nested Json into Python object

I have nested json as below
{
    "product": "name",
    "protocol": "scp",
    "read_logs": {
        "log_type": "failure",
        "log_url": "htttp:url"
    }
}
I am trying to create Python class object with the below code.
import json


class Config(object):
    """
    Argument: JSON Object from the configuration file.
    """
    def __init__(self, attrs):
        if 'log_type' in attrs:
            self.log_type = attrs['log_type']
            self.log_url = attrs['log_url']
        else:
            self.product = attrs["product"]
            self.protocol = attrs["protocol"]

    def __str__(self):
        return "%s;%s" % (self.product, self.log_type)

    def get_product(self):
        return self.product

    def get_logurl(self):
        return self.log_url


class ConfigLoader(object):
    '''
    Create a configuration loader which can read JSON config files
    '''
    def load_config(self, attrs):
        with open(attrs) as data_file:
            config = json.load(data_file, object_hook=load_json)
        return config


def load_json(json_object):
    return Config(json_object)


loader = ConfigLoader()
config = loader.load_config('../config/product_config.json')
print(config.get_protocol())
But the object_hook is invoking load_json recursively, and the Config __init__ is being called twice. So the final object that I created does not contain the nested JSON data.
Is there any way to read the entire nested JSON object into a single Python class ?
Thanks
A variation on Pankaj Singhal's idea, but using a "generic" namespace class instead of namedtuples:

import json


class Generic:
    @classmethod
    def from_dict(cls, d):
        obj = cls()
        obj.__dict__.update(d)
        return obj


data = '{"product": "name", "read_logs": {"log_type": "failure", "log_url": "123"}}'
x = json.loads(data, object_hook=Generic.from_dict)
print(x.product, x.read_logs.log_type, x.read_logs.log_url)
namedtuple & object_hook can help create a one-liner:
# Create an object with attributes corresponding to JSON keys.
def json_to_obj(data): return json.loads(data, object_hook=lambda converted_dict: namedtuple('X', converted_dict.keys())(*converted_dict.values()))
OR Create a more readable function like below:
def _object_hook(converted_dict): return namedtuple('X', converted_dict.keys())(*converted_dict.values())
def json_to_obj(data): return json.loads(data, object_hook=_object_hook)
Below is the code snippet to use it:
import json
from collections import namedtuple
data = '{"product": "name", "read_logs": {"log_type": "failure", "log_url": htttp:url}}'
x = json_to_obj(data)
print x.product, x.read_logs.log_type, x.read_logs.log_url
NOTE: Check out namedtuple's rename parameter.
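rename matters here because JSON keys are not always valid Python identifiers; with rename=True, invalid field names are silently replaced with positional names instead of raising a ValueError (a small illustration):

from collections import namedtuple

X = namedtuple('X', ['valid', 'class', '123'], rename=True)
print(X._fields)  # ('valid', '_1', '_2')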
I wrote a simple DFS algorithm to do this job.
It converts a nested item into a flat dictionary. In my case, I joined the keys of the json item with a dash.
For example, the nested item { "a":[{"b": "c"}, {"d":"e"}] } will be transformed into {'a-0-b': 'c', 'a-1-d': 'e'}.
def DFS(item, headItem, heads, values):
    if isinstance(item, dict):
        for k in item.keys():
            DFS(item[k], headItem + [k], heads, values)
    elif isinstance(item, list):
        for i in range(len(item)):
            DFS(item[i], headItem + [str(i)], heads, values)
    else:
        headItemStr = '-'.join(headItem)
        heads.append(headItemStr)
        values.append(item)
    return


def reduce(jsonItem):
    heads, values = [], []
    DFS(jsonItem, [], heads, values)
    return heads, values


def json2dict(jsonItem):
    head, value = reduce(jsonItem)
    dictHeadValue = {head[i]: value[i] for i in range(len(head))}
    return dictHeadValue
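Sanity-checking the helpers with the example from above:

print(json2dict({"a": [{"b": "c"}, {"d": "e"}]}))
# {'a-0-b': 'c', 'a-1-d': 'e'}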
