Converting Nested Json into Python object - python

I have nested json as below
{
"product" : "name",
"protocol" : "scp",
"read_logs" : {
"log_type" : "failure",
"log_url" : "htttp:url"
}
}
I am trying to create Python class object with the below code.
import json
class Config(object):
    """
    Argument: JSON Object from the configuration file.

    An instance holds either the logging attributes (``log_type``/``log_url``)
    or the product attributes (``product``/``protocol``), depending on which
    keys the given dict contains.
    """

    def __init__(self, attrs):
        if 'log_type' in attrs:
            self.log_type = attrs['log_type']
            self.log_url = attrs['log_url']
        else:
            self.product = attrs["product"]
            self.protocol = attrs["protocol"]

    def __str__(self):
        # Only one of the two attribute groups is ever set, so read both
        # defensively instead of raising AttributeError.
        return "%s;%s" % (getattr(self, 'product', None),
                          getattr(self, 'log_type', None))

    def get_product(self):
        """Return the product name."""
        return self.product

    def get_protocol(self):
        """Return the protocol name."""
        return self.protocol

    def get_logurl(self):
        """Return the log URL."""
        return self.log_url
class ConfigLoader(object):
    '''
    Create a configuration loader which can read JSON config files
    '''

    def load_config(self, attrs):
        """Parse the JSON file at path ``attrs``, passing every decoded JSON
        object through ``load_json``, and return the result."""
        with open(attrs) as data_file:
            config = json.load(data_file, object_hook=load_json)
        return config
def load_json(json_object):
    """Wrap one decoded JSON object (a dict) in a ``Config``."""
    return Config(json_object)
loader = ConfigLoader()
config = loader.load_config('../config/product_config.json')
print config.get_protocol()
But, the object_hook is invoking the load_json recursively and the Class Config init is being called twice. So the final object that I created does not contain the nested JSON data.
Is there any way to read the entire nested JSON object into a single Python class ?
Thanks

A variation on Pankaj Singhal's idea, but using a "generic" namespace class instead of namedtuples:
import json
class Generic:
    """Plain namespace object whose attributes mirror the keys of a dict."""

    @classmethod
    def from_dict(cls, dict):
        """Return a new instance with one attribute per key of ``dict``.

        Suitable as a ``json.loads`` ``object_hook``.
        """
        obj = cls()
        obj.__dict__.update(dict)
        return obj
data = '{"product": "name", "read_logs": {"log_type": "failure", "log_url": "123"}}'
x = json.loads(data, object_hook=Generic.from_dict)
print(x.product, x.read_logs.log_type, x.read_logs.log_url)

namedtuple & object_hook can help create a one-liner:
# Create an object with attributes corresponding to JSON keys.
def json_to_obj(data): return json.loads(data, object_hook=lambda converted_dict: namedtuple('X', converted_dict.keys())(*converted_dict.values()))
OR Create a more readable function like below:
def _object_hook(converted_dict):
    """Turn one decoded JSON object into a namedtuple with matching fields.

    ``rename=True`` replaces keys that are not valid field names (e.g.
    ``"a-b"``, keywords, leading underscores) with positional names such as
    ``_0`` instead of raising ValueError.
    """
    return namedtuple('X', converted_dict.keys(), rename=True)(*converted_dict.values())


def json_to_obj(data):
    """Parse the JSON string ``data`` into nested namedtuples."""
    return json.loads(data, object_hook=_object_hook)
Below is the code snippet to use it:
import json
from collections import namedtuple

# The log_url value must be a quoted string for the document to be valid JSON.
data = '{"product": "name", "read_logs": {"log_type": "failure", "log_url": "htttp:url"}}'
x = json_to_obj(data)
# %-formatting prints the same text under Python 2 and Python 3.
print("%s %s %s" % (x.product, x.read_logs.log_type, x.read_logs.log_url))
NOTE: Check out namedtuple's rename parameter.

I wrote a simple DFS algorithm to do this job.
Convert nested item as a flat dictionary. In my case, I joined the keys of json item with a dash.
For example, nested item { "a":[{"b": "c"}, {"d":"e"}] } will be transformed as {'a-0-b': 'c', 'a-1-d': 'e'}.
def DFS(item, headItem, heads, values):
    """Depth-first walk of ``item``, recording every leaf.

    ``headItem`` is the list of path components leading to ``item``. For each
    leaf (anything that is neither a dict nor a list) the dash-joined path is
    appended to ``heads`` and the leaf itself to ``values``. Empty dicts and
    lists contribute nothing.
    """
    if isinstance(item, dict):
        for key, child in item.items():
            # str() so that non-string keys cannot break the join below
            DFS(child, headItem + [str(key)], heads, values)
    elif isinstance(item, list):
        for index, child in enumerate(item):
            DFS(child, headItem + [str(index)], heads, values)
    else:
        heads.append('-'.join(headItem))
        values.append(item)


def reduce(jsonItem):
    """Return ``(heads, values)``: parallel lists of flattened keys and leaves."""
    heads, values = [], []
    DFS(jsonItem, [], heads, values)
    return heads, values


def json2dict(jsonItem):
    """Flatten a nested JSON item into a dict keyed by dash-joined paths."""
    head, value = reduce(jsonItem)
    return dict(zip(head, value))

Related

Use Variable As Dictionary Key Set

I parse a JSON file to a dictionary, example JSON data below
{
"environmental": {
"temprature": {
"test" : "temprature",
"unit": "c",
"now": 12.65,
"now_timestamp": "10-06-2019 08:02:18",
"min": "12.5",
"min_timestamp": "03-06-2019 07:40:02",
"max": "32.84",
"max_timestamp": "03-06-2019 04:30:03"
}
}
}
I would like to either retrieve a value or set a value using a list tuple or string as the dictionary key.
var_lst_key = ["environmental", "temprature", "now"]
var_dict_x[var_lst_key] = "x"
or
print(var_dict_x[var_lst_key])
Part 1: Doing it the easy way: using functions
A nested lookup is pretty easy to do. You iterate over the keys, and keep replacing the object you're looking into with the value at the key you're currently looking at:
def nested_get(obj, keys):
    """Return the value reached by indexing ``obj`` with each key in turn.

    An empty ``keys`` returns ``obj`` itself; a missing key raises whatever
    the container raises (``KeyError`` for dicts).
    """
    for key in keys:
        obj = obj[key]
    return obj


def nested_set(obj, keys, value):
    """Set ``value`` at the nested location described by ``keys``.

    Every key but the last must already exist. ``keys`` may be any iterable.
    Raises ``IndexError`` when ``keys`` is empty.
    """
    keys = list(keys)
    if not keys:
        raise IndexError("keys cannot be empty")
    # Drill down to the container that holds the last key
    for key in keys[:-1]:
        obj = obj[key]
    # Set value of last key
    obj[keys[-1]] = value
To run this:
jstr = """{ "environmental": {
"temprature": {
"test" : "temprature",
"unit": "c",
"now": 12.65,
"now_timestamp": "10-06-2019 08:02:18",
"min": "12.5",
"min_timestamp": "03-06-2019 07:40:02",
"max": "32.84",
"max_timestamp": "03-06-2019 04:30:03"
}
}}"""
jobj = json.loads(jstr)
var_lst_key = ["environmental", "temprature", "now"]
nested_get(jobj, var_lst_key) # Returns 12.65
invalid_keys = ["environmental", "temprature", "hello"]
nested_get(jobj, invalid_keys) # throws KeyError 'hello'
nested_set(jobj, var_lst_key, "HELLO!")
nested_get(jobj, var_lst_key) # Returns HELLO!
Part 2: Doing it the fancy way: using a derived class
Now if you really want to use the dict[key] = value syntax, you're going to have to extend the dict class to override its __getitem__() and __setitem__() methods.
class NestedLookupDict(dict):
    """dict whose ``d[keys]`` syntax takes a sequence of keys and walks nested dicts."""

    def __init__(self, *args, **kwargs):
        # Name the class explicitly: super(type(self), self) recurses forever
        # as soon as this class is subclassed.
        super(NestedLookupDict, self).__init__(*args, **kwargs)
        self.insert_missing_keys = True

    def __getitem__(self, indices):
        """Return the value found by following ``indices`` through nested dicts."""
        obj = self
        for i in indices:
            obj = dict.__getitem__(obj, i)
        return obj

    def __setitem__(self, indices, value):
        """Set ``value`` at the nested location ``indices``.

        Missing intermediate dicts are created when ``insert_missing_keys``
        is true.
        """
        obj = self
        # Drill down to the container that holds the last key
        for i in indices[:-1]:
            # Insert a new dict if a key is missing
            if self.insert_missing_keys and not dict.__contains__(obj, i):
                dict.__setitem__(obj, i, dict())
            obj = dict.__getitem__(obj, i)
        # Set the value at the final key
        dict.__setitem__(obj, indices[-1], value)
To use this, let's use the json object parsed from the string like before:
# jobj = {... something ...}
nested_dict = NestedLookupDict(jobj)
print(nested_dict[var_lst_key]) # Prints 12.65
nested_dict[var_lst_key] = "HELLO!"
print(nested_dict[var_lst_key]) # Prints HELLO!
When nested_dict.insert_missing_keys is set to True (by default), the __setitem__() method adds missing dictionaries if required.
newkey = ["environmental", "temprature", "newkey"]
nested_dict[newkey] = "NEWKEY!!!"
print(nested_dict[newkey]) # Prints NEWKEY!!!
newkey2 = ["environmental", "temprature", "nested", "newkey"]
nested_dict[newkey2] = "NESTEDNEWKEY!!!"
print(nested_dict[newkey2]) # Prints NESTEDNEWKEY!!!
At the end of all this, you can dump the object to json to see what it looks like:
print(json.dumps(nested_dict))
# Output:
{
"environmental": {
"temprature": {
"test": "temprature",
"unit": "c",
"now": "HELLO!",
"now_timestamp": "10-06-2019 08:02:18",
"min": "12.5",
"min_timestamp": "03-06-2019 07:40:02",
"max": "32.84",
"max_timestamp": "03-06-2019 04:30:03",
"newkey": "NEWKEY!!!",
"nested": {
"newkey": "NESTEDNEWKEY!!!"
}
}
}
}
Part 3: Way overkill, but oh! so much fun to code: Souped up NestedLookupDict
Additional features so you can use it almost like a dict:
Delete keys using del nested_dict[key]
Check if keys exist with key in nested_dict
nested_dict.get(key, default) absorbs the KeyError if key doesn't exist, and returns default
Implemented a type check on keys: they must be list or tuple now
Quirks:
Because of the way __getitem__() is implemented, nested_dict[empty_list] returns a reference to nested_dict (itself). If this is a bad thing, a check for empty keys could be added. However, I don't see any problems coming from leaving it this way. Some consequences of this quirk:
To keep this behavior consistent with how __getitem__() works, the __contains__() function returns True for an empty key: [] in nested_dict := True
You cannot, by definition, set nested_dict[[]]. That throws a ValueError
class NestedLookupDict(dict):
    """dict addressed by a list/tuple of keys that is followed through nested dicts.

    Supports ``d[keys]``, ``d[keys] = value``, ``del d[keys]``, ``keys in d``
    and ``d.get(keys, default)``.
    """

    def __init__(self, *args, **kwargs):
        # Name the class explicitly: super(type(self), self) recurses forever
        # as soon as this class is subclassed.
        super(NestedLookupDict, self).__init__(*args, **kwargs)
        self.insert_missing_keys = True

    def check_keys(self, keys):
        """Raise TypeError unless ``keys`` is a list or tuple."""
        if not isinstance(keys, (list, tuple)):
            raise TypeError("keys must be of type list or tuple")

    def get(self, keys, default=None):
        """Return the value at ``keys``, or ``default`` when a key is missing."""
        self.check_keys(keys)
        try:
            return self.__getitem__(keys)
        except KeyError:
            return default

    def __contains__(self, keys):
        """Return True when the full key path exists.

        A missing or non-dict intermediate value means "not contained"; it
        does not raise.
        """
        self.check_keys(keys)
        if not keys:
            return True  # nested_dict contains the key [] = itself
        obj = self
        for i in keys[:-1]:
            if not isinstance(obj, dict) or not dict.__contains__(obj, i):
                return False
            obj = dict.__getitem__(obj, i)
        return isinstance(obj, dict) and dict.__contains__(obj, keys[-1])

    def __delitem__(self, keys):
        """Delete the entry at the nested location ``keys``."""
        self.check_keys(keys)
        obj = self
        for i in keys[:-1]:
            obj = dict.__getitem__(obj, i)
        dict.__delitem__(obj, keys[-1])

    def __getitem__(self, keys):
        """Return the value at ``keys``; an empty ``keys`` returns the dict itself."""
        self.check_keys(keys)
        obj = self
        for i in keys:
            obj = dict.__getitem__(obj, i)
        return obj

    def __setitem__(self, keys, value):
        """Set ``value`` at ``keys``.

        Missing intermediate dicts are created when ``insert_missing_keys``
        is true. Raises ValueError for empty ``keys``.
        """
        self.check_keys(keys)
        if not keys:
            raise ValueError("keys cannot be empty")
        obj = self
        # Drill down to the container that holds the last key
        for i in keys[:-1]:
            # Insert a new dict if a key is missing
            if self.insert_missing_keys and not dict.__contains__(obj, i):
                dict.__setitem__(obj, i, dict())
            obj = dict.__getitem__(obj, i)
        # Set the value at the final key
        dict.__setitem__(obj, keys[-1], value)
You can use the json and load it as dict as follows:
# importing the module
import json
# Opening JSON file
with open('data.json') as json_file:
data = json.load(json_file)
# Print the type of data variable
print("Type:", type(data))
# Print the data of dictionary
print("\nPeople1:", data['people1'])
print("\nPeople2:", data['people2'])
The following code outputs each element from dictionary using keys

Python Refactor JSON into different JSON Structure

I have a bunch of JSON data that I did mostly by hand. Several thousand lines. I need to refactor it into a totally different format using Python.
An overview of my 'stuff':
Column: The basic 'unit' of my data. Each Column has attributes. Don't worry about the meaning of the attributes, but the attributes need to be retained for each Column if they exist.
Folder: Folders group Columns and other Folders together. The folders currently have no attributes, they (currently) only contain other Folder and Column objects (Object does not necessarily refer to JSON objects here... more of an 'entity')
Universe: Universes group everything into big chunks which, in the larger scope of my project, are unable to interact with each other. That is not important here, but that's what they do.
Some limitations:
Columns cannot contain other Column objects, Folder objects, or Universe objects.
Folders cannot contain Universe objects.
Universes cannot contain other Universe objects.
Currently, I have Columns in this form:
"Column0Name": {
"type": "a type",
"dtype": "data type",
"description": "abcdefg"
}
and I need it to go to:
{
"name": "Column0Name",
"type": "a type",
"dtype": "data type",
"description": "abcdefg"
}
Essentially I need to convert the Column key-value things to an array of things (I am new to JSON, don't know the terminology). I also need each Folder to end up with two new JSON arrays (in addition to the "name": "FolderName" key-value pair). It needs a "folders": [] and "columns": [] to be added. So I have this for folders:
"Folder0Name": {
"Column0Name": {
"type": "a",
"dtype": "b",
"description": "c"
},
"Column1Name": {
"type": "d",
"dtype": "e",
"description": "f"
}
}
and need to go to this:
{
"name": "Folder0Name",
"folders": [],
"columns": [
{"name": "Column0Name", "type": "a", "dtype": "b", "description": "c"},
{"name": "Column1Name", "type": "d", "dtype": "e", "description": "f"}
]
}
The folders will also end up in an array inside its parent Universe. Likewise, each Universe will end up with "name", "folders", and "columns" things. As such:
{
"name": "Universe0",
"folders": [a bunch of folders in a JSON array],
"columns": [occasionally some columns in a JSON array]
}
Bottom line:
I'm going to guess that I need a recursive function to iterate though all the nested dictionaries after I import the JSON data with the json Python module.
I'm thinking some sort of usage of yield might help but I'm not super familiar yet with it.
Would it be easier to update the dicts as I go, or destroy each key-value pairs and construct an entirely new dict as I go?
Here is what I have so far. I'm stuck on getting the generator to return actual dictionaries instead of a generator object.
import json
class AllUniverses:
    """Container to hold all the Universes found in the json file"""

    def __init__(self, filename):
        self._fn = filename
        self.data = {}
        self.read_data()

    def read_data(self):
        """Load the JSON file into ``self.data`` and return ``self``."""
        with open(self._fn, 'r') as fin:
            self.data = json.load(fin)
        return self

    def universe_key(self):
        """Get the next universe key from the dict of all universes

        The key will be used as the name for the universe.
        """
        yield from self.data
class Universe:
    """One universe from the JSON file, split into Folder and Column children."""

    def __init__(self, json_filename):
        self._au = AllUniverses(filename=json_filename)
        self.uni_key = self._au.universe_key()
        self._universe_data = self._au.data.copy()
        self._col_attrs = ['type', 'dtype', 'description', 'aggregation']
        self._folders_list = []
        self._columns_list = []
        self._type = "Universe"
        self._name = ""
        self.uni = dict()
        self.is_folder = False
        self.is_column = False

    def output(self):
        """Return this universe as a plain dict."""
        # TODO: Pass this to json.dump?
        # TODO: Still need to get the actual folder and column dictionaries
        # from the generators
        out = {
            "name": self._name,
            "type": "Universe",
            "folder": [f.me for f in self._folders_list],
            "columns": [c.me for c in self._columns_list]}
        return out

    def update_universe(self):
        """Get the next universe"""
        universe_k = next(self.uni_key)
        self._name = str(universe_k)
        self.uni = self._universe_data.pop(universe_k)
        return self

    def parse_nodes(self):
        """Process all child nodes"""
        # Snapshot the keys first: the dict is emptied while iterating.
        for k in list(self.uni):
            v = self.uni.pop(k)
            self._is_column(val=v)
            if self.is_column:
                self._columns_list.append(Column(data=v, key_name=k))
            else:
                self._folders_list.append(Folder(data=v, key_name=k))
        return self

    def _is_column(self, val):
        """Determine if val is a Column or Folder object

        Sets ``self.is_column`` / ``self.is_folder``. A value counts as a
        column when it contains any of the column attribute keys; an empty
        dict counts as a folder. Raises TypeError for non-dict values.
        """
        self.is_folder = False
        # Reset the flag that parse_nodes() actually reads; writing to a
        # separate ``_column`` attribute left ``is_column`` permanently False.
        self.is_column = False
        if not isinstance(val, dict):
            raise TypeError('Cannot handle inputs not of type dict')
        if val and any(attr in val for attr in self._col_attrs):
            self.is_column = True
        else:
            self.is_folder = True
        return self

    def parse_children(self):
        """Ask every child Folder to parse its own nodes."""
        for folder in self._folders_list:
            assert isinstance(folder, Folder), 'bletch idk what happened'
            folder.parse_nodes()
class Folder:
    """A named group of child folders and columns taken from the old JSON layout."""

    def __init__(self, data, key_name):
        self._data = data.copy()
        self._name = str(key_name)
        self._node_keys = [_ for _ in self._data.keys()]
        self._folders = []
        self._columns = []
        self._col_attrs = ['type', 'dtype', 'description', 'aggregation']

    @property
    def me(self):
        # maybe this should force the code to parse all children of this
        # Folder? Need to convert the generator into actual dictionaries
        # NOTE(review): each list below holds a single generator object, not
        # the child dictionaries. This is the behaviour the question is
        # about, so it is intentionally left unchanged here.
        return {"name": self._name, "type": "Folder",
                "columns": [(c.me for c in self._columns)],
                "folders": [(f.me for f in self._folders)]}

    def parse_nodes(self):
        """Parse all the children of this Folder

        Parse through all the node names. If it is detected to be a Folder
        then create a Folder obj. from it and add to the list of Folder
        objects. Else create a Column obj. from it and append to the list
        of Column obj.
        This should be appending dictionaries
        """
        for key in self._node_keys:
            values = self._data[key]
            if not isinstance(values, dict):
                raise TypeError('Cannot handle inputs not of type dict')
            # A non-empty dict carrying any column attribute is a Column;
            # everything else (including an empty dict) is a Folder.
            if values and any(attr in values for attr in self._col_attrs):
                self._columns.append(Column(data=values, key_name=key).me)
            else:
                self._folders.append(Folder(data=values, key_name=key).me)
        return self
class Column:
    """A single column converted from the old JSON layout to the new one."""

    def __init__(self, data, key_name):
        self._data = data.copy()
        self._stupid_check()
        self._me = {
            'name': str(key_name),
            'type': 'Column',
            'ctype': self._data.pop('type'),
            'dtype': self._data.pop('dtype'),
            'description': self._data.pop('description'),
            'aggregation': self._data.pop('aggregation')}

    def __str__(self):
        return str(self.me)

    @property
    def me(self):
        """The column as a plain dict in the new layout."""
        return self._me

    def to_json(self):
        """Return the column dict serialised as a JSON object string."""
        # Serialise the dict itself; dumping ``self`` with a str() fallback
        # produced a JSON string containing a Python repr.
        return json.dumps(self._me)

    def _stupid_check(self):
        """If the key isn't in the dictionary, add it"""
        keys_defining_a_column = ['type', 'dtype', 'description', 'aggregation']
        for json_key in keys_defining_a_column:
            if json_key not in self._data:
                self._data[json_key] = ""
        return self
if __name__ == "__main__":
file = r"dummy_json_data.json"
u = Universe(json_filename=file)
u.update_universe()
u.parse_nodes()
u.parse_children()
print('check me')
And it gives me this:
{
"name":"UniverseName",
"type":"Universe",
"folder":[
{"name":"Folder0Name",
"type":"Folder",
"columns":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB0B0>],
"folders":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB190>]
},
{"name":"Folder2Name",
"type":"Folder",
"columns":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB040>],
"folders":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB120>]
},
{"name":"Folder4Name",
"type":"Folder",
"columns":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB270>],
"folders":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB200>]
},
{"name":"Folder6Name",
"type":"Folder",
"columns":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB2E0>],
"folders":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB350>]
},
{"name":"Folder8Name",
"type":"Folder",
"columns":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB3C0>],
"folders":[<generator object Folder.me.<locals>.<genexpr> at 0x000001ACFBEDB430>]
}
],
"columns":[]
}
If there is an existing tool for this kind of transformation so that I don't have to write Python code, that would be an attractive alternative, too.
Let's create the 3 classes needed to represent Columns, Folders and Universes. Before starting, there are some topics I want to mention. I give a short description of each here; if any of them is new to you I can go deeper:
I will use type annotations to make clear what type each variable is.
I am going to use __slots__. By telling the Column class that its instances are going to have name, ctype, dtype, description and aggregation attributes, each instance of Column will require less memory. The downside is that it will not accept any other attribute not listed there. That is, it saves memory but loses flexibility. As we are going to have several (maybe hundreds or thousands of) instances, a reduced memory footprint seems more important than the flexibility of being able to add any attribute.
Each class will have the standard constructor where every argument has a default value except name, which is mandatory.
Each class will have another constructor called from_old_syntax. It is going to be a class method that receives the string corresponding to the name and a dict corresponding to the data as its arguments and outputs the corresponding instance (Column, Folder or Universe).
Universes are basically the same as Folders with different names (for now) so it will basically inherit it (class Universe(Folder): pass).
from typing import List
class Column:
    """A column in the new layout; only the attributes in ``__slots__`` exist."""

    __slots__ = 'name', 'ctype', 'dtype', 'description', 'aggregation'

    def __init__(
        self,
        name: str,
        ctype: str = '',
        dtype: str = '',
        description: str = '',
        aggregation: str = '',
    ) -> None:
        self.name = name
        self.ctype = ctype
        self.dtype = dtype
        self.description = description
        self.aggregation = aggregation

    @classmethod
    def from_old_syntax(cls, name: str, data: dict) -> "Column":
        """Build a Column from its old-syntax name and attribute dict.

        Raises AttributeError when ``data`` has a key that is not a slot.
        """
        column = cls(name)
        for key, value in data.items():
            # The old syntax used type for column type but in the new syntax it
            # will have another meaning so we use ctype instead
            if key == 'type':
                key = 'ctype'
            try:
                setattr(column, key, value)
            except AttributeError as e:
                raise AttributeError(f"Unexpected key {key} for Column") from e
        return column
class Folder:
    """A named container of Columns and sub-Folders in the new layout."""

    __slots__ = 'name', 'folders', 'columns'

    def __init__(
        self,
        name: str,
        columns: List["Column"] = None,
        folders: List["Folder"] = None,
    ) -> None:
        self.name = name
        # Copy the given lists so the instance never shares them with the caller.
        self.columns = [] if columns is None else list(columns)
        self.folders = [] if folders is None else list(folders)

    @classmethod
    def from_old_syntax(cls, name: str, data: dict) -> "Folder":
        """Build a Folder (or subclass) recursively from old-syntax data."""
        columns = []  # type: List[Column]
        folders = []  # type: List["Folder"]
        for key, value in data.items():
            # Determine if it is a Column or a Folder
            if 'type' in value and 'dtype' in value:
                columns.append(Column.from_old_syntax(key, value))
            else:
                # Children are always plain Folders, even when cls is Universe.
                folders.append(Folder.from_old_syntax(key, value))
        return cls(name, columns, folders)


class Universe(Folder):
    pass
As you can see the constructors are pretty trivial, assign the arguments to the attributes and done. In the case of Folders (and thus in Universes too), two arguments are lists of columns and folders. The default value is None (in this case we initialize as an empty list) because using mutable variables as default values has some issues so it is good practice to use None as the default value for mutable variables (such as lists).
Column's from_old_syntax class method creates an empty Column with the provided name. Afterwards we iterate over the data dict that was also provided and assign its key value pair to its corresponding attribute. There is a special case where "type" key is converted to "ctype" as "type" is going to be used for a different purpose with the new syntax. The assignation itself is done by setattr(column, key, value). We have included it inside a try ... except ... clause because as we said above, only the items in __slots__ can be used as attributes, so if there is an attribute that you forgot, you will get an exception saying "AttributeError: Unexpected key 'NAME'" and you will only have to add that "NAME" to the __slots__.
Folder's (and thus Universe's) from_old_syntax class method is even simpler. Create a list of columns and folders, iterate over the data checking if it is a folder or a column and use the appropriate from_old_syntax class method. Then use those two lists and the provided name to return the instance. Notice that Folder.from_old_syntax notation is used to create the folders instead of cls.from_old_syntax because cls may be Universe. However, to create the instance we do use cls(...) as here we do want to use Universe or Folder.
Now you could do universes = [Universe.from_old_syntax(name, data) for name, data in json.load(f).items()] where f is the file and you will get all your Universes, Folders and Columns in memory. So now we need to encode them back to JSON. For this we are gonna extend the json.JSONEncoder so that it knows how to parse our classes into dictionaries that it can encode normally. To do so, you just need to overwrite the default method, check if the object passed is of our classes and return a dict that will be encoded. If it is not one of our classes we will let the parent default method to take care of it.
import json
# JSON fields with this values will be omitted
EMPTY_VALUES = "", [], {}
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialise Column, Folder and Universe."""

    def default(self, obj):
        """Return a dict for our classes; defer to the base class otherwise."""
        if isinstance(obj, (Column, Folder, Universe)):
            # Make a dict with every item in their respective __slots__
            data = {
                attr: getattr(obj, attr) for attr in obj.__slots__
                if getattr(obj, attr) not in EMPTY_VALUES
            }
            # Add the type field with the class name
            data['type'] = obj.__class__.__name__
            return data
        # Use the parent class function for any object not handled explicitly
        return super().default(obj)
Converting the classes to dictionaries is basically taking what is in __slots__ as the key and the attribute's value as the value. We will filter those values that are an empty string, an empty list or an empty dict as we do not need to write them to JSON. We finally add the "type" key to the dict by reading the objects class name (Column, Folder and Universe).
To use it you have to pass the CustomEncoder as the cls argument to json.dump.
So the code will look like this (omitting the class definitions to keep it short):
import json
from typing import List
# JSON fields with this values will be omitted
EMPTY_VALUES = "", [], {}
class Column:
# ...
class Folder:
# ...
class Universe(Folder):
pass
class CustomEncoder(json.JSONEncoder):
# ...
if __name__ == '__main__':
with open('dummy_json_data.json', 'r') as f_in, open('output.json', 'w') as f_out:
universes = [Universe.from_old_syntax(name, data)
for name, data in json.load(f_in).items()]
json.dump(universes, f_out, cls=CustomEncoder, indent=4)

Decorator: Maintain state

I need to collect metadata about a given function, such as which parameters it takes. An example of what I would like to do is:
@author("Joey")
@parameter("name", type=str)
@parameter("id", type=int)
@returns("Employee", desc="Returns employee with given details", type="Employee")
def get_employee(name, id):
    #
    # Some logic to return employee
    #
    pass
Skeleton of decorator could be as follows:
json = {}


def author(author):
    """Decorator factory: record ``author`` in ``json`` each time the function runs."""
    def wrapper(func):
        def internal(*args, **kwargs):
            json["author"] = author
            # Forward the arguments unpacked and hand back the result.
            return func(*args, **kwargs)
        return internal
    return wrapper
Similarly, parameter decorator could be written as follows:
def parameter(name, type=None):
    """Decorator factory: append a parameter description to ``json["parameters"]``
    each time the decorated function runs."""
    def wrapper(func):
        def internal(*args, **kwargs):
            para = {"name": name, "type": type}
            # Create the list on first use, then append to it.
            json.setdefault("parameters", []).append(para)
            return func(*args, **kwargs)
        return internal
    return wrapper
Similarly, other handlers could be written. At the end, I can just call one function which would get all formed JSONs for each function.
End output could be
[
{fun_name, "get_employee", author: "Joey", parameters : [{para_name : Name, type: str}, ... ], returns: {type: Employee, desc: "..."}
{fun_name, "search_employee", author: "Bob", parameters : [{para_name : age, type: int}, ... ], returns: {type: Employee, desc: "..."}
...
}
]
I'm not sure how I can maintain the state and know to consolidate the data regarding one function should be handled together.
How can I achieve this?
I don't know if I fully get your use case, but wouldn't it work to add author to your current functions as:
func_list = []


def func(var):
    return var


json = {}
json['author'] = 'JohanL'
# __name__ works on both Python 2 and 3; func_name is Python 2 only.
json['func'] = func.__name__
func.json = json
func_list.append(func.json)


def func2(var):
    return var


json = {}
json['author'] = 'Ganesh'
json['func'] = func2.__name__
func2.json = json
func_list.append(func2.json)
This can be automated using a decorator as follows:
def author(author):
    """Decorator factory: attach ``{'func': name, 'author': author}`` as ``func.json``."""
    def author_decorator(func):
        # Build a fresh dict per decorated function so that reusing one
        # decorator object never makes two functions share metadata.
        func.json = {'func': func.__name__, 'author': author}
        return func
    return author_decorator


def append(func_list):
    """Decorator factory: append the function's ``json`` metadata to ``func_list``.

    Must be applied after (written above) ``@author``, which creates ``func.json``.
    """
    def append_decorator(func):
        func_list.append(func.json)
        return func
    return append_decorator


func_list = []


@append(func_list)
@author('JohanL')
def func(var):
    return var


@append(func_list)
@author('Ganesh')
def func2(var):
    return var
Then you can access the json dict as func.json and func2.json or find the functions in the func_list. Note that for the decorators to work, you have to add them in the order I have put them and I have not added any error handling.
Also, if you prefer the func_list not to be passed explicitly, but instead to use a globally defined list with an explicit name, the code can be somewhat simplified to:
func_list = []


def author(author):
    """Decorator factory: attach ``{'func': name, 'author': author}`` as ``func.json``."""
    def author_decorator(func):
        # Build a fresh dict per decorated function so that reusing one
        # decorator object never makes two functions share metadata.
        func.json = {'func': func.__name__, 'author': author}
        return func
    return author_decorator


def append(func):
    """Decorator: append ``func.json`` to the module-level ``func_list``.

    Must be applied after (written above) ``@author``, which creates ``func.json``.
    """
    # No ``global`` statement needed: the list is mutated, not rebound.
    func_list.append(func.json)
    return func


@append
@author('JohanL')
def func(var):
    return var


@append
@author('Ganesh')
def func2(var):
    return var
Maybe this is sufficient for you?

How to serialize SqlAlchemy result to JSON?

Django has some good automatic serialization of ORM models returned from DB to JSON format.
How to serialize SQLAlchemy query result to JSON format?
I tried jsonpickle.encode but it encodes query object itself.
I tried json.dumps(items) but it returns
TypeError: <Product('3', 'some name', 'some desc')> is not JSON serializable
Is it really so hard to serialize SQLAlchemy ORM objects to JSON /XML? Isn't there any default serializer for it? It's very common task to serialize ORM query results nowadays.
What I need is just to return JSON or XML data representation of SQLAlchemy query result.
The SQLAlchemy query result is needed in JSON/XML format for use in a JavaScript datagrid (JQGrid http://www.trirand.com/blog/)
You could just output your object as a dictionary:
class User:
    def as_dict(self):
        """Return ``{column name: attribute value}`` for every column in
        ``self.__table__.columns``."""
        return {c.name: getattr(self, c.name) for c in self.__table__.columns}
And then you call as_dict() on a User instance (for example user.as_dict()) to get a dictionary that can be serialized.
As explained in Convert sqlalchemy row object to python dict
A flat implementation
You could use something like this:
from sqlalchemy.ext.declarative import DeclarativeMeta
class AlchemyEncoder(json.JSONEncoder):
    """Flat encoder: public attributes that are not JSON-encodable become None."""

    def default(self, obj):
        if isinstance(obj.__class__, DeclarativeMeta):
            # an SQLAlchemy class
            fields = {}
            for field in dir(obj):
                if field.startswith('_') or field == 'metadata':
                    continue
                data = getattr(obj, field)
                try:
                    json.dumps(data)  # this will fail on non-encodable values, like other classes
                except TypeError:
                    fields[field] = None
                else:
                    fields[field] = data
            # a json-encodable dict
            return fields
        return json.JSONEncoder.default(self, obj)
and then convert to JSON using:
c = YourAlchemyClass()
print json.dumps(c, cls=AlchemyEncoder)
It will ignore fields that are not encodable (set them to 'None').
It doesn't auto-expand relations (since this could lead to self-references, and loop forever).
A recursive, non-circular implementation
If, however, you'd rather loop forever, you could use:
from sqlalchemy.ext.declarative import DeclarativeMeta
def new_alchemy_encoder():
    """Return an encoder class that expands related objects recursively.

    Objects already seen by this encoder class are encoded as None.
    """
    _visited_objs = []

    class AlchemyEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj.__class__, DeclarativeMeta):
                # don't re-visit self
                if obj in _visited_objs:
                    return None
                _visited_objs.append(obj)
                # an SQLAlchemy class: a json-encodable dict of its public
                # attributes; nested values are encoded by further default() calls
                return {
                    field: getattr(obj, field)
                    for field in dir(obj)
                    if not field.startswith('_') and field != 'metadata'
                }
            return json.JSONEncoder.default(self, obj)

    return AlchemyEncoder
And then encode objects using:
print json.dumps(e, cls=new_alchemy_encoder(), check_circular=False)
This would encode all children, and all their children, and all their children... Potentially encode your entire database, basically. When it reaches something it has encoded before, it will encode it as 'None'.
A recursive, possibly-circular, selective implementation
Another alternative, probably better, is to be able to specify the fields you want to expand:
def new_alchemy_encoder(revisit_self = False, fields_to_expand = ()):
    """Return an encoder class that only expands the relations named in
    ``fields_to_expand``; other related objects are encoded as None."""
    _visited_objs = []

    class AlchemyEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj.__class__, DeclarativeMeta):
                # don't re-visit self
                # NOTE(review): visited objects are only tracked when
                # revisit_self is True, which looks inverted relative to the
                # parameter name -- confirm the intent before changing it.
                if revisit_self:
                    if obj in _visited_objs:
                        return None
                    _visited_objs.append(obj)
                # go through each field in this SQLalchemy class
                fields = {}
                for field in dir(obj):
                    if field.startswith('_') or field == 'metadata':
                        continue
                    val = getattr(obj, field)
                    # is this field another SQLalchemy object, or a list of SQLalchemy objects?
                    is_related = isinstance(val.__class__, DeclarativeMeta) or (
                        isinstance(val, list) and len(val) > 0
                        and isinstance(val[0].__class__, DeclarativeMeta))
                    if is_related and field not in fields_to_expand:
                        # not expanding this field: set it to None and continue
                        fields[field] = None
                        continue
                    fields[field] = val
                # a json-encodable dict
                return fields
            return json.JSONEncoder.default(self, obj)

    return AlchemyEncoder
You can now call it with:
print json.dumps(e, cls=new_alchemy_encoder(False, ['parents']), check_circular=False)
To only expand SQLAlchemy fields called 'parents', for example.
With Python 3.7+ and Flask 1.1+ you can use the built-in dataclasses module:
from dataclasses import dataclass
from datetime import datetime
from flask import Flask, jsonify
from flask_sqlalchemy import SQLAlchemy
app = Flask(__name__)
db = SQLAlchemy(app)


@dataclass
class User(db.Model):
    """User model; the annotated fields are the ones the dataclass exposes."""

    id: int
    email: str

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    email = db.Column(db.String(200), unique=True)


@app.route('/users/')
def users():
    """Return every user as a JSON array."""
    return jsonify(User.query.all())


if __name__ == "__main__":
    # Named differently from the `users` view so the view is not shadowed.
    seed_users = User(email="user1@gmail.com"), User(email="user2@gmail.com")
    db.create_all()
    db.session.add_all(seed_users)
    db.session.commit()
    app.run()
The /users/ route will now return a list of users.
[
{"email": "user1@gmail.com", "id": 1},
{"email": "user2@gmail.com", "id": 2}
]
Auto-serialize related models
@dataclass
class Account(db.Model):
    """Account model whose annotated ``users`` relationship is serialized too."""

    id: int
    users: User

    id = db.Column(db.Integer)
    users = db.relationship(User)  # User model would need a db.ForeignKey field
The response from jsonify(account) would be this.
{
"id":1,
"users":[
{
"email":"user1@gmail.com",
"id":1
},
{
"email":"user2@gmail.com",
"id":2
}
]
}
Overwrite the default JSON Encoder
from flask.json import JSONEncoder
class CustomJSONEncoder(JSONEncoder):
    """Add support for serializing timedeltas and datetimes."""

    def default(self, o):
        """Encode timedeltas via ``str()`` and datetimes via ``isoformat()``.

        Anything else is delegated to the parent encoder.
        """
        # Imported locally under an alias: the example does
        # `from datetime import datetime`, so the bare name `datetime`
        # is the class and has no `timedelta` attribute.
        import datetime as _dt

        if isinstance(o, _dt.timedelta):
            return str(o)
        if isinstance(o, _dt.datetime):
            return o.isoformat()
        return super().default(o)
app.json_encoder = CustomJSONEncoder
You can convert a RowProxy to a dict like this:
d = dict(row.items())
Then serialize that to JSON ( you will have to specify an encoder for things like datetime values )
It's not that hard if you just want one record ( and not a full hierarchy of related records ).
json.dumps([(dict(row.items())) for row in rs])
I recommend using marshmallow. It allows you to create serializers to represent your model instances with support to relations and nested objects.
Here is a truncated example from their docs. Take the ORM model, Author:
class Author(db.Model):
id = db.Column(db.Integer, primary_key=True)
first = db.Column(db.String(80))
last = db.Column(db.String(80))
A marshmallow schema for that class is constructed like this:
class AuthorSchema(Schema):
id = fields.Int(dump_only=True)
first = fields.Str()
last = fields.Str()
formatted_name = fields.Method("format_name", dump_only=True)
def format_name(self, author):
return "{}, {}".format(author.last, author.first)
...and used like this:
author_schema = AuthorSchema()
author_schema.dump(Author.query.first())
...would produce an output like this:
{
"first": "Tim",
"formatted_name": "Peters, Tim",
"id": 1,
"last": "Peters"
}
Have a look at their full Flask-SQLAlchemy Example.
A library called marshmallow-sqlalchemy specifically integrates SQLAlchemy and marshmallow. In that library, the schema for the Author model described above looks like this:
class AuthorSchema(ModelSchema):
class Meta:
model = Author
The integration allows the field types to be inferred from the SQLAlchemy Column types.
marshmallow-sqlalchemy here.
You can use SQLAlchemy's introspection like this:
mysql = SQLAlchemy()
from sqlalchemy import inspect
class Contacts(mysql.Model):
    """ORM model mapped to the ``CONTACTS`` table."""

    __tablename__ = 'CONTACTS'

    id = mysql.Column(mysql.Integer, primary_key=True)
    first_name = mysql.Column(mysql.String(128), nullable=False)
    last_name = mysql.Column(mysql.String(128), nullable=False)
    phone = mysql.Column(mysql.String(128), nullable=False)
    email = mysql.Column(mysql.String(128), nullable=False)
    street = mysql.Column(mysql.String(128), nullable=False)
    zip_code = mysql.Column(mysql.String(128), nullable=False)
    city = mysql.Column(mysql.String(128), nullable=False)

    def toDict(self):
        """Return ``{key: value}`` for every column attribute of the mapper."""
        as_dict = {}
        for attr in inspect(self).mapper.column_attrs:
            as_dict[attr.key] = getattr(self, attr.key)
        return as_dict
@app.route('/contacts', methods=['GET'])
def getContacts():
    """Return all contacts as a JSON array of column dictionaries."""
    return jsonify([contact.toDict() for contact in Contacts.query.all()])
@app.route('/contacts/<int:id>', methods=['GET'])
def getContact(id):
    """Return one contact as JSON, or a 404 response for an unknown id."""
    # get_or_404 avoids calling toDict() on None when the id does not exist.
    contact = Contacts.query.get_or_404(id)
    return jsonify(contact.toDict())
Get inspired from an answer here :
Convert sqlalchemy row object to python dict
Flask-JsonTools package has an implementation of JsonSerializableBase Base class for your models.
Usage:
from sqlalchemy.ext.declarative import declarative_base
from flask.ext.jsontools import JsonSerializableBase
Base = declarative_base(cls=(JsonSerializableBase,))
class User(Base):
#...
Now the User model is magically serializable.
If your framework is not Flask, you can just grab the code
For security reasons you should never return all the model's fields. I prefer to selectively choose them.
Flask's json encoding now supports UUID, datetime and relationships (and added query and query_class for flask_sqlalchemy db.Model class). I've updated the encoder as follows:
app/json_encoder.py
from sqlalchemy.ext.declarative import DeclarativeMeta
from flask import json
class AlchemyEncoder(json.JSONEncoder):
    """Encoder that turns declarative model instances into plain dicts."""

    def default(self, o):
        """Encode a model as ``{field: value}``; defer everything else.

        The field names come from ``o.__json__()`` when the model defines
        it, otherwise from ``dir(o)``. Values that ``json.dumps`` rejects
        with ``TypeError`` are stored as ``None``.
        """
        if not isinstance(o.__class__, DeclarativeMeta):
            return json.JSONEncoder.default(self, o)

        skipped = ['metadata', 'query', 'query_class']
        candidates = o.__json__() if hasattr(o, '__json__') else dir(o)
        names = [f for f in candidates if not f.startswith('_') and f not in skipped]

        data = {}
        for name in names:
            value = o.__getattribute__(name)
            try:
                json.dumps(value)
            except TypeError:
                data[name] = None
            else:
                data[name] = value
        return data
app/__init__.py
# json encoding
from app.json_encoder import AlchemyEncoder
app.json_encoder = AlchemyEncoder
With this I can optionally add a __json__ property that returns the list of fields I wish to encode:
app/models.py
class Queue(db.Model):
id = db.Column(db.Integer, primary_key=True)
song_id = db.Column(db.Integer, db.ForeignKey('song.id'), unique=True, nullable=False)
song = db.relationship('Song', lazy='joined')
type = db.Column(db.String(20), server_default=u'audio/mpeg')
src = db.Column(db.String(255), nullable=False)
created_at = db.Column(db.DateTime, server_default=db.func.now())
updated_at = db.Column(db.DateTime, server_default=db.func.now(), onupdate=db.func.now())
def __init__(self, song):
self.song = song
self.src = song.full_path
def __json__(self):
return ['song', 'src', 'type', 'created_at']
I add @jsonapi to my view, return the result list, and then my output is as follows:
[
{
"created_at": "Thu, 23 Jul 2015 11:36:53 GMT",
"song":
{
"full_path": "/static/music/Audioslave/Audioslave [2002]/1 Cochise.mp3",
"id": 2,
"path_name": "Audioslave/Audioslave [2002]/1 Cochise.mp3"
},
"src": "/static/music/Audioslave/Audioslave [2002]/1 Cochise.mp3",
"type": "audio/mpeg"
}
]
A more detailed explanation.
In your model, add:
def as_dict(self):
    """Return a dict of column name -> ``str()`` of the column's value."""
    row = {}
    for column in self.__table__.columns:
        row[column.name] = str(getattr(self, column.name))
    return row
The str() is for Python 3, so if you are using Python 2 use unicode() instead. It helps serialize values such as dates. You can remove it if you are not dealing with those.
You can now query the database like this
some_result = User.query.filter_by(id=current_user.id).first().as_dict()
first() is needed so that as_dict() is called on a single model instance rather than on the query object. as_dict() will now serialize the result to a dict, after which it is ready to be turned into JSON:
jsonify(some_result)
While the original question goes back awhile, the number of answers here (and my own experiences) suggest it's a non-trivial question with a lot of different approaches of varying complexity with different trade-offs.
That's why I built the SQLAthanor library that extends SQLAlchemy's declarative ORM with configurable serialization/de-serialization support that you might want to take a look at.
The library supports:
Python 2.7, 3.4, 3.5, and 3.6.
SQLAlchemy versions 0.9 and higher
serialization/de-serialization to/from JSON, CSV, YAML, and Python dict
serialization/de-serialization of columns/attributes, relationships, hybrid properties, and association proxies
enabling and disabling of serialization for particular formats and columns/relationships/attributes (e.g. you want to support an inbound password value, but never include an outbound one)
pre-serialization and post-deserialization value processing (for validation or type coercion)
a pretty straightforward syntax that is both Pythonic and seamlessly consistent with SQLAlchemy's own approach
You can check out the (I hope!) comprehensive docs here: https://sqlathanor.readthedocs.io/en/latest
Hope this helps!
Custom serialization and deserialization.
"from_json" (class method) builds a Model object based on json data.
"deserialize" could be called only on instance, and merge all data from json into Model instance.
"serialize" - recursive serialization
__write_only__ property is needed to define write only properties ("password_hash" for example).
class Serializable(object):
    """Mixin providing dict-based serialization and deserialization.

    Class attributes:
      * ``__exclude__``    - property names ``from_json`` never sets.
      * ``__include__``    - see the review note in ``from_json``.
      * ``__write_only__`` - property names ``serialize`` never emits
        (``password_hash`` for example).
    """

    __exclude__ = ('id',)
    __include__ = ()
    __write_only__ = ()

    @classmethod
    def from_json(cls, json, selfObj=None):
        """Copy values from the mapping *json* onto a model instance.

        :param json: mapping of property name -> value; falsy means "nothing
            to copy".
        :param selfObj: instance to update; a new ``cls()`` is created when
            omitted.
        :return: the updated (or newly created) instance.
        """
        self = cls() if selfObj is None else selfObj
        exclude = (cls.__exclude__ or ()) + Serializable.__exclude__
        include = cls.__include__ or ()
        if json:
            for prop, value in json.items():
                # NOTE(review): the original expression
                # `not (prop in exclude) | (prop in include)` skips a
                # property listed in EITHER tuple. That is preserved here,
                # but `__include__` was presumably meant to override
                # `__exclude__` - confirm the intent.
                if prop in exclude or prop in include:
                    continue
                # ignore all non user data: only mapped attributes are set
                if isinstance(getattr(cls, prop, None), QueryableAttribute):
                    setattr(self, prop, value)
        return self

    def deserialize(self, json):
        """Merge *json* into this instance; return ``None`` for empty input."""
        if not json:
            return None
        return self.__class__.from_json(json, selfObj=self)

    @classmethod
    def serialize_list(cls, object_list=()):
        """Serialize each ``Serializable`` in *object_list*; keep other items."""
        return [
            item.serialize() if isinstance(item, Serializable) else item
            for item in object_list
        ]

    def serialize(self, **kwargs):
        """Recursively convert this instance into a plain dictionary.

        Keyword arguments:
          * ``expand`` - attribute names to touch first (via ``getattr``).
          * ``props``  - if given, only these properties are emitted; an
            entry may be a tuple whose first item is the property name.

        Names starting with ``_`` and names in ``__write_only__`` are
        skipped. Values are read from the instance ``__dict__``.
        """
        write_only = getattr(self.__class__, '__write_only__', ()) or ()

        # Touch every attribute listed in `expand` before reading __dict__.
        for key in kwargs.get('expand', ()) or ():
            getattr(self, key)

        props = kwargs.get('props')
        use_custom_props = props is not None
        # Snapshot the names so __dict__ is never iterated while in use.
        names = props if use_custom_props else list(self.__dict__)

        dictionary = {}
        for entry in names:
            accessor = entry[0] if isinstance(entry, tuple) else entry
            if accessor in write_only or accessor.startswith('_'):
                continue
            if use_custom_props:
                # force select from db to be able to get relationships
                getattr(self, accessor, None)
            value = self.__dict__.get(accessor)
            if isinstance(value, list):
                dictionary[accessor] = self.__class__.serialize_list(object_list=value)
            elif isinstance(value, Serializable):
                dictionary[accessor] = value.serialize()
            else:
                dictionary[accessor] = value
        return dictionary
Here is a solution that lets you select the relations you want to include in your output as deep as you would like to go.
NOTE: This is a complete re-write taking a dict/str as an arg rather than a list. fixes some stuff..
def deep_dict(self, relations=None):
    """Output a dict of an SA object recursing as deep as you want.

    Takes one argument, relations, which describes the relations we'd like
    to pull out. It can be a single relation name (str) or a dict mapping
    relation names to deeper relation specs (dict, str or None).

    Example:
        Say we have a Person object with a family relationship
            person.deep_dict(relations={'family': None})
        Say the family object has homes as a relation then we can do
            person.deep_dict(relations={'family': {'homes': None}})
            OR
            person.deep_dict(relations={'family': 'homes'})
        Say homes has a relation like rooms you can do
            person.deep_dict(relations={'family': {'homes': 'rooms'}})
            and so on...

    Every entry of the instance ``__dict__`` except ``_sa_instance_state``
    is emitted as ``str(value)``; requested relations then replace their
    entry with nested dicts (or lists of dicts). A relation whose value is
    ``None`` is emitted as ``None``.

    :raises TypeError: if relations is neither a dict nor a str.
    """
    mydict = {key: str(value) for key, value in self.__dict__.items()
              if key != '_sa_instance_state'}
    if not relations:
        # just return ourselves
        return mydict

    if isinstance(relations, str):
        # a single relation name is shorthand for {name: None}
        relations = {relations: None}
    elif not isinstance(relations, dict):
        raise TypeError(
            "relations should be a dict or str, it is of type {}".format(type(relations)))

    for name, nested in relations.items():
        related = getattr(self, name)
        if related is None:
            # unset relationship: nothing to recurse into
            mydict[name] = None
        elif isinstance(related, list):
            mydict[name] = [rel.deep_dict(relations=nested) for rel in related]
        else:
            mydict[name] = related.deep_dict(relations=nested)
    return mydict
so for an example using person/family/homes/rooms... turning it into json all you need is
json.dumps(person.deep_dict(relations={'family':{'homes':'rooms'}}))
step1:
class CNAME:
...
def as_dict(self):
return {item.name: getattr(self, item.name) for item in self.__table__.columns}
step2:
list = []
for data in session.query(CNAME).all():
list.append(data.as_dict())
step3:
return jsonify(list)
Even though this is an old post and I may not be answering the question above exactly, I want to share my serialization approach; at least it works for me.
I use FastAPI,SqlAlchemy and MySQL, but I don't use orm model;
# from sqlalchemy import create_engine
# from sqlalchemy.orm import sessionmaker
# engine = create_engine(config.SQLALCHEMY_DATABASE_URL, pool_pre_ping=True)
# SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Serialization code
import decimal
import datetime
def alchemy_encoder(obj):
    """JSON encoder function for SQLAlchemy special classes.

    Intended as the ``default=`` hook of ``json.dumps``.

    :param obj: a value the standard encoder could not serialize.
    :return: ``"YYYY-MM-DD HH:MM:SS"`` for dates and datetimes (a plain
        date gets ``00:00:00``), a float for ``decimal.Decimal``.
    :raises TypeError: for any other type, so that unsupported values are
        reported instead of being silently emitted as ``null``.
    """
    if isinstance(obj, datetime.date):
        return obj.strftime("%Y-%m-%d %H:%M:%S")
    if isinstance(obj, decimal.Decimal):
        return float(obj)
    raise TypeError(
        "Object of type {} is not JSON serializable".format(type(obj).__name__))
import json
from sqlalchemy import text
# db is SessionLocal() object
app_sql = 'SELECT * FROM app_info ORDER BY app_id LIMIT :page,:page_size'
# The next two are the parameters passed in
page = 1
page_size = 10
# execute sql and return a <class 'sqlalchemy.engine.result.ResultProxy'> object
app_list = db.execute(text(app_sql), {'page': page, 'page_size': page_size})
# serialize
res = json.loads(json.dumps([dict(r) for r in app_list], default=alchemy_encoder))
If it doesn't work, please ignore my answer. I refer to it here
https://codeandlife.com/2014/12/07/sqlalchemy-results-to-json-the-easy-way/
Install simplejson with
pip install simplejson and then create a class:
class Serialise(object):
    def _asdict(self):
        """
        Serialization logic for converting entities using flask's jsonify

        Non-datetime columns are stored under their own key. A datetime
        column is stored as a millisecond timestamp under the fixed keys
        ``"x"`` and ``"timestamp"`` instead of under its own key.

        :return: An ordered dictionary
        :rtype: :class:`collections.OrderedDict`
        """
        result = OrderedDict()
        # Walk the mapper's columns
        for key in self.__mapper__.c.keys():
            value = getattr(self, key)
            if not isinstance(value, datetime):
                result[key] = value
                continue
            result["x"] = value.timestamp() * 1000
            result["timestamp"] = result["x"]
        return result
Then have every ORM class inherit from this class, so that the _asdict function is available on every ORM class, and boom.
And use jsonify anywhere
It is not so straightforward. I wrote some code to do this. I'm still working on it, and it uses the MochiKit framework. It basically translates compound objects between Python and JavaScript using a proxy and registered JSON converters.
Browser side for database objects is db.js
It needs the basic Python proxy source in proxy.js.
On the Python side there is the base proxy module.
Then finally the SqlAlchemy object encoder in webserver.py.
It also depends on metadata extractors found in the models.py file.
def alc2json(row):
    """Return ``{column_name: str(value)}`` for every column of *row*'s table."""
    return {col: str(getattr(row, col)) for col in row.__table__.columns.keys()}
I thought I'd play a little code golf with this one.
FYI: I am using automap_base since we have a separately designed schema according to business requirements. I just started using SQLAlchemy today but the documentation states that automap_base is an extension to declarative_base which seems to be the typical paradigm in the SQLAlchemy ORM so I believe this should work.
It does not get fancy with following foreign keys per Tjorriemorrie's solution, but it simply matches columns to values and handles Python types by str()-ing the column values. Our values consist Python datetime.time and decimal.Decimal class type results so it gets the job done.
Hope this helps any passers-by!
I know this is quite an old post. I took the solution given by @SashaB and modified it to fit my needs.
I added following things to it:
Field ignore list: A list of fields to be ignored while serializing
Field replace list: A dictionary containing field names to be replaced by values while serializing.
Removed methods and BaseQuery getting serialized
My code is as follows:
def alchemy_json_encoder(revisit_self=False, fields_to_expand=(), fields_to_ignore=(), fields_to_replace=None):
    """
    Build a JSON encoder class that serializes SQLAlchemy results.

    :param revisit_self: True / False; when True, an object met a second
        time is encoded as None
    :param fields_to_expand: Fields which are to be expanded for including their children and all
    :param fields_to_ignore: Fields to be ignored while encoding
    :param fields_to_replace: Mapping of field name -> key to emit instead
    :return: a json.JSONEncoder subclass to pass as ``cls=`` to json.dumps
    """
    replacements = {} if fields_to_replace is None else fields_to_replace
    _visited_objs = []

    class AlchemyEncoder(json.JSONEncoder):
        def default(self, obj):
            if not isinstance(obj.__class__, DeclarativeMeta):
                return json.JSONEncoder.default(self, obj)

            # don't re-visit self
            if revisit_self:
                if obj in _visited_objs:
                    return None
                _visited_objs.append(obj)

            # go through each field in this SQLAlchemy class
            fields = {}
            for field in dir(obj):
                if field.startswith('_') or field == 'metadata' or field in fields_to_ignore:
                    continue
                val = getattr(obj, field)
                # skip method definitions and query objects
                if callable(val) or isinstance(val, BaseQuery):
                    continue
                field_name = replacements.get(field, field)
                # is this field another SQLAlchemy object, or a list of them?
                is_model = isinstance(val.__class__, DeclarativeMeta)
                is_model_list = (
                    isinstance(val, list)
                    and len(val) > 0
                    and isinstance(val[0].__class__, DeclarativeMeta)
                )
                if (is_model or is_model_list) and field not in fields_to_expand:
                    # not expanding this field: set it to None and continue
                    fields[field_name] = None
                    continue
                fields[field_name] = val
            # a json-encodable dict
            return fields

    return AlchemyEncoder
Hope it helps someone!
Use the built-in serializer in SQLAlchemy:
from sqlalchemy.ext.serializer import loads, dumps
obj = MyAlchemyObject()
# serialize object
serialized_obj = dumps(obj)
# deserialize object
obj = loads(serialized_obj)
If you're transferring the object between sessions, remember to detach the object from the current session using session.expunge(obj).
To attach it again, just do session.add(obj).
Under Flask, this works and handles datetime fields, transforming a field of type
'time': datetime.datetime(2018, 3, 22, 15, 40) into
"time": "2018-03-22 15:40:00":
obj = {c.name: str(getattr(self, c.name)) for c in self.__table__.columns}
# This to get the JSON body
return json.dumps(obj)
# Or this to get a response object
return jsonify(obj)
following code will serialize sqlalchemy result to json.
import json
from collections import OrderedDict
def asdict(self):
    """Return an OrderedDict of mapper column key -> value.

    Values are converted with ``str()``; ``None`` is kept as ``None``.
    """
    result = OrderedDict()
    for key in self.__mapper__.c.keys():
        value = getattr(self, key)
        result[key] = value if value is None else str(value)
    return result
def to_array(all_vendors):
    """Return a JSON array string built from each item's ``asdict()``."""
    rows = []
    for vendor in all_vendors:
        rows.append(vendor.asdict())
    return json.dumps(rows)
Calling fun,
def all_products():
all_products = Products.query.all()
return to_array(all_products)
The AlchemyEncoder is wonderful but sometimes fails with Decimal values. Here is an improved encoder that solves the decimal problem -
class AlchemyEncoder(json.JSONEncoder):
    """Serialize SQLAlchemy model objects, converting Decimal values to float."""

    def default(self, obj):
        """Encode Decimals and declarative models; defer everything else.

        For a model, every public attribute except ``metadata`` is probed
        with ``json.dumps``; values it rejects are stored as ``None``.
        Decimal attribute values are converted to float before probing,
        because the plain ``json.dumps`` probe would otherwise reject them
        and turn them into ``None``.
        """
        if isinstance(obj, Decimal):
            return float(obj)
        if not isinstance(obj.__class__, DeclarativeMeta):
            return json.JSONEncoder.default(self, obj)

        model_fields = {}
        for field in dir(obj):
            if field.startswith('_') or field == 'metadata':
                continue
            data = getattr(obj, field)
            if isinstance(data, Decimal):
                model_fields[field] = float(data)
                continue
            try:
                json.dumps(data)  # this will fail on non-encodable values, like other classes
            except TypeError:
                model_fields[field] = None
            else:
                model_fields[field] = data
        return model_fields
When using SQLAlchemy to connect to a database, this is a simple solution which is highly configurable: use pandas.
import pandas as pd
import sqlalchemy
#sqlalchemy engine configuration
engine = sqlalchemy.create_engine....
def my_function():
#read in from sql directly into a pandas dataframe
#check the pandas documentation for additional config options
sql_DF = pd.read_sql_table("table_name", con=engine)
# "orient" is optional here but allows you to specify the json formatting you require
sql_json = sql_DF.to_json(orient="index")
return sql_json
(Tiny tweak on Sasha B's really excellent answer)
This specifically converts datetime objects to strings which in the original answer would be converted to None:
# Standard library imports
from datetime import datetime
import json
# 3rd party imports
from sqlalchemy.ext.declarative import DeclarativeMeta
class JsonEncoder(json.JSONEncoder):
    """Encoder for declarative models that stringifies datetime attributes."""

    def default(self, obj):
        """Encode a model as a dict of its public attributes.

        Values rejected by ``json.dumps`` become their ``__str__()`` when
        they are datetimes and ``None`` otherwise. Non-model objects are
        delegated to the base encoder.
        """
        if not isinstance(obj.__class__, DeclarativeMeta):
            return json.JSONEncoder.default(self, obj)

        encoded = {}
        # Drop private names and `metadata`; keep everything else
        public_names = [name for name in dir(obj) if not name.startswith("_") and name != "metadata"]
        for name in public_names:
            value = obj.__getattribute__(name)
            try:
                json.dumps(value)
            except TypeError:
                encoded[name] = value.__str__() if isinstance(value, datetime) else None
            else:
                encoded[name] = value
        return encoded
class SqlToDict:
    """Convert an iterable of row-like objects into a list of dicts."""

    def __init__(self, data) -> None:
        # Rows must provide keys() and iterate over their values.
        self.data = data

    def to_timestamp(self, date):
        """Return an integer timestamp for datetimes; pass other values through."""
        if not isinstance(date, datetime):
            return date
        return int(datetime.timestamp(date))

    def to_dict(self) -> List:
        """Return one ``{key: value}`` dict per row, datetimes as timestamps."""
        rows = []
        for record in self.data:
            names = list(record.keys())
            converted = [self.to_timestamp(value) for value in list(record)]
            rows.append(dict(zip(names, converted)))
        return rows
For example:
SqlToDict(data).to_dict()
Very late 2023
My implementation
def obj_to_dict(obj, remove=('_sa_instance_state',), debug=False):
    """Return a plain dict for a result row or a model instance.

    :param obj: an object whose class is named ``Row``, or any object with
        a ``__dict__``.
    :param remove: attribute names to leave out of the result.
    :param debug: when true, print the resulting dict (not done for rows).
    :return: a new dict; *obj* itself is not modified.
    """
    if type(obj).__name__ == "Row":
        # Newer SQLAlchemy rows expose their mapping view as `_mapping`;
        # fall back to dict(obj) for rows that are directly dict-convertible.
        mapping = getattr(obj, "_mapping", None)
        return dict(obj if mapping is None else mapping)

    result = {key: value for key, value in vars(obj).items() if key not in remove}
    if debug:
        print(result)
    return result
The built in serializer chokes with utf-8 cannot decode invalid start byte for some inputs. Instead, I went with:
def row_to_dict(row):
    """Return a copy of *row*'s attribute dict without ``_sa_instance_state``.

    A copy is taken first: popping the key from ``row.__dict__`` itself
    would remove the state entry from the live object.
    """
    temp = dict(row.__dict__)
    temp.pop('_sa_instance_state', None)
    return temp
def rows_to_list(rows):
    """Return a list holding ``row_to_dict(row)`` for every row in *rows*."""
    return [row_to_dict(row) for row in rows]
@website_blueprint.route('/api/v1/some/endpoint', methods=['GET'])
def some_api():
    '''
    /some_endpoint

    Return all SomeModel rows as a JSON response with status 200.
    '''
    rows = rows_to_list(SomeModel.query.all())
    response = app.response_class(
        response=jsonplus.dumps(rows),
        status=200,
        mimetype='application/json'
    )
    return response
Maybe you can use a class like this
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy import Table
class Custom:
    """Some custom logic here!"""

    __table__: Table  # def for mypy

    @declared_attr
    def __tablename__(cls):  # pylint: disable=no-self-argument
        # Table name is the class name.
        return cls.__name__  # pylint: disable= no-member

    def to_dict(self) -> Dict[str, Any]:
        """Serializes only column data."""
        return {c.name: getattr(self, c.name) for c in self.__table__.columns}
Base = declarative_base(cls=Custom)
class MyOwnTable(Base):
#COLUMNS!
With that all objects have the to_dict method
While using some raw sql and undefined objects, using cursor.description appeared to get what I was looking for:
with connection.cursor() as cur:
print(query)
cur.execute(query)
for item in cur.fetchall():
row = {column.name: item[i] for i, column in enumerate(cur.description)}
print(row)
This is a JSONEncoder version that preserves model column order and only keeps recursively defined column and relationship fields. It also formats most JSON unserializable types:
import json
from datetime import datetime
from decimal import Decimal
import arrow
from sqlalchemy.ext.declarative import DeclarativeMeta
class SQLAlchemyJSONEncoder(json.JSONEncoder):
    """
    SQLAlchemy ORM JSON Encoder

    If you have a "backref" relationship defined in your SQLAlchemy model,
    this encoder raises a ValueError to stop an infinite loop.
    """

    def default(self, obj):
        """Encode datetimes, Decimals, sets and declarative model objects.

        For a model, table columns are emitted first (in table order),
        followed by every other public attribute except ``metadata``.
        Anything unrecognized is delegated to the base encoder.
        """
        if isinstance(obj, datetime):
            return arrow.get(obj).isoformat()
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, set):
            return sorted(obj)
        if not isinstance(obj.__class__, DeclarativeMeta):
            return super().default(obj)

        # Refuse models with a tuple-valued backref on any relationship.
        for _name, relationship in obj.__mapper__.relationships.items():
            if isinstance(relationship.__getattribute__("backref"), tuple):
                raise ValueError(
                    f'{obj.__class__} object has a "backref" relationship '
                    "that would cause an infinite loop!"
                )

        dictionary = {}
        column_names = [column.name for column in obj.__table__.columns]

        # Columns first, converting the special types inline.
        for name in column_names:
            value = obj.__getattribute__(name)
            if isinstance(value, datetime):
                value = arrow.get(value).isoformat()
            elif isinstance(value, Decimal):
                value = float(value)
            elif isinstance(value, set):
                value = sorted(value)
            dictionary[name] = value

        # Then the remaining public attributes, stored as they are.
        remaining = [
            attribute
            for attribute in dir(obj)
            if not attribute.startswith("_")
            and attribute != "metadata"
            and attribute not in column_names
        ]
        for name in remaining:
            dictionary[name] = obj.__getattribute__(name)
        return dictionary

Python serializable objects json [duplicate]

This question already has answers here:
How to make a class JSON serializable
(41 answers)
Closed 6 months ago.
class gpagelet:
    """
    Holds 1) the pagelet xpath, which is a string
          2) the list of pagelet shingles, list
    """

    def __init__(self, parent):
        """Create an empty pagelet owned by *parent* (a gwebpage instance)."""
        parent_is_page = isinstance(parent, gwebpage)
        if not parent_is_page:
            raise Exception("Parent must be an instance of gwebpage")
        self.parent = parent         # owning gwebpage
        self.xpath = None            # string, once set
        self.visibleShingles = []    # list of tuples
        self.invisibleShingles = []  # list of tuples
        self.urls = []               # list of strings
class gwebpage:
    """
    Holds all the datastructure after the results have been parsed
    holds: 1) lists of gpagelets
           2) loc, string, location of the file that represents it
    """

    def __init__(self, url):
        """Create a page record for *url* with no pagelets yet."""
        self.url = url        # str
        self.netloc = False   # str once known; False until then
        self.gpagelets = []   # gpagelet instances
        self.page_key = ""    # str
Is there a way for me to make my class json serializable? The thing that I am worried is the recursive reference.
Write your own encoder and decoder, which can be very simple like return __dict__
e.g. here is a encoder to dump totally recursive tree structure, you can enhance it or use as it is for your own purpose
import json
class Tree(object):
    """A named node with a list of child trees."""

    def __init__(self, name, childTrees=None):
        """Store *name*; use a fresh empty list when no children are given."""
        self.name = name
        self.childTrees = [] if childTrees is None else childTrees
class MyEncoder(json.JSONEncoder):
    """Encode Tree objects through their attribute dictionary."""

    def default(self, obj):
        """Return ``obj.__dict__`` for a Tree; defer anything else."""
        if isinstance(obj, Tree):
            return obj.__dict__
        return super(MyEncoder, self).default(obj)
# Build a two-level tree and dump it with the custom encoder.
c1 = Tree("c1")
c2 = Tree("c2")
t = Tree("t", [c1, c2])
# print() with one argument behaves the same on Python 2 and Python 3.
print(json.dumps(t, cls=MyEncoder))
it prints
{"childTrees": [{"childTrees": [], "name": "c1"}, {"childTrees": [], "name": "c2"}], "name": "t"}
You can similarly write a decoder, but there you will somehow need to identify whether it is your object or not, so you may want to put a type field in the output too if needed.
Indirect answer: instead of using JSON, you could use YAML, which has no problem doing what you want. (JSON is essentially a subset of YAML.)
Example:
import yaml
o1 = gwebpage("url")
o2 = gpagelet(o1)
o1.gpagelets = [o2]
print yaml.dump(o1)
In fact, YAML nicely handles cyclic references for you.
I implemented a very simple todict method with the help of https://stackoverflow.com/a/11637457/1766716
Iterate over the properties that do not start with __
Eliminate methods
Manually eliminate some properties which are not necessary (in my case, ones coming from SQLAlchemy)
And used getattr to build dictionary.
class User(Base):
    """Model that can list its plain attributes and dump them as a dict."""

    id = Column(Integer, primary_key=True)
    firstname = Column(String(50))
    lastname = Column(String(50))
    password = Column(String(20))

    def props(self):
        """Return the names from ``dir(self)`` that are plain data attributes.

        Dunder names, a fixed list of SQLAlchemy bookkeeping names and
        callables are filtered out.
        """
        bookkeeping = ['_decl_class_registry', '_sa_instance_state', '_sa_class_manager', 'metadata']

        def is_plain_attribute(a):
            return (
                not a.startswith('__')
                and a not in bookkeeping
                and not callable(getattr(self, a))
            )

        return filter(is_plain_attribute, dir(self))

    def todict(self):
        """Return ``{name: value}`` for every name yielded by ``props()``."""
        return {k: self.__getattribute__(k) for k in self.props()}
My solution for this was to extend the 'dict' class and perform checks around required/allowed attributes by overriding init, update, and set class methods.
class StrictDict(dict):
    """A dict that validates its keys against class-level rule sets.

    Subclasses configure:
      * ``required``              - keys that must be given at construction.
      * ``at_least_one_required`` - at least one of these must be given at
        construction (ignored when empty).
      * ``cannot_coexist``        - keys of which at most one may be present.
      * ``allowed``               - additional keys that are permitted.

    Every violation raises ``AttributeError``.
    """

    required = set()
    at_least_one_required = set()
    cannot_coexist = set()
    allowed = set()

    def __init__(self, iterable=None, **kwargs):
        """Validate the given keys, then insert them through ``__setitem__``.

        :param iterable: optional mapping of initial items.
        :param kwargs: further initial items, inserted after *iterable*.
        :raises AttributeError: if a construction rule is violated.
        """
        super(StrictDict, self).__init__()
        iterable = {} if iterable is None else iterable
        keys = set(iterable.keys()).union(kwargs)
        cls_name = str(self.__class__.__name__)
        if not keys.issuperset(self.required):
            msg = cls_name + " requires: " + str([str(key) for key in self.required])
            raise AttributeError(msg)
        if self.at_least_one_required and not keys.intersection(self.at_least_one_required):
            msg = cls_name + " requires at least one: " + str([str(key) for key in self.at_least_one_required])
            raise AttributeError(msg)
        # items() works on both Python 2 and 3, unlike iteritems().
        for key, val in iterable.items():
            self[key] = val
        for key, val in kwargs.items():
            self[key] = val

    def update(self, E=None, **F):
        """Validated ``dict.update``: every item goes through ``__setitem__``.

        *E* may be omitted, a mapping, or an iterable of key/value pairs.
        """
        if E is not None:
            pairs = E.items() if hasattr(E, "keys") else E
            for key, val in pairs:
                self[key] = val
        for key, val in F.items():
            self[key] = val

    def __setitem__(self, key, value):
        """Store *value* under *key* if the class rules permit that key.

        :raises AttributeError: if *key* is not permitted, or if it would
            coexist with another key from ``cannot_coexist``.
        """
        cls_name = str(self.__class__.__name__)
        all_allowed = self.allowed.union(self.required).union(self.at_least_one_required).union(self.cannot_coexist)
        if key not in all_allowed:
            # str(key) keeps the error an AttributeError for non-string keys
            msg = cls_name + " does not allow member '" + str(key) + "'"
            raise AttributeError(msg)
        if key in self.cannot_coexist:
            for item in self.cannot_coexist:
                if key != item and item in self:
                    msg = cls_name + " does not allow members '" + str(key) + "' and '" + str(item) + "' to coexist"
                    raise AttributeError(msg)
        super(StrictDict, self).__setitem__(key, value)
Example usage:
class JSONDoc(StrictDict):
    """
    Class corresponding to JSON API top-level document structure
    http://jsonapi.org/format/#document-top-level
    """

    at_least_one_required = {'data', 'errors', 'meta'}
    allowed = {"jsonapi", "links", "included"}
    cannot_coexist = {"data", "errors"}

    def __setitem__(self, key, value):
        """Reject 'included' unless 'data' is already present, then delegate."""
        included_without_data = key == "included" and "data" not in self.keys()
        if included_without_data:
            msg = str(self.__class__.__name__) + " does not allow 'included' member if 'data' member is not present"
            raise AttributeError(msg)
        super(JSONDoc, self).__setitem__(key, value)
json_doc = JSONDoc(
data={
"id": 5,
"type": "movies"
},
links={
"self": "http://url.com"
}
)

Categories