I have a User class in SQLAlchemy. I want to be able to encrypt the user's email address attribute in the database but still make it searchable through the filter query.
My problem is that if I use @hybrid_property my query theoretically works but constructing the object fails, and if I use @property construction works but my query doesn't.
from cryptography.fernet import Fernet # <- pip install cryptography
from werkzeug.security import generate_password_hash
class User(db.Model):
    """User whose email is stored encrypted in the ``email_hash`` column.

    ``email`` is exposed through a hybrid property (option 1). Swapping the
    decorator for the commented-out ``@property`` gives option 2.
    """

    __tablename__ = 'users'

    id = db.Column(db.Integer, primary_key=True)
    email_hash = db.Column(db.String(184), unique=True, nullable=False)
    password_hash = db.Column(db.String(128))

    # @property  # <- Consider this as option 2...
    @hybrid_property  # <- Consider this as option 1...
    def email(self):
        """Decrypt and return the value stored in ``email_hash``."""
        f = Fernet('SOME_ENC_KEY')
        value = f.decrypt(self.email_hash.encode('utf-8'))
        return value

    @email.setter
    def email(self, email):
        """Encrypt ``email`` and store the result in ``email_hash``."""
        f = Fernet('SOME_ENC_KEY')
        self.email_hash = f.encrypt(email.encode('utf-8'))

    @property
    def password(self):
        """Write-only attribute: reading always raises ``AttributeError``."""
        raise AttributeError('password is not a readable attribute.')

    @password.setter
    def password(self, password):
        """Store only the hash of ``password`` in ``password_hash``."""
        self.password_hash = generate_password_hash(password)

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the declarative constructor."""
        super(User, self).__init__(**kwargs)
        # other checks and modifiers
For option 1: When I attempt to construct a user with User(email='a@example.com', password='secret') I receive the following traceback:
~/models.py in __init__(self, **kwargs)
431 # Established role assignment by default class initiation
432 def __init__(self, **kwargs):
--> 433 super(User, self).__init__(**kwargs)
434 if self.role is None:
435 _default_role = Role.query.filter_by(default=True).first()
~/lib/python3.6/site-packages/sqlalchemy/ext/declarative/base.py in _declarative_constructor(self, **kwargs)
697 raise TypeError(
698 "%r is an invalid keyword argument for %s" %
--> 699 (k, cls_.__name__))
700 setattr(self, k, kwargs[k])
701 _declarative_constructor.__name__ = '__init__'
TypeError: 'email' is an invalid keyword argument for User
For option 2: If I instead change @hybrid_property to @property, construction is fine, but then my query User.query.filter_by(email=form.email.data.lower()).first() fails and returns None.
What should I change to get it working as required?
==============
Note: I should say that I have tried to avoid using dual attributes, since I didn't want to make extensive edits to the underlying codebase. So I have explicitly tried to avoid separating creation from querying, as in User(email_input='a@a.com', password='secret') and User.query.filter_by(email='a@a.com').first():
class User(db.Model):
__tablename__ = 'users'
id = db.Column(db.Integer, primary_key=True)
email_hash = db.Column(db.String(184), unique=True, nullable=False)
password_hash = db.Column(db.String(128))
#hybrid_property
def email(self):
f = Fernet('SOME_ENC_KEY')
value = f.decrypt(self.email_hash.encode('utf-8'))
return value
#property
def email_input(self):
raise AttributeError('email_input is not a readable attribute.')
#email_input.setter
def email_input(self, email):
f = Fernet('SOME_ENC_KEY')
self.email_hash = f.encrypt(email.encode('utf-8'))
#property
def password(self):
raise AttributeError('password is not a readable attribute.')
#password.setter
def password(self, password):
self.password_hash = generate_password_hash(password)
def __init__(self, **kwargs):
super(User, self).__init__(**kwargs)
# other checks and modifiers
In your hybrid_property, email, the line self.f.decrypt(self.email_hash.encode('utf-8')) is fine if self.email_hash is a str type, however, as email is a hybrid_property, when SQLAlchemy uses it to generate SQL self.email_hash is actually a sqlalchemy.orm.attributes.InstrumentedAttribute type.
From the docs regarding hybrid properties:
In many cases, the construction of an in-Python function and a
SQLAlchemy SQL expression have enough differences that two separate
Python expressions should be defined.
And so you can define an hybrid_property.expression method which is what SQLAlchemy will use to generate sql, allowing you to keep your string treatment intact in your hybrid_property method.
Here is the code I ended up with that worked for me given your example. I've stripped quite a bit out of your User model for simplicity but all the important parts are there. I also had to make up implementations for other functions/classes that were called in your code but not supplied (see MCVE):
class Fernet:
def __init__(self, k):
self.k = k
def encrypt(self, s):
return s
def decrypt(self, s):
return s
def get_env_variable(s):
return s
def generate_password_hash(s):
return s
class User(db.Model):
    """Minimal user model exposing ``email`` as a hybrid property.

    The instance-level getter works on the stored string, while the
    ``expression`` variant is the one evaluated against the class when a
    query is built, so it must not call string methods on the column.
    """

    __tablename__ = 'users'

    id = db.Column(db.Integer, primary_key=True)
    email_hash = db.Column(db.String(184), unique=True, nullable=False)

    # Created once when the class body runs and shared by all instances.
    f = Fernet(get_env_variable('FERNET_KEY'))

    @hybrid_property
    def email(self):
        """Instance access: decrypt the stored ``email_hash`` string."""
        return self.f.decrypt(self.email_hash.encode('utf-8'))

    @email.expression
    def email(cls):
        """Class access: ``cls.email_hash`` is a column attribute, not a str."""
        return cls.f.decrypt(cls.email_hash)

    @email.setter
    def email(self, email):
        """Encrypt ``email`` and store the result in ``email_hash``."""
        self.email_hash = self.f.encrypt(email.encode('utf-8'))
if __name__ == '__main__':
db.drop_all()
db.create_all()
u = User(email='a#example.com')
db.session.add(u)
db.session.commit()
print(User.query.filter_by(email='a#example.com').first())
# <User 1>
Unfortunately, the code above only works because the mock Fernet.decrypt method returns the exact object that was passed in. The problem with storing a Fernet encoded hash of the user's email addresses is that Fernet.encrypt does not return the same fernet token from one execution to the next, even with the same key. E.g.:
>>> from cryptography.fernet import Fernet
>>> f = Fernet(Fernet.generate_key())
>>> f.encrypt('a#example.com'.encode('utf-8')) == f.encrypt('a#example.com'.encode('utf-8'))
False
So, you want to query a database for a record, but with no way of knowing at query time what the stored value of the field you are querying actually is. You could build a classmethod that queries the entire users table and loops through each record, decrypting its stored hash and comparing it to the clear-text email. Or you could build a hashing function that always returns the same value, hash new users' emails using that function, and query the email_hash field directly with the hash of the email string. Of those, the first would be very inefficient given lots of users.
The Fernet.encrypt function is:
def encrypt(self, data):
current_time = int(time.time())
iv = os.urandom(16)
return self._encrypt_from_parts(data, current_time, iv)
So, you could define static values of current_time and iv and call Fernet._encrypt_from_parts directly yourself. Or you could use Python's built-in hash and just set a fixed seed so that it is deterministic. You could then first hash the email string that you want to query and directly query User.email_hash. Just make sure you don't do any of the above for password fields!
Related
I often use an update function when working with Flask-SQLAlchemy models:
from app import db
class User(db.Model):
    """User model with helpers to patch fields and export them as a dict."""

    __tablename__ = 'User'

    id = db.Column(db.Integer, primary_key=True)
    email = db.Column(db.String(255))
    name = db.Column(db.String(255))

    def update(self, email=None, name=None):
        """Overwrite only the fields that were passed with a non-None value."""
        if email is not None:
            self.email = email
        if name is not None:
            self.name = name

    def dump(self):
        """Return the instance attributes whose names do not start with ``_``."""
        return {k: v for k, v in vars(self).items() if not k.startswith("_")}
This allows me to directly update an object with a json body:
user.update(**body)
But with a table containing a lot of columns, writing this function can be really annoying.
Do you know a more concise approach?
You can iterate over dict fields and use setattr to update:
for field, value in body.items():
if value is not None:
setattr(self, field, value)
def decorator(cls):
#code
return cls
#decorator
class User(db.Model):
id = db.Column(db.Integer, primary_key=True)
username = db.Column(db.String(20),nullable=False)
ssid = db.Column(db.String(20))
def __repr__(self):
return f"User('{self.username}',{self.password})"
I want to decorate a class so that I can access the value of ssid inside the decorator function and add a new attribute to the class, because the new attribute requires the value of ssid.
user = User(username='prince',ssid='9734ait')
db.session.add(user)
This doesn't seem like an appropriate use case for a decorator... seems to me you can just use inheritance and add a new attribute in the __init__. For instance:
class User(db.Model):
    """User model that derives extra attributes inside ``__init__``."""

    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(20), nullable=False)
    ssid = db.Column(db.String(20))

    def __init__(self, *args, password=None, your_new_attribute=None, **kwargs):
        """Build the row, then derive ``password`` from ``ssid``.

        ``password`` is accepted for signature compatibility but, as in the
        original snippet, the stored value is always derived from ``ssid``.
        """
        super().__init__(*args, **kwargs)
        # ``ssid`` only exists as an attribute once the base constructor has
        # consumed the keyword arguments; a bare ``ssid`` is a NameError.
        # NOTE(review): built-in hash() is not a password hash and, for str,
        # varies between interpreter runs -- replace it in real code.
        self.password = hash(self.ssid)
        self.your_new_attribute = your_new_attribute

    def __repr__(self):
        return f"User('{self.username}',{self.password})"
If you insist on using a decorator:
class Decorator:
    """Class decorator that adds a ``password`` attribute derived from ``ssid``."""

    def __call__(self, cls):
        """Return a subclass of ``cls`` whose ``password`` is ``ssid`` reversed."""

        class Inner(cls):
            # Defined on the subclass so the wrapped class is left untouched.
            password = cls.ssid[::-1]

        # Keep the decorated class recognisable under its original name.
        Inner.__name__ = cls.__name__
        Inner.__qualname__ = cls.__qualname__
        return Inner


@Decorator()
class User:
    ssid = "fooo"

    def __repr__(self):
        return f"User({self.ssid}, {self.password})"


u = User()
print(u)
Output:
User(fooo, ooof)
Would defining a property within the decorator be sufficient for your use case ?
For example:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///site.db"
db = SQLAlchemy(app)
class Encryptor:
    """Class decorator that adds a computed, read-only ``password`` property."""

    def __call__(self, cls):
        """Return a subclass of ``cls`` exposing ``password``."""

        class Inner(cls):
            @property
            def password(self):
                """Build the value from the instance's ``username`` and ``ssid``."""
                # Evaluated on every access, so it sees the current values.
                return f'{self.username} - {self.ssid}'.upper()

        return Inner
#Encryptor()
class User5(db.Model):
id = db.Column(db.Integer, primary_key=True)
username = db.Column(db.String(20), nullable=False)
ssid = db.Column(db.String(20))
def __repr__(self):
return f"User('{self.username}',{self.ssid}; PASSWORD: {self.password})"
db.create_all()
user = User5(username="prince", ssid="3456ait")
db.session.add(user)
db.session.commit()
users = User5.query.all()
print(users)
In a comment, you said that the actual goal is to encrypt passwords on the way in and out of the class. Sqlalchemy offers this using hybrid properties. This is an example from one of my projects-
class User(Base):
    """User whose ``password`` attribute transparently hashes on assignment."""

    __tablename__ = "user"

    id = Column(Integer, primary_key=True)
    username = Column(String(255))
    # Python attribute ``hashed_password`` maps to the DB column "password".
    hashed_password = Column("password", String(255))

    @hybrid_property
    def password(self):
        """Return the stored hash (never the clear-text password)."""
        return self.hashed_password

    @password.setter  # type: ignore
    def password(self, value):
        """Hash ``value`` (str or bytes) and store the hash as text."""
        # NOTE(review): presumably bcrypt's hashpw/gensalt; 4 rounds is a very
        # low work factor -- confirm it is intended outside of tests.
        rounds = 4
        if not isinstance(value, bytes):
            value = value.encode("utf-8")
        self.hashed_password = hashpw(value, gensalt(rounds)).decode("utf-8")
(so only the hashed password is stored in the database in this case- to check the password, you hash the input and compare it to user.password)
I'm new to MongoEngine and it looks like we need to create sub classes of the class Document from the mongoengine to model our DB. I'm a little concerned here because this violates the Dependency Inversion from the SOLID principles. So if I need to use another database at a later point of time, I will have to change my domain model classes which I shouldn't really be doing.
SQLAlchemy overcomes this by providing a beautiful classical mapping. Using this, the database dependent code is separated from my domain model, so I don't really need to worry about the database provider and I can easily abstract the details away should I have a need to change my database.
Is there an equivalent of this for MongoDB, preferably in MongoEngine?
Pymongo's official doc provides a list of the existing ORM/ODM and frameworks but to my knowledge they all implement the Active Record Pattern (just like django ORM), which as you said, violates the SOLID principles but is good enough for many simple use cases.
MongoAlchemy, which was inspired by SQLAlchemy uses a concept of session so it may be closer to what you are looking for but the project is no longer maintained.
If I understand correctly, you're trying to map an object to document schema using mongoengine.
Let's create a document class for a user:
from mongoengine import Document, StringField
class UserDocument(Document):
username = StringField(required=True)
password = StringField(required=True)
email = StringField(required=True)
Now add a class method that creates new users:
from mongoengine import disconnect, connect, Document, StringField
class UserDocument(Document):
username = StringField(required=True)
password = StringField(required=True)
email = StringField(required=True)
#classmethod
def new(cls):
data = UserDocument(username=cls.username, password=cls.password, email=cls.email)
connect('test_collection')
data.save()
disconnect('test_collection')
As I understand your question, your issue in this example is that UserDocument would be aware of mongoengine thus violating the dependency inversion principle. This can be solved with a child class.
First allow inheritance in UserDocument:
...
class UserDocument(Document):
meta = {'allow_inheritance': True}
username = StringField(required=True)
...
Next we build the child:
from user_document import UserDocument
# Maps object to schema
class User(UserDocument):
def __init__(self, *args, **values):
super().__init__(*args, **values)
Next add a create method:
from user_document import UserDocument
# Maps object to schema
class User(UserDocument):
def __init__(self, *args, **values):
super().__init__(*args, **values)
def create(self, username, password, email):
self.username, self.password, self.email = username, password, email
User.new()
Now our User object inherits the UserDocument fields. UserDocument.new can be accessed directly or through the child with User.new().
from model import User
username, password, email = 'cool username', 'super secret password', 'mrcool#example.com'
User.create(User, username, password, email)
The User object is aware of UserDocument which in turn depends on mongoengine.
I apologize if I misunderstood or used incorrect vocabulary to describe the example solution. I'm relatively new, self-taught, and have no friends who code which makes discussion difficult.
This topic is covered in the first 6 chapters of CosmicPython/Architecture Patterns With Python.
However, in those chapters it uses SQLAlchemy with mappers.
The book does have a section with an example for other ORMs that use an ActiveRecord style - like mongoengine - in
Appendix D: Repository and Unit of Work Patterns with Django.
First the models are defined.
Please note the following example may be hard to follow without any background and so I recommend reading the first 6 chapters of CosmicPython if the example below is unclear.
src/djangoproject/alloc/models.py
from django.db import models
from allocation.domain import model as domain_model
class Batch(models.Model):
    """Django persistence model mirroring the domain ``Batch``."""

    reference = models.CharField(max_length=255)
    sku = models.CharField(max_length=255)
    qty = models.IntegerField()
    eta = models.DateField(blank=True, null=True)

    @staticmethod
    def update_from_domain(batch: domain_model.Batch):
        """Create or update the row for ``batch`` and sync its allocations."""
        try:
            b = Batch.objects.get(reference=batch.reference)
        except Batch.DoesNotExist:
            b = Batch(reference=batch.reference)
        b.sku = batch.sku
        b.qty = batch._purchased_quantity
        b.eta = batch.eta
        # Saved before touching allocation_set, as in the original ordering.
        b.save()
        b.allocation_set.set(
            Allocation.from_domain(line, b)
            for line in batch._allocations
        )

    def to_domain(self) -> domain_model.Batch:
        """Build a domain ``Batch`` from this row and its allocation rows."""
        b = domain_model.Batch(
            ref=self.reference, sku=self.sku, qty=self.qty, eta=self.eta
        )
        b._allocations = {
            a.line.to_domain()
            for a in self.allocation_set.all()
        }
        return b
class OrderLine(models.Model):
    """Django persistence model mirroring the domain ``OrderLine``."""

    orderid = models.CharField(max_length=255)
    sku = models.CharField(max_length=255)
    qty = models.IntegerField()

    def to_domain(self):
        """Return the domain ``OrderLine`` carrying this row's values."""
        return domain_model.OrderLine(
            orderid=self.orderid, sku=self.sku, qty=self.qty
        )

    @staticmethod
    def from_domain(line):
        """Fetch, or create if missing, the row matching the domain ``line``."""
        row, _ = OrderLine.objects.get_or_create(
            orderid=line.orderid, sku=line.sku, qty=line.qty
        )
        return row
class Allocation(models.Model):
    """Join row linking one ``OrderLine`` row to one ``Batch`` row."""

    batch = models.ForeignKey(Batch, on_delete=models.CASCADE)
    line = models.ForeignKey(OrderLine, on_delete=models.CASCADE)

    @staticmethod
    def from_domain(domain_line, django_batch):
        """Fetch, or create if missing, the allocation of ``domain_line`` to ``django_batch``."""
        allocation, _ = Allocation.objects.get_or_create(
            line=OrderLine.from_domain(domain_line),
            batch=django_batch,
        )
        return allocation
Then a port and adapter are defined for the repository pattern in
src/allocation/adapters/repository.py
# pylint: disable=no-member, no-self-use
from typing import Set
import abc
from allocation.domain import model
from djangoproject.alloc import models as django_models
class AbstractRepository(abc.ABC):
    """Repository port: tracks every batch that was added or fetched."""

    def __init__(self):
        self.seen = set()  # type: Set[model.Batch]

    def add(self, batch: model.Batch):
        """Record ``batch`` as seen."""
        self.seen.add(batch)

    def get(self, reference) -> model.Batch:
        """Look up a batch via ``_get`` and record it as seen when found."""
        p = self._get(reference)
        if p:
            self.seen.add(p)
        return p

    @abc.abstractmethod
    def _get(self, reference):
        """Storage-specific lookup; ``get`` treats a falsy result as a miss."""
        raise NotImplementedError
class DjangoRepository(AbstractRepository):
    """Repository adapter backed by the Django ORM models."""

    def add(self, batch):
        """Track ``batch`` and persist it immediately."""
        super().add(batch)
        self.update(batch)

    def update(self, batch):
        """Write the domain ``batch`` through to its Django row."""
        django_models.Batch.update_from_domain(batch)

    def _get(self, reference):
        """Return the domain batch for ``reference``, or None when absent."""
        row = django_models.Batch.objects.filter(reference=reference).first()
        # ``first()`` yields None on an empty queryset; calling ``to_domain``
        # on it would raise AttributeError instead of reporting a miss.
        if row is None:
            return None
        return row.to_domain()

    def list(self):
        """Return every stored batch converted to its domain object."""
        return [b.to_domain() for b in django_models.Batch.objects.all()]
Along with the domain models
src/allocation/domain/model.py
from __future__ import annotations
from dataclasses import dataclass
from datetime import date
from typing import Optional, List, Set
class OutOfStock(Exception):
    """Raised when no batch can take an order line."""


def allocate(line: OrderLine, batches: List[Batch]) -> str:
    """Allocate ``line`` to the first batch, in sorted order, that can take it.

    Returns the chosen batch's ``reference``.
    Raises ``OutOfStock`` when no batch can allocate the line.
    """
    try:
        batch = next(b for b in sorted(batches) if b.can_allocate(line))
    except StopIteration:
        # Only the search is guarded, so a StopIteration leaking out of
        # ``batch.allocate`` can no longer be misreported as out-of-stock.
        raise OutOfStock(f"Out of stock for sku {line.sku}") from None
    batch.allocate(line)
    return batch.reference
@dataclass(unsafe_hash=True)
class OrderLine:
    """Value object for one order line; equal and hashable by field values."""

    orderid: str
    sku: str
    qty: int
class Batch:
    """A batch of stock for one SKU, identified by its reference."""

    def __init__(self, ref: str, sku: str, qty: int, eta: Optional[date]):
        self.reference = ref
        self.sku = sku
        self.eta = eta
        self._purchased_quantity = qty
        self._allocations = set()  # type: Set[OrderLine]

    def __repr__(self):
        return f"<Batch {self.reference}>"

    def __eq__(self, other):
        # Identity is the reference alone; quantities and eta are ignored.
        if not isinstance(other, Batch):
            return False
        return other.reference == self.reference

    def __hash__(self):
        return hash(self.reference)

    def __gt__(self, other):
        # A batch without an eta is never "greater", so it sorts first.
        if self.eta is None:
            return False
        if other.eta is None:
            return True
        return self.eta > other.eta

    def allocate(self, line: OrderLine):
        """Add ``line`` to this batch if it fits; otherwise do nothing."""
        if self.can_allocate(line):
            self._allocations.add(line)

    def deallocate(self, line: OrderLine):
        """Remove ``line`` from this batch; a no-op when it is not allocated."""
        self._allocations.discard(line)

    @property
    def allocated_quantity(self) -> int:
        """Total quantity of all lines currently allocated."""
        return sum(line.qty for line in self._allocations)

    @property
    def available_quantity(self) -> int:
        """Purchased quantity minus what is already allocated."""
        return self._purchased_quantity - self.allocated_quantity

    def can_allocate(self, line: OrderLine) -> bool:
        """True when the SKU matches and enough quantity is available."""
        return self.sku == line.sku and self.available_quantity >= line.qty
I have some tests that were working when I ran them with regular database objects but are broken now that I am using FactoryBoy factories. I think I understand why they are broken but am struggling with the correct way to set this up.
Here are my factories:
#register
class UserFactory(BaseFactory):
"""User factory."""
username = Sequence(lambda n: 'user{0}'.format(n))
email = Sequence(lambda n: 'user{0}#example.com'.format(n))
password = PostGenerationMethodCall('set_password', 'example')
active = True
class Meta:
"""Factory configuration."""
model = User
@register
class ExperimentFactory(BaseFactory):
    """Experiment Factory."""

    # Wrapped so the fake date is drawn for each generated object; a bare
    # call here runs once, when the class body is executed, and every
    # experiment would then share that single date.
    date = factory.LazyFunction(
        lambda: fake.date_this_decade(before_today=True, after_today=False)
    )
    scanner = Iterator(['GE', 'Sie', 'Phi'])
    user = factory.SubFactory(UserFactory)

    class Meta:
        """Factory configuration."""

        model = Experiment
According to this answer and other examples, FactoryBoy is supposed to be handling the foreign key assignment behind the scenes.
But when I try to initialize my ExperimentFactory object in my fixture, I have a problem.
#pytest.fixture(scope='function')
#pytest.mark.usefixtures('db')
def mocked_scan_service(db, mocker, request):
user = UserFactory(password='myprecious')
db.session.add(user)
num_exp, num_scans, exp_id, scan_id, exp_uri, scan_uri = request.param
for i in range(num_exp):
experiment = ExperimentFactory(user_id = user.id)
db.session.add(experiment)
db.session.commit()
ss = ScanService(user.id, experiment.id)
for i in range(num_scans):
ss._add_scan_to_database()
ss.xc.upload_scan = mocker.MagicMock()
ss.xc.upload_scan.return_value = ('/data/archive/subjects/000001', exp_uri, scan_uri)
mocker.spy(ss, '_generate_xnat_identifiers')
ss.param = request.param
return ss
If I don't pass ExperimentFactory a user id, I get this error:
TypeError: __init__() missing 1 required positional argument: 'user_id'
Here's the model; it makes sense to me that the factory needs an argument user_id to initialize:
class Experiment(SurrogatePK, Model):
"""A user's experiment, during which they are scanned."""
__tablename__ = 'experiment'
date = Column(db.Date(), nullable=False)
scanner = Column(db.String(80), nullable=True)
num_scans = Column(db.Integer(), nullable=True, default=0)
xnat_experiment_id = Column(db.String(80), nullable=True)
xnat_uri = Column(db.String(80), nullable=True)
user_id = reference_col('user', nullable=False)
scans = relationship('Scan', backref='experiment')
def __init__(self, date, scanner, user_id, **kwargs):
"""Create instance."""
db.Model.__init__(self, date=date, scanner=scanner, user_id=user_id, **kwargs)
def __repr__(self):
"""Represent instance as a unique string."""
return '<Experiment({date})>'.format(date=self.date)
But if, as written, I explicitly create a user and then pass the user id, it looks like the ExperimentFactory eventually overwrites the foreign key with the SubFactory it generated. So later, when I initialize an object called ScanService, which must be initialized with a user_id and an experiment_id, my tests fail for one of two reasons. Either I initialize it with the user_id of my explicitly created user, and my tests fail because they don't find any sibling experiments to the experiment that experiment_id belongs to, or I initialize it with experiment.user.id, and my tests fail because they expect one user in the database and in fact there are two. That latter problem would be fairly easy to work around by rewriting my tests, but that seems janky and unclear. How am I supposed to initialize the ExperimentFactory when the Experiment model requires a user_id for initialization?
If anyone has a better solution feel free to comment, but here's what I realized: it really doesn't matter what I pass in for user_id; I just have to pass in something so that model initialization doesn't fail. And passing in user=user at the same time creates the situation I want: all experiments belong to the same user. Now all my tests pass. Here's the modified fixture code; everything else remained the same:
@pytest.fixture(scope='function')
def mocked_scan_service(db, mocker, request):
    """Build a ScanService over factory-made data with its upload mocked out.

    ``request.param`` supplies
    ``(num_exp, num_scans, exp_id, scan_id, exp_uri, scan_uri)``.
    The ``db`` fixture is requested as an argument; a ``usefixtures`` mark on
    a fixture function has no effect, so it is not applied here.
    """
    num_exp, num_scans, exp_id, scan_id, exp_uri, scan_uri = request.param
    user = UserFactory(password='myprecious')
    for _ in range(num_exp):
        # ``user_id`` only satisfies the model's required constructor
        # argument; ``user=user`` is what ties every experiment to one user.
        experiment = ExperimentFactory(user_id=user.id, user=user)
        db.session.add(experiment)
    db.session.commit()

    # The service is bound to the last experiment created above.
    ss = ScanService(experiment.user.id, experiment.id)
    for _ in range(num_scans):
        ss._add_scan_to_database()

    ss.xc.upload_scan = mocker.MagicMock()
    ss.xc.upload_scan.return_value = ('/data/archive/subjects/000001', exp_uri, scan_uri)
    mocker.spy(ss, '_generate_xnat_identifiers')
    ss.param = request.param
    return ss
This question already has answers here:
How to serialize SqlAlchemy result to JSON?
(37 answers)
Closed 4 years ago.
I'm trying to jsonify a SQLAlchemy result set in Flask/Python.
The Flask mailing list suggested the following method http://librelist.com/browser//flask/2011/2/16/jsonify-sqlalchemy-pagination-collection-result/#04a0754b63387f87e59dda564bde426e :
return jsonify(json_list = qryresult)
However I'm getting the following error back:
TypeError: <flaskext.sqlalchemy.BaseQuery object at 0x102c2df90>
is not JSON serializable
What am I overlooking here?
I have found this question: How to serialize SqlAlchemy result to JSON? which seems very similar however I didn't know whether Flask had some magic to make it easier as the mailing list post suggested.
Edit: for clarification, this is what my model looks like
class Rating(db.Model):
__tablename__ = 'rating'
id = db.Column(db.Integer, primary_key=True)
fullurl = db.Column(db.String())
url = db.Column(db.String())
comments = db.Column(db.Text)
overall = db.Column(db.Integer)
shipping = db.Column(db.Integer)
cost = db.Column(db.Integer)
honesty = db.Column(db.Integer)
communication = db.Column(db.Integer)
name = db.Column(db.String())
ipaddr = db.Column(db.String())
date = db.Column(db.String())
def __init__(self, fullurl, url, comments, overall, shipping, cost, honesty, communication, name, ipaddr, date):
self.fullurl = fullurl
self.url = url
self.comments = comments
self.overall = overall
self.shipping = shipping
self.cost = cost
self.honesty = honesty
self.communication = communication
self.name = name
self.ipaddr = ipaddr
self.date = date
It seems that you actually haven't executed your query. Try following:
return jsonify(json_list = qryresult.all())
[Edit]: Problem with jsonify is, that usually the objects cannot be jsonified automatically. Even Python's datetime fails ;)
What I have done in the past, is adding an extra property (like serialize) to classes that need to be serialized.
def dump_datetime(value):
    """Serialize a datetime into a ``[date, time]`` pair of strings for JSON.

    Returns ``None`` when ``value`` is ``None``; otherwise
    ``["YYYY-MM-DD", "HH:MM:SS"]``.
    """
    if value is None:
        return None
    return [value.strftime("%Y-%m-%d"), value.strftime("%H:%M:%S")]
class Foo(db.Model):
# ... SQLAlchemy defs here..
def __init__(self, ...):
# self.foo = ...
pass
#property
def serialize(self):
"""Return object data in easily serializable format"""
return {
'id' : self.id,
'modified_at': dump_datetime(self.modified_at),
# This is an example how to deal with Many2Many relations
'many2many' : self.serialize_many2many
}
#property
def serialize_many2many(self):
"""
Return object's relations in easily serializable format.
NB! Calls many2many's serialize property.
"""
return [ item.serialize for item in self.many2many]
And now for views I can just do:
return jsonify(json_list=[i.serialize for i in qryresult.all()])
[Edit 2019]:
In case you have more complex objects or circular references, use a library like marshmallow.
Here's what's usually sufficient for me:
I create a serialization mixin which I use with my models. The serialization function basically fetches whatever attributes the SQLAlchemy inspector exposes and puts it in a dict.
from sqlalchemy.inspection import inspect
class Serializer(object):
    """Mixin that turns a mapped instance into a plain dict."""

    def serialize(self):
        """Map every attribute key reported by ``inspect(self).attrs`` to its value."""
        return {c: getattr(self, c) for c in inspect(self).attrs.keys()}

    @staticmethod
    def serialize_list(l):
        """Serialize each item of ``l`` by calling its own ``serialize()``."""
        return [m.serialize() for m in l]
All that's needed now is to extend the SQLAlchemy model with the Serializer mixin class.
If there are fields you do not wish to expose, or that need special formatting, simply override the serialize() function in the model subclass.
class User(db.Model, Serializer):
id = db.Column(db.Integer, primary_key=True)
username = db.Column(db.String)
password = db.Column(db.String)
# ...
def serialize(self):
d = Serializer.serialize(self)
del d['password']
return d
In your controllers, all you have to do is to call the serialize() function (or serialize_list(l) if the query results in a list) on the results:
def get_user(id):
user = User.query.get(id)
return json.dumps(user.serialize())
def get_users():
users = User.query.all()
return json.dumps(User.serialize_list(users))
I had the same need, to serialize into json. Take a look at this question. It shows how to discover columns programmatically. So, from that I created the code below. It works for me, and I'll be using it in my web app. Happy coding!
def to_json(inst, cls, convert=None):
    """Return a JSON string holding the column values of ``inst``.

    ``cls`` supplies the column list through ``cls.__table__.columns``.
    ``convert`` optionally maps a column's ``type`` (looked up as-is) to a
    callable that makes the value serializable, e.g. for datetimes.

    ``None`` values become ``""``. If a converter raises, the value is
    replaced by a single error string instead of aborting the whole dump.
    """
    converters = {} if convert is None else convert
    payload = {}
    for column in cls.__table__.columns:
        value = getattr(inst, column.name)
        if value is None:
            payload[column.name] = ""
        elif column.type in converters:
            converter = converters[column.type]
            try:
                payload[column.name] = converter(value)
            except Exception:
                # Deliberately best-effort: one bad value must not lose the
                # row. A stray comma used to turn this message into a tuple.
                payload[column.name] = f"Error: Failed to convert using {converter}"
        else:
            payload[column.name] = value
    return json.dumps(payload)
class Person(base):
__tablename__ = 'person'
id = Column(Integer, Sequence('person_id_seq'), primary_key=True)
first_name = Column(Text)
last_name = Column(Text)
email = Column(Text)
#property
def json(self):
return to_json(self, self.__class__)
Here's my approach:
https://github.com/n0nSmoker/SQLAlchemy-serializer
pip install SQLAlchemy-serializer
You can easily add mixin to your model and then just call
.to_dict() method on its instance.
You also can write your own mixin on base of SerializerMixin.
For a flat query (no joins) you can do this
@app.route('/results/')
def results():
    """Return every row of ``Table`` as JSON (flat query, no joins)."""
    data = Table.query.all()
    # A mapped instance's __dict__ also holds SQLAlchemy's private
    # ``_sa_instance_state`` entry, which cannot be serialized to JSON.
    result = [
        {key: value for key, value in vars(d).items() if not key.startswith('_')}
        for d in data
    ]
    return jsonify(result=result)
and if you only want to return certain columns from the database you can do this
@app.route('/results/')
def results():
    """Return only the whitelisted columns of every ``Table`` row as JSON."""
    cols = ['id', 'url', 'shipping']
    data = Table.query.all()
    result = [{col: getattr(d, col) for col in cols} for d in data]
    return jsonify(result=result)
Ok, I've been working on this for a few hours, and I've developed what I believe to be the most pythonic solution yet. The following code snippets are python3 but shouldn't be too horribly painful to backport if you need.
The first thing we're gonna do is start with a mixin that makes your db models act kinda like dicts:
from sqlalchemy.inspection import inspect
class ModelMixin:
    """Provide dict-like interface to db.Model subclasses."""

    def __getitem__(self, key):
        """Expose object attributes like dict values.

        A missing attribute raises ``AttributeError`` (not ``KeyError``),
        because the lookup is a plain ``getattr``.
        """
        return getattr(self, key)

    def keys(self):
        """Identify what db columns we have."""
        return inspect(self).attrs.keys()
Now we're going to define our model, inheriting the mixin:
class MyModel(db.Model, ModelMixin):
id = db.Column(db.Integer, primary_key=True)
foo = db.Column(...)
bar = db.Column(...)
# etc ...
That's all it takes to be able to pass an instance of MyModel() to dict() and get a real live dict instance out of it, which gets us quite a long way towards making jsonify() understand it. Next, we need to extend JSONEncoder to get us the rest of the way:
from flask.json import JSONEncoder
from contextlib import suppress
class MyJSONEncoder(JSONEncoder):
def default(self, obj):
# Optional: convert datetime objects to ISO format
with suppress(AttributeError):
return obj.isoformat()
return dict(obj)
app.json_encoder = MyJSONEncoder
Bonus points: if your model contains computed fields (that is, you want your JSON output to contain fields that aren't actually stored in the database), that's easy too. Just define your computed fields as @property attributes, and extend the keys() method like so:
class MyModel(db.Model, ModelMixin):
id = db.Column(db.Integer, primary_key=True)
foo = db.Column(...)
bar = db.Column(...)
#property
def computed_field(self):
return 'this value did not come from the db'
def keys(self):
return super().keys() + ['computed_field']
Now it's trivial to jsonify:
#app.route('/whatever', methods=['GET'])
def whatever():
return jsonify(dict(results=MyModel.query.all()))
If you are using flask-restful you can use marshal:
from flask.ext.restful import Resource, fields, marshal
topic_fields = {
'title': fields.String,
'content': fields.String,
'uri': fields.Url('topic'),
'creator': fields.String,
'created': fields.DateTime(dt_format='rfc822')
}
class TopicListApi(Resource):
def get(self):
return {'topics': [marshal(topic, topic_fields) for topic in DbTopic.query.all()]}
You need to explicitly list what you are returning and what type it is, which I prefer anyway for an api. Serialization is easily taken care of (no need for jsonify), dates are also not a problem. Note that the content for the uri field is automatically generated based on the topic endpoint and the id.
Here's my answer if you're using the declarative base (with help from some of the answers already posted):
# In your models module, where you define and extend declarative_base():
from sqlalchemy.ext.declarative import declarative_base
...
# Declarative base class for the mapped models.
Base = declarative_base()
# Attach a ``query`` attribute to Base, built from db_session (defined elsewhere).
Base.query = db_session.query_property()
...
# Mixin class (named "Model" here; any name works) that provides as_dict().
class Model:
    """Mixin adding a column-name -> value dictionary export."""

    def as_dict(self):
        """Return a dict mapping each table column name to this row's value."""
        row = {}
        for column in self.__table__.columns:
            row[column.name] = getattr(self, column.name)
        return row
# Extend both Base and the Model mixin in each model definition, e.g.
class Rating(Base, Model):
    """Example mapped class that also inherits as_dict() from Model."""

    # Exactly two leading underscores: any other spelling is just an ordinary
    # class attribute and the table name is never picked up.
    __tablename__ = 'rating'
    id = db.Column(db.Integer, primary_key=True)
    fullurl = db.Column(db.String())
    url = db.Column(db.String())
    comments = db.Column(db.Text)
    ...
# After querying, you have a result set (rs) of ratings
rs = Rating.query.all()
# Serialize the whole result set as a JSON array of objects
s = json.dumps([r.as_dict() for r in rs], default=alchemyencoder)
print (s)
# Or, if you only need a single row
r = Rating.query.first()
# Serialize it as a single JSON object
s = json.dumps(r.as_dict(), default=alchemyencoder)
# Pass this function as ``default=`` wherever you call json.dumps(); it handles
# the date/datetime and Decimal values that json cannot encode on its own.
# credit to Joonas # http://codeandlife.com/2014/12/07/sqlalchemy-results-to-json-the-easy-way/
def alchemyencoder(obj):
    """JSON encoder function for SQLAlchemy special classes.

    Args:
        obj: a value that ``json.dumps`` could not serialize by itself.

    Returns:
        The ISO-8601 string for ``datetime.date`` instances (which includes
        ``datetime.datetime``), or a ``float`` for ``decimal.Decimal``.

    Raises:
        TypeError: for any other type, so that unsupported values are
            reported instead of being silently written out as ``null``.
    """
    if isinstance(obj, datetime.date):
        return obj.isoformat()
    if isinstance(obj, decimal.Decimal):
        return float(obj)
    raise TypeError(
        "Object of type %s is not JSON serializable" % type(obj).__name__
    )
As of Flask-RESTful 0.3.6, the Request Parsing documentation recommends marshmallow:
marshmallow is an ORM/ODM/framework-agnostic library for converting
complex datatypes, such as objects, to and from native Python
datatypes.
A simple marshmallow example is shown below.
# marshmallow's own pprint helper is deprecated; the standard-library one
# prints the dumped dict just as well.
from pprint import pprint

from marshmallow import Schema, fields


class UserSchema(Schema):
    """Declares which User attributes are serialized, and as which type."""

    name = fields.Str()
    email = fields.Email()
    created_at = fields.DateTime()


user = User(name="Monty", email="monty@python.org")
schema = UserSchema()
# dump() converts the object into native Python data types.
result = schema.dump(user)
pprint(result)
# {"name": "Monty",
# "email": "monty@python.org",
# "created_at": "2014-08-17T14:54:16.049594+00:00"}
The core features include:
Declaring Schemas
Serializing Objects (“Dumping”)
Deserializing Objects (“Loading”)
Handling Collections of Objects
Validation
Specifying Attribute Names
Specifying Serialization/Deserialization Keys
Refactoring: Implicit Field Creation
Ordering Output
“Read-only” and “Write-only” Fields
Specify Default Serialization/Deserialization Values
Nesting Schemas
Custom Fields
Here is a way to add an as_dict() method on every class, as well as any other method you want to have on every single class.
Not sure if this is the desired way or not, but it works...
class Base(object):
    """Plain base class carrying the helpers every mapped class should get."""

    def as_dict(self):
        """Return a dict of column name -> attribute value for this row."""
        return {
            column.name: getattr(self, column.name)
            for column in self.__table__.columns
        }
# Rebind the name Base to the declarative base built on top of the plain
# class above, so that mapped classes inherit its methods.
Base = declarative_base(cls=Base)
I've been looking at this problem for the better part of a day, and here's what I've come up with (credit to https://stackoverflow.com/a/5249214/196358 for pointing me in this direction).
(Note: I'm using flask-sqlalchemy, so my model declaration format is a bit different from straight sqlalchemy).
In my models.py file:
import json
class Serializer(object):
    """Mixin that exports a chosen set of attributes as a plain dict."""

    # Names of the attributes to export; subclasses set this to a list.
    __public__ = None
    "Must be implemented by implementors"

    def to_serializable_dict(self):
        """Return ``{name: value}`` for the attributes named in ``__public__``.

        Attributes whose value is falsy (``None``, ``0``, ``''``, ...) are
        left out of the result.
        """
        exported = ((name, getattr(self, name)) for name in self.__public__)
        return {name: value for name, value in exported if value}
class SWEncoder(json.JSONEncoder):
    """JSON encoder that understands Serializer objects and datetimes."""

    def default(self, obj):
        """Convert ``obj`` to a JSON-friendly value, or defer to the base class."""
        if isinstance(obj, Serializer):
            converted = obj.to_serializable_dict()
        elif isinstance(obj, datetime):
            converted = obj.isoformat()
        else:
            # Unknown type: the stock encoder raises the usual TypeError.
            converted = json.JSONEncoder.default(self, obj)
        return converted
def SWJsonify(*args, **kwargs):
    """Build a JSON response from dict-style arguments using SWEncoder.

    Accepts the same arguments as ``dict()``. The output is compact for
    XMLHttpRequest calls and indented by two spaces otherwise.

    Returns:
        An instance of the current application's response class with the
        ``application/json`` mimetype.
    """
    # ``request.is_xhr`` was deprecated and later removed from Werkzeug, so
    # inspect the header it used to check directly.
    requested_with = request.headers.get('X-Requested-With', '')
    is_xhr = requested_with.lower() == 'xmlhttprequest'
    payload = json.dumps(dict(*args, **kwargs), cls=SWEncoder,
                         indent=None if is_xhr else 2)
    return current_app.response_class(payload, mimetype='application/json')
# stolen from https://github.com/mitsuhiko/flask/blob/master/flask/helpers.py
and all my model objects look like this:
class User(db.Model, Serializer):
    """Example model; ``__public__`` names the attributes that are serialized."""

    __public__ = ['id','username']
    # ... field definitions ...
In my views I call SWJsonify wherever I would have called Jsonify, like so:
@app.route('/posts')
def posts():
    """Return up to PER_PAGE posts as JSON under the ``posts`` key."""
    # Named ``page`` so the local does not shadow this view function's name.
    page = Post.query.limit(PER_PAGE).all()
    return SWJsonify({'posts': page})
Seems to work pretty well. Even on relationships. I haven't gotten far with it, so YMMV, but so far it feels pretty "right" to me.
Suggestions welcome.
I was looking for something like the rails approach used in ActiveRecord to_json and implemented something similar using this Mixin after being unsatisfied with other suggestions. It handles nested models, and including or excluding attributes of the top level or nested models.
class Serializer(object):
    """Mixin that serializes a mapped instance, optionally with nested relations."""

    def serialize(self, include=None, exclude=None, only=None):
        """Return a dict of this instance's mapped attributes.

        Args:
            include: mapping of relationship name -> keyword arguments for the
                nested ``serialize()`` call. Attributes whose value is a
                ``BaseQuery`` are serialized only when named here.
            exclude: attribute names to leave out.
            only: if non-empty, the only attribute names to keep.

        Returns:
            A dict mapping attribute name to value; included query-valued
            attributes become lists of nested serialized dicts.
        """
        # None defaults avoid sharing one mutable default object across calls.
        include = {} if include is None else include
        exclude = [] if exclude is None else exclude
        only = [] if only is None else only

        serialized = {}
        for key in inspect(self).attrs.keys():
            # Decide on exclusion before touching the attribute, so that an
            # excluded attribute is never loaded just to be thrown away.
            if key in exclude or (only and key not in only):
                continue
            value = getattr(self, key)
            if isinstance(value, BaseQuery):
                if key not in include:
                    continue
                nested_params = include.get(key, {})
                value = [item.serialize(**nested_params) for item in value]
            serialized[key] = value
        return serialized
Then, to get the BaseQuery serializable I extended BaseQuery
class SerializableBaseQuery(BaseQuery):
    """Query class whose result rows can be serialized in one call."""

    def serialize(self, include=None, exclude=None, only=None):
        """Return a list with the serialized form of every row in the query.

        The three arguments are forwarded unchanged to each row's own
        ``serialize(include, exclude, only)``.
        """
        # Normalize here so each row always receives real containers, never
        # None, and no mutable default object is shared between calls.
        include = {} if include is None else include
        exclude = [] if exclude is None else exclude
        only = [] if only is None else only
        return [row.serialize(include, exclude, only) for row in self]
For the following models
class ContactInfo(db.Model, Serializer):
    """Contact record linked to e-mail addresses and phone numbers."""
    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
    full_name = db.Column(db.String())
    source = db.Column(db.String())
    source_id = db.Column(db.String())
    # Both relationships use lazy='dynamic' and add a ``contact_info``
    # backref on the related class.
    email_addresses = db.relationship('EmailAddress', backref='contact_info', lazy='dynamic')
    phone_numbers = db.relationship('PhoneNumber', backref='contact_info', lazy='dynamic')
class EmailAddress(db.Model, Serializer):
    """E-mail address belonging to a ContactInfo row."""
    id = db.Column(db.Integer, primary_key=True)
    email_address = db.Column(db.String())
    type = db.Column(db.String())
    # Foreign key to the owning contact (contact_info.id).
    contact_info_id = db.Column(db.Integer, db.ForeignKey('contact_info.id'))
class PhoneNumber(db.Model, Serializer):
    """Phone number belonging to a ContactInfo row."""
    id = db.Column(db.Integer, primary_key=True)
    phone_number = db.Column(db.String())
    type = db.Column(db.String())
    # Foreign key to the owning contact (contact_info.id).
    contact_info_id = db.Column(db.Integer, db.ForeignKey('contact_info.id'))
    # NOTE(review): this attribute is named ``phone_numbers`` but targets
    # 'Invite' -- possibly a copy-paste slip; confirm the intended name.
    phone_numbers = db.relationship('Invite', backref='phone_number', lazy='dynamic')
You could do something like
@app.route("/contact/search", methods=['GET'])
def contact_search():
    """Return, as JSON, the contacts whose full name contains ``name``.

    ``name`` is read from the query string. Each contact's phone numbers
    and e-mail addresses are serialized too, without their back-references
    to the contact.
    """
    # Default to "" so that a missing parameter does not end up searching
    # for the literal text "None".
    contact_name = request.args.get("name", "")
    matching_contacts = ContactInfo.query.filter(ContactInfo.full_name.like("%{}%".format(contact_name)))
    serialized_contact_info = matching_contacts.serialize(
        include={
            "phone_numbers" : {
                "exclude" : ["contact_info", "contact_info_id"]
            },
            "email_addresses" : {
                "exclude" : ["contact_info", "contact_info_id"]
            }
        }
    )
    return jsonify(serialized_contact_info)
I was working with a sql query defaultdict of lists of RowProxy objects named jobDict
It took me a while to figure out what Type the objects were.
This was a really simple quick way to resolve to some clean jsonEncoding just by typecasting the row to a list and by initially defining the dict with a value of list.
# Every missing key starts out as an empty list.
jobDict = defaultdict(list)


def set_default(obj):
    """``default`` hook for json.dumps that turns a RowProxy into a list.

    Raises:
        TypeError: if ``obj`` is not a RowProxy.
    """
    # trickyness needed here via import to know type
    if not isinstance(obj, RowProxy):
        raise TypeError
    return list(obj)


jsonEncoded = json.dumps(jobDict, default=set_default)
I just want to add my method for doing this.
Just define a custom JSON encoder to serialize your db models.
class ParentEncoder(json.JSONEncoder):
    """JSON encoder for Parent and Child models, with a ``__dict__`` fallback."""

    def default(self, obj):
        """Return a dict representation of ``obj``."""
        if isinstance(obj, (Parent, Child)):
            payload = {"id": obj.id, "name": obj.name}
            if isinstance(obj, Parent):
                # Only a parent carries its children along.
                payload['children'] = list(obj.child)
            return payload
        # Any other object: a shallow copy of its instance attributes.
        return dict(obj.__dict__)
then in your view function
# Load every parent row.
parents = Parent.query.all()
# Serialize with the custom encoder class.
dat = json.dumps({"data": parents}, cls=ParentEncoder)
# Wrap the JSON text in a 200 response with the JSON mimetype.
resp = Response(response=dat, status=200, mimetype="application/json")
return (resp)
It works well even though the parent has relationships.
It's been a long time and there are lots of valid answers, but the following code block seems to work:
my_object = SqlAlchemyModel()
# Work on a copy: deleting the key from ``__dict__`` itself would strip the
# entry off the live object.
my_serializable_obj = dict(my_object.__dict__)
# Drop the "_sa_instance_state" entry before handing the dict to jsonify().
my_serializable_obj.pop("_sa_instance_state", None)
# Print the same name that was assigned above.
print(jsonify(my_serializable_obj))
I'm aware that this is not a perfect solution, nor as elegant as the others; however, those who want a quick fix might try this.