flask-restplus fields.Nested() with raw Dict (not model) - python

Spoiler alert: I posted my solution as an answer to this question
I am using flask-restplus to create an API. I have to provide the data in a specific structure, which I am having trouble producing; see the example below:
What I need to get is this structure:
{
"metadata": {
"files": []
},
"result" : {
"data": [
{
"user_id": 1,
"user_name": "user_1",
"user_role": "editor"
},
{
"user_id": 2,
"user_name": "user_2",
"user_role": "editor"
},
{
"user_id": 3,
"user_name": "user_3",
"user_role": "curator"
}
]
}
}
But the problem comes that I cannot manage to get the structure of "result" : { "data": []} without making "data" a model itself.
What I tried to do so far (and did not work)
# define metadata model
metadata_model = api.model('MetadataModel', {
    "files": fields.List(fields.String(required=False, description='')),
})

# define user model
user_model = api.model('UserModel', {
    "user_id": fields.Integer(required=True, description=''),
    "user_name": fields.String(required=True, description=''),
    "user_role": fields.String(required=False, description=''),
})

# here is where I have the problems:
# 'result' is a plain dict rather than a field/model, which is what
# flask-restplus complains about when it builds the schema.
user_list_response = api.model('ListUserResponse', {
    'metadata': fields.Nested(metadata_model),
    'result': {"data": fields.List(fields.Nested(user_model))},
})
It complains that it cannot get the "schema" from "data" (because it is not a defined model), but I don't want "data" to be a new API model; I just want to add a key called "data". Any suggestions?
This I tried and works, but is not what I want (because I miss the "data"):
user_list_response = api.model('ListUserResponse', {
'metadata': fields.Nested(metadata_model),
'result' : fields.List(fields.Nested(user_model))
})
I don't want data to be a model because the common structure of the api is the following:
{
"metadata": {
"files": []
},
"result" : {
"data": [
<list of objects> # here must be listed the single model
]
}
}
Then, <list of objects> can be users, addresses, jobs, whatever.. so I want to make a "general structure" in which then I can just inject the particular models (UserModel, AddressModel, JobModel, etc) without creating a special data model for each one.

A possible approach is to use fields.Raw which returns whatever serializable object you pass. Then, you can define a second function, which creates your result and uses marshal. marshal transforms your data according to a model and accepts an additional parameter called envelope. envelope surrounds your modeled data by a given key and does the trick.
from flask import Flask
from flask_restplus import Api, fields, Resource, marshal
app = Flask(__name__)
api = Api()
api.init_app(app)
metadata_model = api.model("metadata", {
'file': fields.String()
})
user_model = api.model('UserModel', {
"user_id": fields.Integer(required=True, description=''),
"user_name": fields.String(required=True, description=''),
"user_role": fields.String(required=False, description='')
})
response_model = api.model("Result", {
'metadata': fields.List(fields.Nested(metadata_model)),
'result': fields.Raw()
})
@api.route("/test")
class ApiView(Resource):
    """Example resource that returns users wrapped in a ``result.data`` envelope."""

    @api.marshal_with(response_model)
    def get(self):
        """Build the response payload; it is marshalled with ``response_model``."""
        # 'result' is declared as fields.Raw in response_model, so the
        # already-marshalled, enveloped user list is emitted as given.
        return {
            'metadata': {},
            'result': self.get_user(),
        }

    def get_user(self):
        """Return the user rows marshalled with ``user_model`` under a 'data' key."""
        # Access database and get data (hard-coded here for the example)
        user_data = [
            {'user_id': 1, 'user_name': 'John', 'user_role': 'editor'},
            {'user_id': 2, 'user_name': 'Sue', 'user_role': 'curator'},
        ]
        # The kwarg envelope does the trick
        return marshal(user_data, user_model, envelope='data')
app.run(host='0.0.0.0', debug=True)

My workaround solution that solves all my problems:
I create a new List fields class (it is mainly copied from fields.List), and then I just tune the output format and the schema in order to get the 'data' as key:
class ListData(fields.Raw):
    '''
    Field for marshalling lists of other fields under a ``'data'`` key.

    See :ref:`list-field` for more information.

    :param cls_or_instance: The field type the list will contain.

    This is a modified version of fields.List Class in order to get 'data' as key envelope
    '''

    def __init__(self, cls_or_instance, **kwargs):
        # Pop the list-specific options before handing kwargs to fields.Raw.
        self.min_items = kwargs.pop('min_items', None)
        self.max_items = kwargs.pop('max_items', None)
        self.unique = kwargs.pop('unique', None)
        super(ListData, self).__init__(**kwargs)
        error_msg = 'The type of the list elements must be a subclass of fields.Raw'
        # Accept either a field class (instantiated here) or a field instance.
        # MarshallingError is referenced through the ``fields`` module so the
        # class does not depend on an extra import.
        if isinstance(cls_or_instance, type):
            if not issubclass(cls_or_instance, fields.Raw):
                raise fields.MarshallingError(error_msg)
            self.container = cls_or_instance()
        else:
            if not isinstance(cls_or_instance, fields.Raw):
                raise fields.MarshallingError(error_msg)
            self.container = cls_or_instance

    def format(self, value):
        """Marshal every element of ``value`` and return ``{'data': [...]}``."""
        if isinstance(value, set):
            value = list(value)

        is_nested = isinstance(self.container, fields.Nested) or type(self.container) is fields.Raw

        def is_attr(val):
            return self.container.attribute and hasattr(val, self.container.attribute)

        # Put 'data' as key before the list, and return the dict
        return {'data': [
            self.container.output(
                idx,
                val if (isinstance(val, dict) or is_attr(val)) and not is_nested else value)
            for idx, val in enumerate(value)
        ]}

    def output(self, key, data, ordered=False, **kwargs):
        """Fetch the value for ``key`` (or ``self.attribute``) from ``data`` and marshal it.

        Returns the ``{'data': [...]}`` envelope for list-like values and for a
        single object, or the field default when the value is ``None``.
        """
        value = fields.get_value(key if self.attribute is None else self.attribute, data)
        if fields.is_indexable_but_not_string(value) and not isinstance(value, dict):
            return self.format(value)

        if value is None:
            return self._v('default')
        # A single object is wrapped in the same envelope as a list, so the
        # output always matches the object-with-'data' schema declared below.
        return {'data': [marshal(value, self.container.nested)]}

    def schema(self):
        """Describe the field as an object with one array property, ``data``."""
        schema = super(ListData, self).schema()
        schema.update(minItems=self._v('min_items'),
                      maxItems=self._v('max_items'),
                      uniqueItems=self._v('unique'))
        # work around to get the documentation as I want
        schema['type'] = 'object'
        schema['properties'] = {
            'data': {
                'type': 'array',
                'items': self.container.__schema__,
            },
        }
        return schema

Related

Django serializer test post of file with user information

I try to test a file upload like this:
# NOTE(review): this line read "#deconstructible" in the source; presumably it
# was Django's @deconstructible decorator with the '@' lost. Re-enable it only
# if django.utils.deconstruct.deconstructible is imported in this module.
class FileGenerator:
    """Factory for small in-memory files used in upload tests."""

    @staticmethod
    def generate_text_file(file_ending='txt'):
        """Return a ``BytesIO`` named ``test.<file_ending>`` holding fixed bytes.

        The stream is positioned at offset 0, ready to be read.
        """
        file_content = b'some test string'
        file = io.BytesIO(file_content)
        file.name = f'test.{file_ending}'
        file.seek(0)
        return file
def test_this(self, api_client, login_as):
user = login_as('quality-controller')
url = reverse('test-list')
organization = Organization(name="test")
organization.save()
data = {
"organization": organization.id,
"import_file": FileGenerator.generate_text_file('txt'),
"user": {
"id": user.id,
"username": user.username,
}
}
response = api_client.post(url, data, format='json')
But I receive the following error message:
b'{"import_file": ["The submitted data was not a file. Check the
encoding type on the form."]}'
I also tried to use: format='multipart' but then I receive the following error:
AssertionError: Test data contained a dictionary value for key 'user',
but multipart uploads do not support nested data. You may want to
consider using format='json' in this test case.
How can I solve this?
This is how I deal with this issue:
Simplest: flatten the form
Suck it up and just remove the issue by making your serializer to use user_id and user_username and fix it up on the server side in the serializer's validate(self, attrs) method. A bit ugly/hacky but it works just fine and can be documented.
def validate(self, attrs):
    """Fold the flat ``user_id`` / ``user_username`` keys into a nested ``user`` dict.

    Both flat keys are removed from ``attrs``; a missing key raises ``KeyError``.
    ``attrs`` is modified in place and also returned.
    """
    user_id = attrs.pop("user_id")
    user_name = attrs.pop("user_username")
    attrs["user"] = {"id": user_id, "name": user_name}
    return attrs
Nicest if you dont mind the size: B64 Fields
You can base64 encode the file field and pass it in the json. Then to decode it on the server side you would write (or search for) a simple Base64FileField() for DRF.
class UploadedBase64ImageSerializer(serializers.Serializer):
file = Base64ImageField(required=False)
created = serializers.DateTimeField()
Alternative - Flatten the form data
You can't pass nested data, but you can flatten the nested dicts and pass that to a DRF service. Serializers actually can understand nested data if the field names are correct.
I don't know if this field name format is standardized, but this is what worked for me after experimentation. I only use it for service->service communication TO drf, so you would have to clone it into JS, but you can use the python in unit tests. Let me know if it works for you.
def flatten_dict_for_formdata(input_dict, array_separator="[{i}]"):
    """
    Recursively flattens nested dict()s into a single level suitable
    for passing to a library that makes multipart/form-data posts.

    :param input_dict: the (possibly nested) mapping to flatten; nested
        lists and tuples are expanded element by element.
    :param array_separator: format string applied to each list index
        (``{i}``) and appended to the key prefix, e.g. ``name[1]``.
    :return: an ``OrderedDict`` mapping each flattened key to its leaf value.
    """
    # Local import keeps the function self-contained.
    from collections import OrderedDict

    def _flatten(value, prefix, result_dict, previous=None):
        if isinstance(value, dict):
            # If we just processed a dict, then separate with a "."
            # Don't do this if it is an object inside an array.
            # In that case the [:id] _is_ the separator, adding
            # a "." like list[1].name will break but list[x]name
            # is correct (at least for DRF/django decoding)
            if previous == "dict":
                prefix += "."
            for key, v in value.items():
                _flatten(
                    value=v,
                    prefix=prefix + key,
                    result_dict=result_dict,
                    previous="dict",
                )
        elif isinstance(value, (list, tuple)):
            for i, v in enumerate(value):
                _flatten(
                    value=v,
                    prefix=prefix + array_separator.format(i=i),  # e.g. name[1]
                    result_dict=result_dict,
                    previous="array",
                )
        else:
            result_dict[prefix] = value
        # return here to simplify the caller's life. ignored during recursion
        return result_dict

    return _flatten(input_dict, '', OrderedDict(), None)
# flatten_dict_for_formdata({...}):
{ # output field name
"file": SimpleUploadFile(...), # file
"user": {
"id": 1, # user.id
"name": "foghorn", # user.name
"jobs": [
"driver", # user.jobs[0]
"captain", # user.jobs[1]
"pilot" # user.jobs[2]
]
},
"objects": [
{
"type": "shoe", # objects[0]type
"size": "44" # objects[0]size
},
]
}

Union type cannot resolve Object Type at Runtime

I am setting up a GraphQL Server with Python using Starlette and Graphene and ran into a problem I cannot find a solution for. The Graphene Documentation does not go into detail regarding the union type, which I am trying to implement.
I set up a minimum example based on the graphene documentation which you can run to replicate this problem
import os
import uvicorn
from graphene import ObjectType, Field, List, String, Int, Union
from graphene import Schema
from starlette.applications import Starlette
from starlette.graphql import GraphQLApp
from starlette.routing import Route
mock_data = {
"episode": 3,
"characters": [
{
"type": "Droid",
"name": "R2-D2",
"primaryFunction": "Astromech"
},
{
"type": "Human",
"name": "Luke Skywalker",
"homePlanet": "Tatooine"
},
{
"type": "Starship",
"name": "Millennium Falcon",
"length": 35
}
]
}
class Human(ObjectType):
name = String()
homePlanet = String()
class Droid(ObjectType):
name = String()
primary_function = String()
class Starship(ObjectType):
name = String()
length = Int()
class Characters(Union):
class Meta:
types = (Human, Droid, Starship)
class SearchResult(ObjectType):
characters = List(Characters)
episode = Int()
class RootQuery(ObjectType):
result = Field(SearchResult)
#staticmethod
def resolve_result(_, info):
return mock_data
graphql_app = GraphQLApp(schema=Schema(query=RootQuery))
routes = [
Route("/graphql", graphql_app),
]
api = Starlette(routes=routes)
if __name__ == "__main__":
uvicorn.run(api, host="127.0.0.1", port=int(os.environ.get("PORT", 8080)))
If you then go to http://localhost:8080/graphql and enter the following query:
query Humans{
result {
episode
characters {
... on Human {
name
}
}
}
}
I get this error
{
"data": {
"result": {
"episode": 3,
"characters": null
}
},
"errors": [
{
"message": "Abstract type Characters must resolve to an Object type at runtime for field SearchResult.characters with value \"[{'type': 'Droid', 'name': 'R2-D2', 'primaryFunction': 'Astromech'}, {'type': 'Human', 'name': 'Luke Skywalker', 'homePlanet': 'Tatooine'}, {'type': 'Starship', 'name': 'Millennium Falcon', 'length': 35}]\", received \"None\".",
"locations": [
{
"line": 4,
"column": 5
}
]
}
]
}
which I am now stuck with. Maybe someone has done this already and can help out? How can I resolve this at runtime. I have already tried different approaches for example I changed classes Character and RootQuery:
class Character(Union):
class Meta:
types = (Human, Droid, Starship)
def __init__(self, data, *args, **kwargs):
super().__init__(*args, **kwargs)
self.data = data
self.type = data.get("type")
def resolve_type(self, info):
if self.type == "Human":
return Human
if self.type == "Droid":
return Droid
if self.type == "Starship":
return Starship
class RootQuery(ObjectType):
result = Field(SearchResult)
#staticmethod
def resolve_result(_, info):
return {**mock_data, "characters": [Character(character) for character in mock_data.get('characters')]}
resulting in
{
"data": {
"result": {
"episode": 3,
"characters": [
{},
{
"name": null
},
{}
]
}
}
}
Any ideas would be very appreciated!
jkimbo answered the question here:
class Character(Union):
    """Union of the concrete character types, resolved from each item's "type" key."""

    class Meta:
        types = (Human, Droid, Starship)

    @classmethod
    def resolve_type(cls, instance, info):
        """Return the object type matching ``instance["type"]``.

        ``instance`` is one raw dict from the resolved data. An unrecognised
        type name returns ``None``; a missing "type" key raises ``KeyError``.
        """
        type_name = instance["type"]
        if type_name == "Human":
            return Human
        if type_name == "Droid":
            return Droid
        if type_name == "Starship":
            return Starship
        return None
class RootQuery(ObjectType):
result = Field(SearchResult)
def resolve_result(_, info):
return mock_data
Note I'm just returning mock_data and I've updated the resolve_type method to switch based on the data. The Union type uses the same resolve_type method as Interface to figure out what type to resolve to at runtime: https://docs.graphene-python.org/en/latest/types/interfaces/#resolving-data-objects-to-types

How can I define custom output types for mutations with graphene-django?

Create/remove/update/delete (CRUD) mutations usually return the corresponding database model instance as output type of the mutation. However for non-CRUD mutations I'd like to define business logic specific mutation output types. E.g. returning the count of list elements + a list of IDs which cannot be mapped 1-to-1 between graphql type and db models. How can I achieve this with graphene-django?
List not related to Models
As you want to return both a count and a list of elements, you can create a custom type:
class ListWithCountType(graphene.Scalar):
#staticmethod
def serialize(some_argument):
# make computation here
count = ...
some_list = ...
return { "count": count, "list": some_list }
Then on your mutation you use it like this:
class MyMutation(graphene.Mutation):
    """Mutation whose only output is a ``ListWithCountType`` field."""

    list_with_count = graphene.Field(ListWithCountType)

    @classmethod
    def mutate(cls, root, info, **kwargs):
        """Build the payload from the required ``some_argument`` keyword.

        Raises ``KeyError`` if ``some_argument`` is not supplied.
        """
        some_argument = kwargs.pop("some_argument")
        return cls(list_with_count=some_argument)
Add to your schema:
class Query(graphene.ObjectType):
my_mutation = MyMutation.Field()
Should return something like:
{
"data": {
"list_with_count": {
"count": <COUNT VALUE>,
"list": <SOME_LIST VALUE>
}
}
}
*PS: if this is only an output, ok. But if you want this type to be an argument, you should also implement "parse_literal" and "parse_value", besides the "serialize".
Here is an example with a custom ErrorType used with forms.
List related to Models
From the docs:
# cookbook/ingredients/schema.py
import graphene
from graphene_django.types import DjangoObjectType
from cookbook.ingredients.models import Category
class CategoryType(DjangoObjectType):
class Meta:
model = Category
class Query(object):
all_categories = graphene.List(CategoryType)
def resolve_all_categories(self, info, **kwargs):
return Category.objects.all()
On your schema:
import graphene
import cookbook.ingredients.schema
class Query(cookbook.ingredients.schema.Query, graphene.ObjectType):
pass
schema = graphene.Schema(query=Query)
Then you can query like:
query {
allCategories {
id
}
}
Should return something like:
{
"data": {
"allCategories": [
{
"id": "1",
},
{
"id": "2",
},
{
"id": "3",
},
{
"id": "4",
}
]
}
}
Here is an example with user model.

Place Primary key inside JSON response

The JSON response i'm getting is as below.
In my code I am trying to fetch the list from the DB, as shown in the image above. In the result, the primary key appears outside of "fields" for each record. How can I place it inside "fields" for every record, like this:
"results":[
"fields":{
"pk": "F09087687633",
"company_name": "Tata",
}
]
Below is my code:
views.py (In below code for loop is to remove app name from the results, same i use to remove primary key, it is working but how can i place it inside for each fields.)
@csrf_exempt
def fleet_owner_list(request):
    """Build the paginated fleet-owner listing for the page given in the request body.

    The request body is parsed as JSON and used directly as the page number.
    """
    page_number = json.loads(request.body)
    records, total_pages = FleetOwner.get_all_owners(page_number)
    # Strip the app/model label that the serializer adds to every record.
    for record in records:
        del record['model']
    returnObject = {
        "page": page_number,
        "total_results": len(records),
        "total_pages": total_pages,
        "status": messages.RETRIVAL_SUCCESS,
        "message": messages.FLEETOWNER_DATA_RETRIEVE_SUCCESS,
        "results": records,
    }
    # NOTE(review): the snippet ends here without returning returnObject;
    # presumably the original view wraps it in an HTTP response - confirm.
models.py
@classmethod
def get_all_owners(cls, page_number):
    """Return ``(records, total_page_count)`` for one page of non-deleted owners.

    ``records`` is the requested page serialized to JSON and decoded back
    into plain Python objects.
    """
    owners = cls.objects.filter(is_deleted=False)
    page_records, total_page_count = list_paginate(owners, page_number)
    serialized = serializers.serialize("json", page_records)
    return json.loads(serialized), total_page_count
paginator.py (This is a common function i'm using for all the list functions to perform django pagination.)
def list_paginate(data, page_numbers):
    """Return ``(page, total_pages)`` for ``data`` split into pages of 10 items.

    Falls back to the first page when ``page_numbers`` is not an integer and
    to the last page when it is out of range.
    """
    paginator = Paginator(data, 10)
    try:
        records = paginator.page(page_numbers)
    except PageNotAnInteger:
        records = paginator.page(1)
    except EmptyPage:
        records = paginator.page(paginator.num_pages)
    return records, paginator.num_pages
By default this is how Django serialize a queryset into JSON Objects. The Django documentation also state the same. The whole collection is just represented as an array and the objects are represented by JSON objects with three properties: “pk”, “model” and “fields”. “fields” is again an object containing each field’s name and value as property and property-value respectively. i.e.,
[
{
"pk": "4b678b301dfd8a4e0dad910de3ae245b",
"model": "sessions.session",
"fields": {
"expire_date": "2013-01-16T08:16:59.844Z",
...
}
}
]
If you look at the Serializer class implementation you can find a get_dump_object method which is responsible for the final JSON output for an object. i.e.,
def get_dump_object(self, obj):
data = {'model': str(obj._meta)}
if not self.use_natural_primary_keys or not hasattr(obj, 'natural_key'):
data["pk"] = self._value_from_field(obj, obj._meta.pk)
data['fields'] = self._current
return data
In other words, this is the method responsible for the format below:
{
"pk": "pk",
"model": "model",
"fields": {
"field1": "2013-01-16T08:16:59.844Z",
...
}
}
Since you want the pk field inside the fields you should create your own JSON serializer class and override the get_dump_object method like the below one,
>>> import json
>>> from pprint import pprint
>>> from django.core.serializers.json import Serializer as JSONSerializer
>>>
>>> class MyCustomSerializer(JSONSerializer):
... def get_dump_object(self, obj):
... data = {'model': str(obj._meta)}
... data['fields'] = self._current
... if not self.use_natural_primary_keys or not hasattr(obj, 'natural_key'):
... data['fields'].update({'pk': self._value_from_field(obj, obj._meta.pk)})
... return data
...
>>> pprint(json.loads(MyCustomSerializer().serialize(User.objects.all())))
[{'fields': {'date_joined': '2019-07-13T05:52:37.885Z',
'email': 'user1@gmail.com',
'first_name': 'User',
'groups': [],
'is_active': True,
'is_staff': False,
'is_superuser': False,
'last_login': None,
'last_name': '1',
'password': '',
'pk': 1,
'user_permissions': [],
'username': ''},
'model': 'auth.user'}]
# You can see that `pk` is now inside the 'fields' key.
>>> json.loads(MyCustomSerializer().serialize(User.objects.all()))[0]['fields']['pk']
1

Adding session attributes in Python for Alexa skills

I have 3 slots (account, dollar_value, recipient_first) within my intent schema for an Alexa skill and I want to save whatever slots are provided by the speaker in the session Attributes.
I am using the following methods to set session attributes:
def create_dollar_value_attribute(dollar_value):
    """Return a one-entry attribute dict keyed by ``"dollar_value"``."""
    return {"dollar_value": dollar_value}
def create_account_attribute(account):
    """Return a one-entry attribute dict keyed by ``"account"``."""
    return {"account": account}
def create_recipient_first_attribute(recipient_first):
    """Return a one-entry attribute dict keyed by ``"recipient_first"``."""
    return {"recipient_first": recipient_first}
However, as you may guess, if I want to save more than one slot as data in sessionAttributes, the sessionAttributes is overwritten as in the following case:
session_attributes = {}
if session.get('attributes', {}) and "recipient_first" not in session.get('attributes', {}):
recipient_first = intent['slots']['recipient_first']['value']
session_attributes = create_recipient_first_attribute(recipient_first)
if session.get('attributes', {}) and "dollar_value" not in session.get('attributes', {}):
dollar_value = intent['slots']['dollar_value']['value']
session_attributes = create_dollar_value_attribute(dollar_value)
The JSON response from my lambda function for a speech input in which two slots (dollar_value and recipient_first) were provided is as follows (my guess is that the create_dollar_value_attribute method in the second if statement is overwriting the first):
{
"version": "1.0",
"response": {
"outputSpeech": {
"type": "PlainText",
"text": "Some text output"
},
"card": {
"content": "SessionSpeechlet - Some text output",
"title": "SessionSpeechlet - Send Money",
"type": "Simple"
},
"reprompt": {
"outputSpeech": {
"type": "PlainText"
}
},
"shouldEndSession": false
},
"sessionAttributes": {
"dollar_value": "30"
}
}
The correct response for sessionAttributes should be:
"sessionAttributes": {
"dollar_value": "30",
"recipient_first": "Some Name"
},
How do I create this response? Is there a better way to add values to sessionAttributes in the JSON response?
The easiest way to add sessionAttributes with Python in my opinion seems to be by using a dictionary. For example, if you want to store some of the slots for future in the session attributes:
session['attributes']['slotKey'] = intent['slots']['slotKey']['value']
Next, you can just pass it on to the build response method:
buildResponse(session['attributes'], buildSpeechletResponse(title, output, reprompt, should_end_session))
The implementation in this case:
def buildSpeechletResponse(title, output, reprompt_text, should_end_session):
    """Assemble the ``response`` part of the skill's reply.

    :param title: card title; prefixed with "SessionSpeechlet - ".
    :param output: text used for both the spoken output and the card content.
    :param reprompt_text: text placed in the reprompt's output speech.
    :param should_end_session: value stored under ``shouldEndSession``.
    :return: dict with ``outputSpeech``, ``card``, ``reprompt`` and
        ``shouldEndSession`` keys.
    """
    return {
        'outputSpeech': {
            'type': 'PlainText',
            'text': output,
        },
        'card': {
            'type': 'Simple',
            'title': "SessionSpeechlet - " + title,
            'content': "SessionSpeechlet - " + output,
        },
        'reprompt': {
            'outputSpeech': {
                'type': 'PlainText',
                'text': reprompt_text,
            },
        },
        'shouldEndSession': should_end_session,
    }
def buildResponse(session_attributes, speechlet_response):
    """Wrap a speechlet response and the session attributes in the top-level reply.

    :param session_attributes: value stored under ``sessionAttributes``.
    :param speechlet_response: value stored under ``response``.
    :return: dict with ``version`` ('1.0'), ``sessionAttributes`` and ``response``.
    """
    return {
        'version': '1.0',
        'sessionAttributes': session_attributes,
        'response': speechlet_response,
    }
This creates the sessionAttributes in the recommended way in the Lambda response JSON.
Also just adding a new sessionAttribute doesn't overwrite the last one if it doesn't exist. It will just create a new key-value pair.
Do note, that this may work well in the service simulator but may return a key attribute error when testing on an actual Amazon Echo. According to this post,
On Service Simulator, sessions starts with Session:{ ... Attributes:{}, ... }
When sessions start on the Echo, Session does not have an Attributes key at all.
The way I worked around this was to just manually create it in the lambda handler whenever a new session is created:
if event['session']['new']:
event['session']['attributes'] = {}
onSessionStarted( {'requestId': event['request']['requestId'] }, event['session'])
if event['request']['type'] == 'IntentRequest':
return onIntent(event['request'], event['session'])
First, you have to define the session_attributes.
session_attributes = {}
Then instead of using
session_attributes = create_recipient_first_attribute(recipient_first)
You should use
session_attributes.update(create_recipient_first_attribute(recipient_first)).
The problem you are facing is because you are reassigning the session_attributes. Instead of this, you should just update the session_attributes.
So your final code will become:
session_attributes = {}
if session.get('attributes', {}) and "recipient_first" not in session.get('attributes', {}):
recipient_first = intent['slots']['recipient_first']['value']
session_attributes.update(create_recipient_first_attribute(recipient_first))
if session.get('attributes', {}) and "dollar_value" not in session.get('attributes', {}):
dollar_value = intent['slots']['dollar_value']['value']
session_attributes.update(create_dollar_value_attribute(dollar_value))
The ASK SDK for Python provides an attribute manager, to manage request/session/persistence level attributes in the skill. You can look at the color picker sample, to see how to use these attributes in skill development.
Take a look at the below:
account = intent['slots']['account']['value']
dollar_value = intent['slots']['dollar_value']['value']
recipient_first = intent['slots']['recipient_first']['value']
# put your data in a dictionary
attributes = {
'account':account,
'dollar_value':dollar_value,
'recipient_first':recipient_first
}
Put the attributes dictionary in 'sessionAttributes' in your response. You should get it back in 'sessionAttributes' once Alexa replies to you.
Hope this helps.
The following code snippet will also prevent overwriting the session attributes:
# Start from whatever the session already holds so earlier slots survive,
# then add each slot only if it has not been stored yet.
session_attributes = session.get('attributes', {})
if "recipient_first" not in session_attributes:
    session_attributes['recipient_first'] = intent['slots']['recipient_first']['value']
if "dollar_value" not in session_attributes:
    session_attributes['dollar_value'] = intent['slots']['dollar_value']['value']

Categories