Is it possible to validate list using marshmallow? - python

Is it possible to validate list using marshmallow?
class SimpleListInput(Schema):
items = fields.List(fields.String(), required=True)
# expected invalid type error
data, errors = SimpleListInput().load({'some': 'value'})
# should be ok
data, errors = SimpleListInput().load(['some', 'value'])
Or it is expected to validate only objects?

To validate top-level lists, you need to instantiate your list item schema with many=True argument.
Example:
class UserSchema(marshmallow.Schema):
name = marshmallow.fields.String()
data, errors = UserSchema(many=True).load([
{'name': 'John Doe'},
{'name': 'Jane Doe'}
])
But it still needs to be an object schema: Marshmallow does not support top-level lists of non-object items. If you need to validate a top-level list of non-object types, a workaround is to define a schema with a single List field of your type and wrap the payload as if it were an object:
class SimpleListInput(marshmallow.Schema):
items = marshmallow.fields.List(marshmallow.fields.String(), required=True)
payload = ['foo', 'bar']
data, errors = SimpleListInput().load({'items': payload})

SimpleListInput is a class with a field "items". The field "items" is what accepts a list of strings.
>>> data, errors = SimpleListInput().load({'items':['some', 'value']})
>>> print data, errors
{'items': [u'some', u'value']}
{}
>>> data, errors = SimpleListInput().load({'items':[]})
>>> print data, errors
{'items': []}
{}
>>> data, errors = SimpleListInput().load({})
>>> print data, errors
{}
{'items': [u'Missing data for required field.']}
If you want a custom validate, for example, not accept an empty list in "items":
from marshmallow import fields, Schema, validates, ValidationError
class SimpleListInput(Schema):
    """Schema that wraps a required list of strings under the ``items`` key."""

    items = fields.List(fields.String(), required=True)

    # The decorator is what registers this method as a validator for the
    # ``items`` field; without it the method is never called during load().
    @validates('items')
    def validate_length(self, value, **kwargs):
        """Reject an empty ``items`` list.

        ``**kwargs`` absorbs any extra keyword arguments the library may pass
        to field validators, so the method keeps working if more are added.

        Raises:
            ValidationError: if ``value`` contains no elements.
        """
        if len(value) < 1:
            raise ValidationError('Quantity must be greater than 0.')
Then...
>>> data, errors = SimpleListInput().load({'items':[]})
>>> print data, errors
{'items': []}
{'items': ['Quantity must be greater than 0.']}
Take a look at Validation
UPDATE:
As @Turn commented below, you can do this:
from marshmallow import fields, Schema, validate
class SimpleListInput(Schema):
items = fields.List(fields.String(), required=True, validate=validate.Length(min=1))

Please take a look at a little library written by me which tries to solve exactly this problem: https://github.com/and-semakin/marshmallow-toplevel.
Installation:
pip install marshmallow-toplevel
Usage (on the example from Maxim Kulkin):
import marshmallow
from marshmallow_toplevel import TopLevelSchema
class SimpleListInput(TopLevelSchema):
_toplevel = marshmallow.fields.List(
marshmallow.fields.String(),
required=True,
validate=marshmallow.validate.Length(1, 10)
)
# raises ValidationError, because:
# Length must be between 1 and 10.
SimpleListInput().load([])
# raises ValidationError, because:
# Length must be between 1 and 10.
SimpleListInput().load(["qwe" for _ in range(11)])
# successfully loads data
payload = ["foo", "bar"]
data = SimpleListInput().load(payload)
assert data == ["foo", "bar"]
Of course it can be used with more complex schemas than just string in example.

It is possible to use a Field type directly [documentation]:
simple_list_input = fields.List(fields.String(), required=True)
# ValidationError: Not a valid list.
simple_list_input.deserialize({'some': 'value'})
# ValidationError: {0: ['Not a valid string.']}
simple_list_input.deserialize([1, 'value'])
# Returns: ['some', 'value'] with no errors
simple_list_input.deserialize(['some', 'value'])
In comparison with Schema:
deserialize == load
serialize == dump

Related

Create dynamic Pydantic model with typed optional values

I want to create a dynamic model from my data, create JSON from it, and load it in another location.
I can't find a way to define a field so that it is optional, but is still validated whenever data exists for that field.
This is for required field:
fields[col_name] = (data_type, None)
# resolve types for data
data_type = resolve_type(data)
required = is_required(data)
if required:
fields[col_name] = (data_type, ...)
else:
fields[col_name] = (data_type, None) <--- unclear
...
pydantic.create_model(name, **fields)
The above configuration generates a JSON model that makes the fields optional and typed, but when I validate the input data against it I can't pass None values - '$.inputs.0.Field', 'message': "None is not of type 'string'"
So my question - how to declare a field that would validate input, but only when it's not None.
And in such a manner, that I could create JSON schema and load it in another location.
Implemented workaround based on suggestions in https://github.com/samuelcolvin/pydantic/issues/990
optional_fields = list()
if required:
fields[col_name] = (data_type, ...)
else:
fields[col_name] = (data_type, None)
optional_fields.append(col_name)
model = pydantic.create_model(name, **fields)
def schema_extra(schema, model):
for column in optional_fields:
original_type = schema["properties"][column]["type"]
schema["properties"][column].update({"type": ["null", original_type]})
model.__config__.schema_extra = schema_extra
schema_json = model.schema_json()
Foo = create_model("Foo", foo = (int, None))
Foo(foo=None)
# Foo(foo=None)
Foo()
# Foo(foo=None)
Foo(foo=3)
# Foo(foo=3)
Is this your desired result?

Allowing empty dates with Marshmallow

I am trying to get data from a web page. This page contains release information, but allows some values to be left unset; e.g. the from/to dates for testing might be empty strings.
Now I try to deserialize all my data sucked from the page to insert it to a database and face problems handling empty dates.
from marshmallow import fields, Schema, ValidationError
class TestSchema(Schema):
training_necessary = fields.Function(deserialize=lambda x: True if x == 'Yes' else False)
test_from = fields.Date()
test_to = fields.Date()
data = dict(training_necessary='Yes', test_from='', test_to='')
try:
validated = TestSchema().load(data)
except ValidationError as err:
print(f"{err}")
Result:
{'test_to': ['Not a valid date.'], 'test_from': ['Not a valid date.']}
I already tried several combinations of allow_none=True or default='' but none of them helped me to get through. So, how can I allow empty dates? Setting a default to something like 1970-01-01 won't help in that case.
Any hints?
Regards, Thomas
+++ EDIT: SOLUTION +++
Here's the working code I ended up with after Jérôme's helpful tip:
from marshmallow import fields, Schema, ValidationError, pre_load
class TestSchema(Schema):
    """Schema whose date fields accept None, with empty strings mapped to None on load."""

    # The comparison already yields a bool, so no conditional expression is needed.
    training_necessary = fields.Function(deserialize=lambda x: x == 'Yes')
    test_from = fields.Date(allow_none=True)
    test_to = fields.Date(allow_none=True)

    # The decorator registers the hook; without it the method is never invoked
    # and empty strings reach the Date fields unchanged.
    @pre_load(pass_many=False)
    def string_to_none(self, data, many, **kwargs):
        """Return a copy of ``data`` with every empty-string value replaced by None.

        A new dict is built so the caller's input mapping is left untouched.
        """
        return {key: (None if value == '' else value) for key, value in data.items()}
data = dict(training_necessary='Yes', test_from='', test_to='')
try:
validated = TestSchema().load(data)
except ValidationError as err:
print(f"{err}")
I would pass no value at all.
data = dict(training_necessary='Yes')
Or I'd make the date fields allow_none and I'd pass None, not an empty string.
data = dict(training_necessary='Yes', test_from=None, test_to=None)
If the issue is that your input contains empty strings, I'd say this is a client issue, but you can add a pre_load method to delete empty strings from the input before deserializing. This is more or less equivalent to modifying the values you scrape from the page before feeding them to marshmallow.

Django ORM, how to use values() and still work with choicefield?

I am using django v1.10.2
I am trying to create dynamic reports whereby I store fields and conditions and the main ORM model information into database.
My code for the generation of the dynamic report is
class_object = class_for_name("app.models", main_model_name)
results = (class_object.objects.filter(**conditions_dict)
.values(*display_columns)
.order_by(*sort_columns)
[:50])
So main_model_name can be anything.
This works great except that sometimes associated models of the main_model have choicefield.
So for one of the reports main_model is Pallet.
Pallet has many PalletMovement.
My display columns are :serial_number, created_at, pallet_movement__location
The first two columns are fields that belong to Pallet model.
The last one is from PalletMovement
What happens is that PalletMovement model looks like this:
class PalletMovement(models.Model):
pallet = models.ForeignKey(Pallet, related_name='pallet_movements',
verbose_name=_('Pallet'))
WAREHOUSE_CHOICES = (
('AB', 'AB-Delaware'),
('CD', 'CD-Delaware'),
)
location = models.CharField(choices=WAREHOUSE_CHOICES,
max_length=2,
default='AB',
verbose_name=_('Warehouse Location'))
Since the queryset will return me the raw values, how can I make use of the choicefield in PalletMovement model to ensure that the pallet_movement__location gives me the display of AB-Delaware or CD-Delaware?
Bear in mind that the main_model can be anything depending on what I store in the database.
Presumably, I can store more information in the database to help me do the filtering and presentation of data even better.
The values() method returns a dictionary of key-value pairs representing your field name and a corresponding value.
For example:
Model:
class MyModel(models.Model):
name = models.CharField()
surname = models.CharField()
age = models.IntegerField()
...
Query:
result = MyModel.objects.filter(surname='moutafis').values('name', 'surname')
Result:
<QuerySet [{'name': 'john', 'surname': 'moutafis'}]>
You can now manipulate this result as you would a normal dictionary:
# Compare strings with ==; ``is`` tests object identity and is not reliable here.
if main_model_name == 'PalletMovement':
    # Make life easier: map stored codes to their display labels once.
    choices = dict(PalletMovement.WAREHOUSE_CHOICES)
    location_key = 'pallet_movement__location'
    for item in result:
        raw_value = item.get(location_key)
        # Fall back to the stored value when it has no matching label.
        item.update({location_key: choices.get(raw_value, raw_value)})
You can even make this into a function for better re-usability:
def verbalize_choices(choices_dict, queryset, search_key):
    """Replace the stored value under ``search_key`` in each row with its label.

    Args:
        choices_dict: mapping from stored value to display label.
        queryset: iterable of dict rows (e.g. the result of ``values()``);
            the rows are updated in place.
        search_key: the column whose value should be translated.

    Returns:
        The same ``queryset`` object that was passed in, after updating its rows.
    """
    result = queryset
    for item in result:
        # Look up the row's value (not the column name) in the choices mapping,
        # keeping the original value when no label is defined for it.
        raw_value = item.get(search_key)
        item.update({search_key: choices_dict.get(raw_value, raw_value)})
    return result
verbal_result = verbalize_choices(
dict(PalletMovement.WAREHOUSE_CHOICES),
result,
'pallet_movement__location'
)
I suggest the use of the update() and get() methods because they will save you from potential errors, like:
If the looked-up key does not exist in choices_dict, get() returns the fallback passed as its second argument instead of raising a KeyError.
update() will overwrite the given key's value if the key exists; otherwise it will add the key-value pair to the dictionary.
If the usage of the above will be in the template representation of your data, you can create a custom template filter instead:
# The decorator registers the function as a template filter named 'verbalize_choice'.
@register.filter(name='verbalize_choice')
def choice_to_verbal(choice):
    """Return the display label for ``choice`` from PalletMovement.WAREHOUSE_CHOICES.

    A value with no matching label is returned unchanged rather than raising
    KeyError.
    """
    return dict(PalletMovement.WAREHOUSE_CHOICES).get(choice, choice)
Have an extra look here: Django: How to access the display value of a ChoiceField in template given the actual value and the choices?
You would use get_foo_display
In your template:
{{ obj.get_location_display }}
or
{{ obj.pallet_movement.get_location_display }}
[Edit:] As pointed out in the comments this will not work when calling values()
an alternative to create a templatetag is :
{{form.choicefield.1}}
This shows the value of the initial data of the foreign key field instead the id.
The universal solution for any main_model_name is by Django Model _meta API introspection: class_object._meta.get_field(field_name).choices
That is:
# Build, per displayed column, a mapping from stored value to display label.
choice_dicts = {}
for field_name in display_columns:
    field_choices = class_object._meta.get_field(field_name).choices
    # A field declared without choices may expose None or an empty sequence
    # here (NOTE(review): depends on Django version); treat both as "no labels".
    choice_dicts[field_name] = dict(field_choices or ())

# Translate every value that has a label; leave all other values untouched.
out = [
    {name: choice_dicts[name].get(value, value) for name, value in row.items()}
    for row in results
]
The rest is a trivial example, mostly copied code from the question
>>> pallet = app.models.Pallet.objects.create()
>>> palletm = app.models.PalletMovement.objects.create(pallet=pallet, location='AB')
>>>
>>> main_model_name = 'PalletMovement'
>>> conditions_dict = {}
>>> display_columns = ['pallet_id', 'location']
>>> sort_columns = []
>>>
>>> class_object = class_for_name("app.models", main_model_name)
>>> results = (class_object.objects.filter(**conditions_dict)
... .values(*display_columns)
... .order_by(*sort_columns)
... )[:50]
>>>
>>> # *** INSERT HERE ALL CODE THAT WAS ABOVE ***
>>>
>>> print(out)
[{'location': 'AB-Delaware', 'pallet_id': 1}]
It works equally well with 'pallet_id' or with 'pallet' in display_columns. Even though "_meta" starts with an underscore, it is a documented API.

Checking null object from JSON response in Python

I have the following code, and it works. I am checking if a JSON object has a full field and does not contain the underlying fields (Jira API, if you're interested). Is there a more concise way of writing the for loop?
myResponse = requests.get(url,auth=(urlUser,urlPass))
jd = myResponse.json()
myVals = jd['issues']
print(myVals[0].keys())
for issue in myVals:
if issue['fields']['assignee'] is not None:
assignee = issue['fields']['assignee']['displayName']
else:
assignee = "Unassigned"
You can use dict.get with fallback dictionary:
>>> issues = {'fields': {'assignee': None}}
>>> issues['fields']['assignee'] or {} # fallback to an empty dictionary
{}
>>> (issues['fields']['assignee'] or {}).get('displayName', 'Unassigned')
'Unassigned'
for issue in myVals:
assignee = (issue['fields']['assignee'] or {}).get('displayName', 'Unassigned')
OR define fallback dictionary like below:
UNASSIGNED = {'displayName': 'Unassigned'}
for issue in myVals:
assignee = (issue['fields']['assignee'] or UNASSIGNED)['displayName']

serializing sqlalchemy class to json

I'm trying to serialize the result (a list) of an sqlalchemy query to json.
this is the class:
class Wikilink(Base):
__tablename__='Wikilinks'
__table_args__={'extend_existing':True}
id = Column(Integer,autoincrement=True,primary_key=True)
title = Column(Unicode(350))
user_ip = Column(String(50))
page = Column(String(20))
revision = Column(String(20))
timestamp = Column(String(50))
and I guess my problem is with the __repr__(self): function.
I tried something like:
return '{{0}:{"title":{1}, "Ip":{2}, "page":{3} ,"revision":{4}}}'.format(self.id,self.title.encode('utf-8'),self.user_ip,self.page,self.revision)
or:
return '{"id"={0}, "title"={1}, "Ip"={2}}'.format(self.id,self.title.encode('utf-8'),self.user_ip.encode('utf-8'),self.page,self.revision)
and I got:
TypeError(repr(o) + " is not JSON serializable")
ValueError: Single '}' encountered in format string
I tried:
return '{id=%d, title=%s, Ip=%s}'%(self.id,self.title.encode('utf-8'),self.user_ip.encode('utf-8'))
and I got:
TypeError: {id=8126, title=1 בדצמבר, Ip=147.237.70.106} is not JSON serializable
adding "" around (according to the JSON formatting) like this: "id"="%d", "title"="%s", "Ip"="%s" didn't help either.
I know this is supposed to be dead simple but I just can't get this right
actually bottle is handling the jsonification part automatically, but trying to call json.dumps on the result gives me the same errors.
Instead of trying to convert to json a string, you could define, for example, your own to_dict method that returns the dictionary structure it seems you're trying to create and, after that, generate the json from that structure:
>>> import json
>>> d = {'id':8126, 'title':u'1 בדצמבר', 'ip':'147.237.70.106'}
>>> json.dumps(d)
'{"ip": "147.237.70.106", "id": 8126, "title": "1 \\u05d1\\u05d3\\u05e6\\u05de\\u05d1\\u05e8"}'
I'm not sure I understand what you tried. Couldn't you build the dict and let json.dumps() do the work for you?
Something like:
>>> class Foo:
... id = 1
... title = 'my title'
... to_jsonize = ['id', 'title']
>>>
>>> dct = {name: getattr(Foo,name) for name in Foo.to_jsonize}
>>> import json
>>> json.dumps(dct)
'{"id": 1, "title": "my title"}'

Categories