How to emulate the AWS Lambda context locally for testing - Python

I have created a Lambda in which I retrieve the region from context.invoked_function_arn.
I would like to emulate this behaviour in a test script file rather than via conftest.py, because I have other unit test methods implemented in the same test script.
I found this link lambda-context, which is useful, and implemented the same concept in my test script. However, I keep getting the error AttributeError: 'function' object has no attribute 'invoked_function_arn'.
@pytest.fixture
def mock_lambda_context():
class ClientContext:
"""
Class for mocking Context
Has `custom`, `env`, and `client` `__slots__`
"""
__slots__ = ["custom", "env", "client"]
def make_obj_from_dict(_class, _dict, fields=None):
"""
Makes an object of `_class` from a `dict`
:param _class: A class representing the context
:type _class: `ContextClass`
:param _dict: A dictionary of data
:type _dict: `dict`
:param fields: [description], defaults to None
:type fields: [type], optional
:return: An object
:rtype: `ClientContext` class
"""
if _dict is None:
return None
obj = _class()
set_obj_from_dict(obj, _dict)
return obj
def set_obj_from_dict(obj, _dict, fields=None):
if fields is None:
fields = obj.__class__.__slots__
for field in fields:
setattr(obj, field, _dict.get(field, None))
class LambdaContext(object):
"""
Create a Lambda Context Class object
"""
def __init__(self, invokeid, client_context, invoked_function_arn=None):
self.aws_request_id = invokeid
self.log_group_name = "AWS_LAMBDA_LOG_GROUP_NAME"
self.log_stream_name = "AWS_LAMBDA_LOG_STREAM_NAME"
self.function_name = "AWS_LAMBDA_FUNCTION_NAME"
self.memory_limit_in_mb = "AWS_LAMBDA_FUNCTION_MEMORY_SIZE"
self.function_version = "AWS_LAMBDA_FUNCTION_VERSION"
self.invoked_function_arn = "arn:aws:lambda:eu-west-1:123456789012:function:" \
"ExampleLambdaFunctionResourceName-AULC3LB8Q02F"
self.client_context = make_obj_from_dict(ClientContext, client_context)
if self.client_context is not None:
self.client_context.client = None
self.identity = None
def get_remaining_time_in_millis(self):
return None
def log(self, msg):
str_msg = str(msg)
print(str_msg)
context = LambdaContext("AWS_ID", {})
return context
def test_handler(events, mock_lambda_context):
res = handler(events, mock_lambda_context)
assert res['context.invoked_function_arn'] == "arn:aws:lambda:eu-west-1:123456789012:function:ExampleLambdaFunctionResourceName-AULC3LB8Q02F"
Can anyone point me to how to fix this?
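For illustration, a minimal sketch of a handler along these lines (an assumption based on the description above, not the actual code; in a Lambda ARN the region is the fourth colon-separated field):

def handler(event, context):
    # A Lambda ARN looks like:
    # arn:aws:lambda:<region>:<account-id>:function:<function-name>
    region = context.invoked_function_arn.split(":")[3]
    return {
        "region": region,
        "context.invoked_function_arn": context.invoked_function_arn,
    }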

I created a sample_context.json like this:
{
"invoked_function_arn": "arn:aws:lambda:eu-west-1:123456789012:function:ExampleLambdaFunctionResourceName-AULC3LB8Q02F",
"log_group_name": "/aws/lambda/ExampleLambdaFunctionResourceName-AULC3LB8Q02F",
"function_name": "ExampleLambdaFunctionResourceName-AULC3LB8Q02F",
"function_version": "$LATEST"
}
Then in my main test file:
@pytest.fixture
def context_outcome():
return load_json_from_file('sample_context.json')
@pytest.fixture
def context(context_outcome):
response = type('new', (object,), context_outcome)
seqs = tuple, list, set, frozenset
for i, j in context_outcome.items():
if isinstance(j, dict):
setattr(response, i, context(j))
elif isinstance(j, seqs):
setattr(response, i,
type(j)(context(sj) if isinstance(sj, dict) else sj for sj in j))
else:
setattr(response, i, j)
return response
def test_main(events, context):
assert context.invoked_function_arn == "arn:aws:lambda:eu-west-1:123456789012:function:ExampleLambdaFunctionResourceName-AULC3LB8Q02F"
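A note on this fixture: the same nested dict-to-object conversion can be had from the json module directly via object_hook. A minimal sketch, assuming sample_context.json sits next to the tests (load_json_from_file above is presumed to be a small helper doing json.load):

import json
from types import SimpleNamespace

import pytest

@pytest.fixture
def context():
    with open('sample_context.json') as fh:
        # object_hook turns every JSON object into an attribute-accessible namespace
        return json.load(fh, object_hook=lambda d: SimpleNamespace(**d))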

I got around this by doing the following:
from dataclasses import dataclass
@pytest.fixture
def context():
@dataclass
class LambdaContext:
function_name: str = "test"
aws_request_id: str = "88888888-4444-4444-4444-121212121212"
invoked_function_arn: str = "arn:aws:lambda:eu-west-1:123456789101:function:test"
return LambdaContext()
def test_lambda(context):
my_class = MyClass(EVENT, context)
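If the handler under test also calls context methods such as get_remaining_time_in_millis(), the dataclass can carry those too; a sketch with assumed dummy values (memory_limit_in_mb and the 30000 ms are made up):

from dataclasses import dataclass

@dataclass
class LambdaContext:
    function_name: str = "test"
    memory_limit_in_mb: int = 128
    invoked_function_arn: str = "arn:aws:lambda:eu-west-1:123456789101:function:test"
    aws_request_id: str = "88888888-4444-4444-4444-121212121212"

    def get_remaining_time_in_millis(self) -> int:
        # fixed dummy value; the real Lambda runtime reports the actual remaining time
        return 30000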

Related

Python ThreadPoolExecutor is faster than a loop for CPU-bound task. How come?

Recently I've been working on a project and found behaviour that I don't understand. We have an endpoint that fetches documents from MongoDB and then applies a transformation to each document, replacing some symbols in the data using regex. What's bothering me is that for 7400 documents, applying the transformation function in a regular loop takes 6 seconds to finish, while using ThreadPoolExecutor.map finishes in 3.4 seconds. As far as I know, the GIL prevents the Python interpreter from running more than one thread simultaneously, so CPU-bound tasks should run slower in a ThreadPoolExecutor than in a regular loop. But that is not the case here. How so? I'm assuming that re operations somehow release the GIL, but I'm not sure. Here is the code:
# parts that are responsible for substituting symbols in mongo document
class Converter:
def __init__(self, reverse=False):
self.reverse = reverse
def key_convert(self, key, reverse, path):
return key
def value_convert(self, value, reverse, path):
return value
def recurse(self, data, path=tuple()):
if isinstance(data, Mapping):
_data = {}
for k, v in data.items():
next_path = path + (k,)
key = self.key_convert(k, self.reverse, path)
value = self.value_convert(v, self.reverse, next_path)
_data[key] = self.recurse(value, next_path)
return _data
elif isinstance(data, Iterable) and not isinstance(data, (str, bytes)):
return [
self.recurse(it, path + (idx,))
for idx, it in enumerate(data)]
return self.value_convert(data, self.reverse, path)
def convert(self, data):
self.reverse = False
return self.recurse(data)
def unconvert(self, data):
self.reverse = True
return self.recurse(data)
class MongoConverter(Converter):
_to_mongo = ((rec(r"\."), "\u00B7"), (rec(r"^\$"), "#"),)
_from_mongo = ((rec("\u00B7"), "."), (rec("^#"), "$"),)
def key_convert_to_mongo(self, key):
return pattern_substitute(key, self._to_mongo)
def key_convert_from_mongo(self, key):
return pattern_substitute(key, self._from_mongo)
def key_convert(self, key, reverse, _):
if reverse:
return self.key_convert_from_mongo(key)
return self.key_convert_to_mongo(key)
def value_convert(self, value, reverse, path):
if reverse:
return value
return as_datetime(value)
def pattern_substitute(value, pattern_substitutes):
for pattern, substitute in pattern_substitutes:
value = pattern.sub(substitute, value)
return value
# storage adapter
class MongoStorage:
def __init__(self, collection, converter=None):
self.collection = collection
self.converter = converter if converter else MongoConverter()
self._context = None
self.executor = ThreadPoolExecutor()
def after_find(self, data):
if data is not None:
return self.converter.unconvert(data)
def find(self, filtr=None, limit=-1, **kwargs):
filtr = filter_converter.convert(filtr)
if limit == 0:
return []
if limit == -1:
limit = 0
# this part is what I'm asking about. Regular loop here is slower
return self.executor.map(
self.after_find,
self.collection.find(filtr, limit=limit, **kwargs)
)
Thank you for your answers.
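One thing worth checking when timing this: Executor.map submits all the work to the pool up front but returns a lazy iterator over the results, so unless both versions are fully consumed, the loop and the threaded version may not be measuring the same completed work. Below is a self-contained timing sketch (hypothetical documents, no MongoDB involved) that forces both to finish:

import re
import time
from concurrent.futures import ThreadPoolExecutor

DOT = re.compile(r"\.")

def transform(doc):
    # substitute '.' in keys, mimicking the MongoConverter above
    return {DOT.sub("\u00B7", k): v for k, v in doc.items()}

docs = [{"a.b": i, "c.d": str(i)} for i in range(7400)]

t0 = time.perf_counter()
serial = [transform(d) for d in docs]
t1 = time.perf_counter()

with ThreadPoolExecutor() as ex:
    threaded = list(ex.map(transform, docs))  # list() forces the lazy iterator
t2 = time.perf_counter()

print(f"loop: {t1 - t0:.3f}s  threads: {t2 - t1:.3f}s")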

Class inheritance type checking after pickling in Python

Is there a sure-fire way to check that the class of an object is a sub-class of the desired super?
For example, in a migration script that I'm writing, I have to convert objects of a given type to dictionaries in a given manner to ensure two-way compatibility of the data.
This is best summed up like so:
Serializable
    User
    Status
    Issue
        Test
        Set
    Step
    Cycle
However, when I'm recursively checking objects after depickling, I receive a Test object that yields the following results:
Testing data object type:
>>> type(data)
<class '__main__.Test'>
Testing Class type:
>>> type(Test())
<class '__main__.Test'>
Testing object type against class type:
>>> type(Test()) == type(data)
False
Testing if object isinstance() of Class:
>>> isinstance(data, Test)
False
Testing if Class isinstance() of Super Class:
>>> isinstance(Test(), Serializable)
True
Testing isinstance() of Super Class:
>>> isinstance(data, Serializable)
False
Interestingly, it doesn't appear to have any such problem prior to pickling, as it handles the creation of the dictionary and integrity hash just fine.
This only crops up with depickled objects, in both Pickle and Dill.
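For intuition: isinstance compares against the class object itself, not the class name. If the module defining a class is executed twice, or the class definition is serialised by value (which Dill can do), two distinct class objects with identical names end up in memory, and instances of one fail isinstance checks against the other. A minimal demonstration of the effect:

class Test(object):
    pass

obj = Test()

class Test(object):  # rebinding the name to a brand-new class object
    pass

print(type(obj) == Test)      # False
print(isinstance(obj, Test))  # False, although both print as __main__.Test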
For context, here's the code in its native environment - the DataCache object that is pickled:
class DataCache(object):
_hash=""
_data = None
@staticmethod
def genHash(data):
dataDict = DataCache.dictify(data)
datahash = json.dumps(dataDict, sort_keys=True)
return hashlib.sha256(datahash).digest()
@staticmethod
def dictify(data):
if isinstance(data,list):
datahash = []
for item in data:
datahash.append(DataCache.dictify(item))
elif isinstance(data,(dict, collections.OrderedDict)):
datahash = collections.OrderedDict()
for key, value in data.iteritems():  # iterate the source dict, not the result being built
datahash[key]= DataCache.dictify(value)
elif isinstance(data, Serializable):
datahash = data.toDict()
else:
datahash = data
return datahash
def __init__(self, restoreDict = {}):
if restoreDict:
self.__dict__.update(restoreDict)
def __getinitargs__(self):
return (self.__dict__)
def set(self, data):
self._hash = DataCache.genHash(data)
self._data = data
def verify(self):
dataHash = DataCache.genHash(self._data)
return (self._hash == dataHash)
def get(self):
return self._data
Finally, I know there are arguments for using JSON for readability in storage, but I needed Pickle's ability to convert straight to and from objects without specifying the object type myself (thanks to the nesting, that's not really feasible).
Am I going mad here, or does pickling do something to the class definitions?
EDIT:
Minimal Implementation:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
from aenum import Enum
import json
import base64
import argparse
import os
import sys
import datetime
import dill
import hashlib
import collections
class Serializable(object):
def __init__(self, initDict={}):
if initDict:
self.__dict__.update(initDict)
def __str__(self):
return str(self.sortSelf())
def sortSelf(self):
return collections.OrderedDict(sorted(self.__dict__.items()))
def toDict(self):
return self.__dict__
def fromDict(self, dict):
# Not using __dict__.update(...) to avoid polluting objects with the excess data
varMap = self.__dict__
if dict and varMap:
for key in varMap:
if (key in dict):
varMap[key] = dict[key]
self.__dict__.update(varMap)
return self
return None
class Issue(Serializable):
def __init__(self, initDict={}):
self.id = 0
self.key = ""
self.fields = {}
if initDict:
self.__dict__.update(initDict)
Serializable.__init__(self)
def fieldToDict(self, obj, key, type):
if key in obj:
result = obj[key]
else:
return None
if result is None:
return None
if isinstance(result, type):
return result.toDict()
return result
def fromDict(self, jsonDict):
super(Issue, self).fromDict(jsonDict)
self.fields["issuetype"] = IssueType().fromDict(self.fields["issuetype"])
self.fields["assignee"] = User().fromDict(self.fields["assignee"])
self.fields["creator"] = User().fromDict(self.fields["creator"])
self.fields["reporter"] = User().fromDict(self.fields["reporter"])
return self
def toDict(self):
result = super(Issue, self).toDict()
blankKeys = []
for fieldName, fieldValue in self.fields.iteritems():
if fieldValue is None:
blankKeys.append(fieldName)
if blankKeys:
for key in blankKeys:
self.fields.pop(key, None)
result["fields"]["issuetype"] = self.fieldToDict(result["fields"], "issuetype", IssueType)
result["fields"]["creator"] = self.fieldToDict(result["fields"], "creator", User)
result["fields"]["reporter"] = self.fieldToDict(result["fields"], "reporter", User)
result["fields"]["assignee"] = self.fieldToDict(result["fields"], "assignee", User)
return result
class IssueType(Serializable):
def __init__(self):
self.id = 0
self.name = ""
def toDict(self):
return {"id": str(self.id)}
class Project(Serializable):
def __init__(self):
Serializable.__init__(self)
self.id = 0
self.name = ""
self.key = ""
class Cycle(Serializable):
def __init__(self):
self.id = 0
self.name = ""
self.totalExecutions = 0
self.endDate = ""
self.description = ""
self.totalExecuted = 0
self.started = ""
self.versionName = ""
self.projectKey = ""
self.versionId = 0
self.environment = ""
self.totalCycleExecutions = 0
self.build = ""
self.ended = ""
self.name = ""
self.modifiedBy = ""
self.projectId = 0
self.startDate = ""
self.executionSummaries = {'executionSummary': []}
class Step(Serializable):
def __init__(self):
self.id = ""
self.orderId = 0
self.step = ""
self.data = ""
self.result = ""
self.attachmentsMap = {}
def toDict(self):
dict = {}
dict["step"] = self.step
dict["data"] = self.data
dict["result"] = self.result
dict["attachments"] = []
return dict
class Status(Serializable):
def __init__(self):
self.id = 0
self.name = ""
self.description = ""
self.isFinal = True
self.color = ""
self.isNative = True
self.statusCount = 0
self.statusPercent = 0.0
class User(Serializable):
def __init__(self):
self.displayName = ""
self.name = ""
self.emailAddress = ""
self.key = ""
self.active = False
self.timeZone = ""
class Execution(Serializable):
def __init__(self):
self.id = 0
self.orderId = 0
self.cycleId = -1
self.cycleName = ""
self.issueId = 0
self.issueKey = 0
self.projectKey = ""
self.comment = ""
self.versionId = 0  # trailing commas here previously turned these values into tuples
self.versionName = ""
self.executedOn = ""
self.creationDate = ""
self.executedByUserName = ""
self.assigneeUserName = ""
self.status = {}
self.executionStatus = ""
def fromDict(self, jsonDict):
super(Execution, self).fromDict(jsonDict)
self.status = Status().fromDict(self.status)
# This is already listed as Execution Status, need to associate and convert!
return self
def toDict(self):
result = super(Execution, self).toDict()
result['status'] = result['status'].toDict()
return result
class ExecutionContainer(Serializable):
def __init__(self):
self.executions = []
def fromDict(self, jsonDict):
super(ExecutionContainer, self).fromDict(jsonDict)
self.executions = []
for executionDict in jsonDict["executions"]:
self.executions.append(Execution().fromDict(executionDict))
return self
class Test(Issue):
def __init__(self, initDict={}):
if initDict:
self.__dict__.update(initDict)
Issue.__init__(self)
def toDict(self):
result = super(Test, self).toDict()
stepField = "CustomField_0001"
if result["fields"][stepField]:
steps = []
for step in result["fields"][stepField]["steps"]:
steps.append(step.toDict())
result["fields"][stepField] = steps
return result
def fromDict(self, jsonDict):
super(Test, self).fromDict(jsonDict)
stepField = "CustomField_0001"
steps = []
if stepField in self.fields:
for step in self.fields[stepField]["steps"]:
steps.append(Step().fromDict(step))
self.fields[stepField] = {"steps": steps}
return self
class Set(Issue):
def __init__(self, initDict={}):
self.__dict__.update(initDict)
Issue.__init__(self)
class DataCache(object):
_hash = ""
_data = None
@staticmethod
def genHash(data):
dataDict = DataCache.dictify(data)
datahash = json.dumps(dataDict, sort_keys=True)
return hashlib.sha256(datahash).digest()
@staticmethod
def dictify(data):
if isinstance(data, list):
datahash = []
for item in data:
datahash.append(DataCache.dictify(item))
elif isinstance(data, (dict, collections.OrderedDict)):
datahash = collections.OrderedDict()
for key, value in data.iteritems():  # iterate the source dict, not the result being built
datahash[key] = DataCache.dictify(value)
elif isinstance(data, Serializable):
datahash = data.toDict()
else:
datahash = data
return datahash
def __init__(self, restoreDict={}):
if restoreDict:
self.__dict__.update(restoreDict)
def __getinitargs__(self):
return (self.__dict__)
def set(self, data):
self._hash = DataCache.genHash(data)
self._data = data
def verify(self):
dataHash = DataCache.genHash(self._data)
return (self._hash == dataHash)
def get(self):
return self._data
def saveCache(name, projectKey, object):
filePath = "migration_caches/{projectKey}".format(projectKey=projectKey)
if not os.path.exists(path=filePath):
os.makedirs(filePath)
cache = DataCache()
cache.set(object)
targetFile = open("{path}/{name}".format(name=name, path=filePath), 'wb')
dill.dump(obj=cache, file=targetFile)
targetFile.close()
def loadCache(name, projectKey):
filePath = "migration_caches/{projectKey}/{name}".format(name=name, projectKey=projectKey)
result = False
try:
targetFile = open(filePath, 'rb')
try:
cache = dill.load(targetFile)
if isinstance(cache, DataCache):
if cache.verify():
result = cache.get()
except EOFError:
# except BaseException:
print ("Failed to load cache from file: {filePath}\n".format(filePath=filePath))
except IOError:
("Failed to load cache file at: {filePath}\n".format(filePath=filePath))
targetFile.close()
return result
testIssue = Test().fromDict({"id": 1000,
"key": "TEST",
"fields": {
"issuetype": {
"id": 1,
"name": "TestIssue"
},
"assignee": "Minothor",
"reporter": "Minothor",
"creator": "Minothor",
}
})
saveCache("Test", "TestProj", testIssue)
result = loadCache("Test", "TestProj")
EDIT 2
The script in its current form now seems to work correctly with vanilla Pickle (I had initially switched to Dill due to a similar issue, which was solved by the switch).
However, if you are here with this issue and require Dill's features, then as Mike noted in the comments, it's possible to change the settings in dill.settings to have Dill pickle referenced items by reference only, effectively mirroring Pickle's standard pickling behaviour.
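A minimal sketch of that settings change (my assumption of the relevant knob; check your Dill version's documentation):

import dill

# Serialise classes by reference (as pickle does) rather than by value,
# so depickled instances share the original class objects.
dill.settings['byref'] = True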

Python generate sorted list

I want to compress my movies automatically, so I've written a MediaInfo wrapper class in Python to generate XML output, which I then parse into a MovieInfo class with a list of audio and subtitle tracks.
__author__ = 'dominik'
class Error(Exception):
""" Error class
"""
class ValidationError(Error):
""" Invalid or missing xml items
"""
class MovieInfo(object):
""" Description of movie file
"""
def __init__(self, media_info):
self._video_track = None
self._audio_tracks = []
self._subtitle_tracks = []
self.valid_movie = True
for track in media_info.tracks:
if track.track_type == "Audio":
self._audio_tracks.append(AudioTrack(track))
elif track.track_type == "Text":
self._subtitle_tracks.append(SubtitleTrack(track))
elif track.track_type == "Video":
self._video_track = VideoTrack(track)
@property
def audio_tracks(self):
if not hasattr(self, "_audio_tracks"):
self._audio_tracks = []
if len(self._audio_tracks) != 0:
return self._audio_tracks
@property
def subtitle_tracks(self):
if not hasattr(self, "_subtitle_tracks"):
self._subtitle_tracks = []
if len(self._subtitle_tracks) != 0:
return self._subtitle_tracks
class Track(object):
""" Abstract track class for audio and subtitle tracks
"""
__KNOWN_LANGUAGE_CODES = {"en": "ENG", "de": "DE"}
def __init__(self, track, valid_codecs):
self._valid = True
track_id = int(track.id)
codec_id = self._determine_codec(track.codec_id, valid_codecs)
language = self._determine_language(track.language)
self._id = track_id
self._codec_id = codec_id
self._language = language
def _determine_codec(self, track_codec, types):
result = types.get(track_codec, None)
if result is None:
self._valid = False
return result
def _determine_language(self, track_language, types=__KNOWN_LANGUAGE_CODES):
result = types.get(track_language, None)
if result is None:
self._valid = False
return result
class AudioTrack(Track):
""" Audio track class
"""
__KNOWN_AUDIO_CODECS = {"A_DTS": "DTS", "A_AC3": "AC3"}
def __init__(self, track):
self._type = 1
Track.__init__(self, track, self.__KNOWN_AUDIO_CODECS)
class SubtitleTrack(Track):
""" Subtitle track class
"""
__KNOWN_SUBTITLE_CODECS = {"S_VOBSUB": "VOBSUB"}
def __init__(self, track):
self._type = 2
if track.forced == "Yes":
self._forced = True
else:
self._forced = False
Track.__init__(self, track, self.__KNOWN_SUBTITLE_CODECS)
class VideoTrack(object):
""" Video track class (only one video track in movie info!)
"""
def __init__(self, track):
self._type = 0
self._framerate = float(track.frame_rate)
self._width = track.width
self._height = track.height
Here is the mediainfo class (it's the pymediainfo class):
from subprocess import Popen
import os
from tempfile import mkstemp
from bs4 import BeautifulSoup, NavigableString
from setuptools.compat import unicode
class Track(object):
""" Hold the track information
"""
def __getattr__(self, item):
try:
return object.__getattribute__(self, item)
except:
pass
return None
def __init__(self, xml_track):
self.xml_track = xml_track
self.track_type = xml_track.attrs["type"]
for child in self.xml_track.children:
if not isinstance(child, NavigableString):
node_name = child.name.lower().strip()
node_value = unicode(child.string)
node_other_name = "other_%s" % node_name
if getattr(self, node_name) is None:
setattr(self, node_name, node_value)
else:
if getattr(self, node_other_name) is None:
setattr(self, node_other_name, [node_value, ])
else:
getattr(self, node_other_name).append(node_value)
for key in [c for c in self.__dict__.keys() if c.startswith("other_")]:
try:
primary = key.replace("other_", "")
setattr(self, primary, int(getattr(self, primary)))
except:
for value in getattr(self, key):
try:
actual = getattr(self, primary)
setattr(self, primary, int(value))
getattr(self, key).append(actual)
break
except:
pass
def __repr__(self):
return("<Track id='{0}', type='{1}'>".format(self.id, self.track_type))
def to_data(self):
data = {}
for k, v in self.__dict__.items():
if k != 'xml_track':
data[k] = v
return data
class Mediainfo(object):
""" MediaInfo wrapper
"""
def __init__(self, xml):
self.xml_dom = xml
if isinstance(xml, str):
self.xml_dom = BeautifulSoup(xml, "xml")
def _populate_tracks(self):
if self.xml_dom is None:
return
for xml_track in self.xml_dom.Mediainfo.File.find_all("track"):
self._tracks.append(Track(xml_track))
@property
def tracks(self):
if not hasattr(self, "_tracks"):
self._tracks = []
if len(self._tracks) == 0:
self._populate_tracks()
return self._tracks
@staticmethod
def parse(filename):
filehandler_out, filename_out = mkstemp(".xml", "mediainfo-")
filehandler_err, filename_err = mkstemp(".error", "mediainfo-")
filepointer_out = os.fdopen(filehandler_out, "r+b")
filepointer_err = os.fdopen(filehandler_err, "r+b")
mediainfo_command = ["mediainfo", "-f", "--Output=XML", filename]
p = Popen(mediainfo_command, stdout=filepointer_out, stderr=filepointer_err)
p.wait()
filepointer_out.seek(0)
xml_dom = BeautifulSoup(filepointer_out.read(), "xml")
filepointer_out.close()
filepointer_err.close()
print(xml_dom)
return Mediainfo(xml_dom)
def to_data(self):
data = {'tracks': []}
for track in self.tracks:
data['tracks'].append(track.to_data())
return data
This class gives me every track in the XML, and I then parse the relevant info into MovieInfo.
OK, now I have a list of audio tracks, e.g. three tracks: one in German and DTS, one in German and AC3, and one in English and AC3. Now I want to get the ids from the tracks in the format "1,2,3" to give to the HandBrake CLI.
My problem is the order of the tracks. If there is a German DTS track, it should be the first track; the second track should be that same one, but compressed to AAC; and the third track should be an English track in AAC. If there is only a German AC3 track, then the first track should be that track compressed to AAC, and the second track should be English and AAC.
I don't know exactly how I can achieve that; can you help me? I'm new to Python and come from C, C++ and C#. In C# this is very easy to do with a lambda.
Assuming you know how to define a comparator that, given two items, decides which is bigger, Python works much like C or C++.
Start here:
1. https://wiki.python.org/moin/HowTo/Sorting/
2. https://developers.google.com/edu/python/sorting
3. http://docs.python.org/2/library/functions.html#sorted
Use the sorted built-in and define the key you want.
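Applied to the classes above, a sketch of one possible key function for the preference order described in the question (German DTS, then German AC3, then English AC3); movie_info and the underscore attributes are taken from the code above, and the ranking values are assumptions:

def track_rank(track):
    # lower rank sorts first; unknown language/codec combinations go last
    order = {("DE", "DTS"): 0, ("DE", "AC3"): 1, ("ENG", "AC3"): 2}
    return order.get((track._language, track._codec_id), 99)

sorted_tracks = sorted(movie_info.audio_tracks, key=track_rank)
track_ids = ",".join(str(t._id) for t in sorted_tracks)  # e.g. "1,2,3" for the HandBrake CLI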

Web2py Custom Validators

I am new to Web2py and am trying to use a custom validator.
class IS_NOT_EMPTY_IF_OTHER(Validator):
def __init__(self, other,
error_message='must be filled because other value '
'is present'):
self.other = other
self.error_message = error_message
def __call__(self, value):
if isinstance(self.other, (list, tuple)):
others = self.other
else:
others = [self.other]
has_other = False
for other in others:
other, empty = is_empty(other)
if not empty:
has_other = True
break
value, empty = is_empty(value)
if empty and has_other:
return (value, T(self.error_message))
else:
return (value, None)
I do not understand how to use it on my table:
db.define_table('numbers',
Field('a', 'integer'),
Field('b', 'boolean'),
Field('c', 'integer'))
I want to use this in a way that 'c' cannot be left blank when 'b' is ticked.
Save the code in /modules/customvalidators.py:
from gluon.validators import is_empty
from gluon.validators import Validator
class IS_NOT_EMPTY_IF_OTHER(Validator):
def __init__(self, other,
error_message='must be filled because other value '
'is present'):
self.other = other
self.error_message = error_message
def __call__(self, value):
if isinstance(self.other, (list, tuple)):
others = self.other
else:
others = [self.other]
has_other = False
for other in others:
other, empty = is_empty(other)
if not empty:
has_other = True
break
value, empty = is_empty(value)
if empty and has_other:
return (value, T(self.error_message))
else:
return (value, None)
then in models/db.py
from customvalidators import IS_NOT_EMPTY_IF_OTHER
db.define_table("foo",
Field('a', 'integer'),
Field('b', 'boolean'),
Field('c', 'integer')
)
# apply the validator
db.foo.c.requires = IS_NOT_EMPTY_IF_OTHER(request.vars.b)
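Equivalently, the validator can be attached inline when the field is defined; a sketch assuming the same import as above:

db.define_table("foo",
    Field('a', 'integer'),
    Field('b', 'boolean'),
    Field('c', 'integer', requires=IS_NOT_EMPTY_IF_OTHER(request.vars.b))
)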
Also, note that it can be done easily without the above validator.
Forget all the code above and try this simplified way
Version 2:
controllers/default.py
def check(form):
if form.vars.b and not form.vars.c:
form.errors.c = "If the b is checked, c must be filled"
def action():
form = SQLFORM(db.foo)
if form.process(onvalidation=check).accepted:
response.flash = "success"
return dict(form=form)

How can I change in Python the return/input type of a list that is implemented as a class attribute?

EDIT (complete rephrase of the problem, as the original version (see "original version" below) is misleading):
Here is the setting: I have an object which has a list of objects of type <class 'One'>. I would like to access this list but rather work with objects of type <class 'Two'>, which is an enriched version of <class 'One'>.
Background (1):
One could be an object that can be stored easily via an ORM. The ORM would handle the list depending on the data model.
Two would be an object like One but enriched by many features or in the way it can be accessed.
Background (2):
I am trying to solve a SQLAlchemy-related question that I asked here. So the answer to the present question could also be a solution to that question (changing the return/input type of SQLAlchemy lists).
Here is some code for illustration:
import numpy as np
class One(object):
"""
Data Transfer Object (DTO)
"""
def __init__(self, name, data):
assert type(name) == str
assert type(data) == str
self.name = name
self.data = data
def __repr__(self):
return "%s(%r, %r)" %(self.__class__.__name__, self.name, self.data)
class Two(np.ndarray):
_DTO = One
def __new__(cls, name, data):
dto = cls._DTO(name, data)
return cls.newByDTO(dto)
@classmethod
def newByDTO(cls, dto):
obj = np.fromstring(dto.data, dtype="float", sep=',').view(cls)
obj.setflags(write=False) # Immutable
obj._dto = dto
return obj
@property
def name(self):
return self._dto.name
class DataUI(object):
def __init__(self, list_of_ones):
for one in list_of_ones:
assert type(one) == One
self.list_of_ones = list_of_ones
if __name__ == '__main__':
o1 = One('first object', "1, 3.0, 7, 8,1")
o2 = One('second object', "3.7, 8, 10")
my_data = DataUI ([o1, o2])
How to implement a list_of_twos which operates on list_of_ones but provides the user a list with elements of type Two:
type (my_data.list_of_twos[1]) == Two
>>> True
my_data.list_of_twos.append(Two("test", "1, 7, 4.5"))
print my_data.list_of_ones[-1]
>>> One('test', '1, 7, 4.5')
Original version of the question:
Here is an illustration of the problem:
class Data(object):
def __init__(self, name, data_list):
self.name = name
self.data_list = data_list
if __name__ == '__main__':
my_data = Data ("first data set", [0, 1, 1.4, 5])
I would like to access my_data.data_list via another list (e.g. my_data.data_np_list) that handles list elements as a different type (e.g. as numpy.ndarray):
>>> my_data.data_np_list[1]
array(1)
>>> my_data.data_np_list.append(np.array(7))
>>> print my_data.data_list
[0, 1, 1.4, 5, 7]
You should use a property:
class Data(object):
def __init__(self, name, data_list):
self.name = name
self.data_list = data_list
@property
def data_np_list(self):
return numpy.array(self.data_list)
if __name__ == '__main__':
my_data = Data ("first data set", [0, 1, 1.4, 5])
print my_data.data_np_list
edit: numpy uses a contiguous memory area; a Python list is an array of pointers to separately allocated objects. You can't have both at the same time without paying a performance cost that would make the whole thing useless. They are different data structures.
No, you can't do it easily (or at all without losing any performance gain you might get from using numpy.array). You're wanting two fundamentally different structures mirroring one another; this will mean storing both and transferring any modifications between the two. Subclassing both list and numpy.array to observe modifications would be the only way to do that.
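To make that limitation concrete: a property like the one above hands back a fresh copy on every access, so writes to the returned array never reach the underlying list. A small sketch:

import numpy as np

class Data(object):
    def __init__(self, name, data_list):
        self.name = name
        self.data_list = data_list

    @property
    def data_np_list(self):
        # a new array is built on every access; nothing is written back
        return np.array(self.data_list)

my_data = Data("first data set", [0, 1, 1.4, 5])
arr = my_data.data_np_list
arr[0] = 99               # mutates only the temporary copy
print(my_data.data_list)  # [0, 1, 1.4, 5] -- unchanged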
Not sure whether your approach is correct.
A property getter would help achieve what you're doing. Here's something similar using arrays instead of numpy.
I've made the array (or in your case the numpy data type) the internal representation, with the conversion to list only done on demand and a temporary object returned.
import unittest
import array
class GotAGetter(object):
"""Gets something.
"""
def __init__(self, name, data_list):
super(GotAGetter, self).__init__()
self.name = name
self.data_array = array.array('i', data_list)
@property
def data_list(self):
return list(self.data_array)
class TestProperties(unittest.TestCase):
def testProperties(self):
data = [1,3,5]
test = GotAGetter('fred', data)
aString = str(test.data_array)
lString = str(test.data_list) #Here you go.
try:
test.data_list = 'oops'
self.fail('Should have had an attribute error by now')
except AttributeError as exAttr:
self.assertEqual(exAttr.message, "can't set attribute")
self.assertEqual(aString, "array('i', [1, 3, 5])",
"The array doesn't look right")
self.assertEqual(lString, '[1, 3, 5]',
"The list property doesn't look right")
if __name__ == "__main__":
unittest.main()
One solution I just came up with is to implement a view of the list via a ListView class which takes the following arguments:
raw_list: a list of One-objects
raw2new: a function that converts One-objects to Two-objects
new2raw: a function that converts Two-objects to One-objects
Here is the code:
class ListView(list):
def __init__(self, raw_list, raw2new, new2raw):
self._data = raw_list
self.converters = {'raw2new': raw2new,
'new2raw': new2raw}
def __repr__(self):
repr_list = [self.converters['raw2new'](item) for item in self._data]
repr_str = "["
for element in repr_list:
repr_str += element.__repr__() + ",\n "
repr_str = repr_str[:-3] + "]"
return repr_str
def append(self, item):
self._data.append(self.converters['new2raw'](item))
def pop(self, index):
return self.converters['raw2new'](self._data.pop(index))
def __getitem__(self, index):
return self.converters['raw2new'](self._data[index])
def __setitem__(self, key, value):
self._data.__setitem__(key, self.converters['new2raw'](value))
def __delitem__(self, key):
return self._data.__delitem__(key)
def __getslice__(self, i, j):
return ListView(self._data.__getslice__(i,j), **self.converters)
def __contains__(self, item):
return self._data.__contains__(self.converters['new2raw'](item))
def __add__(self, other_list_view):
assert self.converters == other_list_view.converters
return ListView(
self._data + other_list_view._data,
**self.converters
)
def __len__(self):
return len(self._data)
def __eq__(self, other):
return self._data == other._data
def __iter__(self):
return iter([self.converters['raw2new'](item) for item in self._data])
Now, DataUI has to look something like this:
class DataUI(object):
def __init__(self, list_of_ones):
for one in list_of_ones:
assert type(one) == One
self.list_of_ones = list_of_ones
self.list_of_twos = ListView(
self.list_of_ones,
Two.newByDTO,
Two.getDTO
)
With that, Two needs the following method:
def getDTO(self):
return self._dto
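With that in place, usage matches the behaviour asked for at the top; a short sketch reusing the classes above:

o1 = One('first object', "1, 3.0, 7, 8,1")
my_data = DataUI([o1])
print(type(my_data.list_of_twos[0]) is Two)   # True
my_data.list_of_twos.append(Two("test", "1, 7, 4.5"))
print(my_data.list_of_ones[-1])               # One('test', '1, 7, 4.5')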
The entire example would now look like the following:
import unittest
import numpy as np
class ListView(list):
def __init__(self, raw_list, raw2new, new2raw):
self._data = raw_list
self.converters = {'raw2new': raw2new,
'new2raw': new2raw}
def __repr__(self):
repr_list = [self.converters['raw2new'](item) for item in self._data]
repr_str = "["
for element in repr_list:
repr_str += element.__repr__() + ",\n "
repr_str = repr_str[:-3] + "]"
return repr_str
def append(self, item):
self._data.append(self.converters['new2raw'](item))
def pop(self, index):
return self.converters['raw2new'](self._data.pop(index))
def __getitem__(self, index):
return self.converters['raw2new'](self._data[index])
def __setitem__(self, key, value):
self._data.__setitem__(key, self.converters['new2raw'](value))
def __delitem__(self, key):
return self._data.__delitem__(key)
def __getslice__(self, i, j):
return ListView(self._data.__getslice__(i,j), **self.converters)
def __contains__(self, item):
return self._data.__contains__(self.converters['new2raw'](item))
def __add__(self, other_list_view):
assert self.converters == other_list_view.converters
return ListView(
self._data + other_list_view._data,
**self.converters
)
def __len__(self):
return len(self._data)
def __iter__(self):
return iter([self.converters['raw2new'](item) for item in self._data])
def __eq__(self, other):
return self._data == other._data
class One(object):
"""
Data Transfer Object (DTO)
"""
def __init__(self, name, data):
assert type(name) == str
assert type(data) == str
self.name = name
self.data = data
def __repr__(self):
return "%s(%r, %r)" %(self.__class__.__name__, self.name, self.data)
class Two(np.ndarray):
_DTO = One
def __new__(cls, name, data):
dto = cls._DTO(name, data)
return cls.newByDTO(dto)
@classmethod
def newByDTO(cls, dto):
obj = np.fromstring(dto.data, dtype="float", sep=',').view(cls)
obj.setflags(write=False) # Immutable
obj._dto = dto
return obj
@property
def name(self):
return self._dto.name
def getDTO(self):
return self._dto
class DataUI(object):
def __init__(self, list_of_ones):
for one in list_of_ones:
assert type(one) == One
self.list_of_ones = list_of_ones
self.list_of_twos = ListView(
self.list_of_ones,
Two.newByDTO,
Two.getDTO
)
class TestListView(unittest.TestCase):
def testProperties(self):
o1 = One('first object', "1, 3.0, 7, 8,1")
o2 = One('second object', "3.7, 8, 10")
my_data = DataUI ([o1, o2])
t1 = Two('third object', "4.8, 8.2, 10.3")
t2 = Two('forth object', "33, 1.8, 1.0")
# append:
my_data.list_of_twos.append(t1)
# __getitem__:
np.testing.assert_array_equal(my_data.list_of_twos[2], t1)
# __add__:
np.testing.assert_array_equal(
(my_data.list_of_twos + my_data.list_of_twos)[5], t1)
# __getslice__:
np.testing.assert_array_equal(
my_data.list_of_twos[1:],
my_data.list_of_twos[1:2] + my_data.list_of_twos[2:]
)
# __contains__:
self.assertEqual(my_data.list_of_twos.__contains__(t1), True)
# __setitem__:
my_data.list_of_twos.__setitem__(1, t1),
np.testing.assert_array_equal(my_data.list_of_twos[1], t1)
# __delitem__:
l1 = len(my_data.list_of_twos)
my_data.list_of_twos.__delitem__(1)
l2 = len(my_data.list_of_twos)
self.assertEqual(l1 - 1, l2)
# __iter__:
my_data_2 = DataUI ([])
for two in my_data.list_of_twos:
my_data_2.list_of_twos.append(two)
if __name__ == '__main__':
unittest.main()
