How can I validate comma-separated words with FormEncode?
Something like this:
"foo1, foo2, foo3" -> ["foo1", "foo2", "foo3"]
You'll probably need a custom validator. Here's a quick example:
import formencode
class CommaSepList(formencode.validators.FancyValidator):
    """Validate a comma-separated string and convert it to a list of words.

    ``"foo1, foo2, foo3"`` becomes ``["foo1", "foo2", "foo3"]``.  Whitespace
    around each element is discarded, so both ``","`` and ``", "`` work as
    separators.
    """

    def _to_python(self, value, state):
        """Split *value* on commas and strip whitespace from every element."""
        return [elem.strip() for elem in value.split(",")]

    def validate_python(self, value, state):
        """Reject the converted list when any of its elements is empty.

        Raises:
            formencode.Invalid: if an element is the empty string, as
                happens for input such as ``"1,,"``.
        """
        for elem in value:
            if not elem:
                raise formencode.Invalid(
                    "an element of the list is empty", value, state)
>>> CommaSepList.to_python("1,2,3")
['1', '2', '3']
>>> CommaSepList.to_python("1,,")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib64/python2.5/site-packages/FormEncode-1.2.3dev-py2.5.egg/formencode/api.py", line 416, in to_python
vp(value, state)
File "myValidator.py", line 17, in validate_python
raise formencode.Invalid("an element of the list is empty", value, state)
Of course, you'll want to add validation specific to your use case.
Assuming each word is separated by a comma and a space (', '):
>>> x = "foo1, bar2, foo3"
>>> x.split(', ')
['foo1', 'bar2', 'foo3']
And then pass that list on to FormEncode and have it do whatever you need it to do.
Related
I describe a class that loads data from server into a dataframe and then processes it. Here is my code (not including importing libraries):
class Save(Data):
    """Load the goods of one group into a DataFrame and normalise their text.

    NOTE(review): ``Data`` is defined outside this snippet; it is assumed to
    open the database connection and expose it as ``self.hndl`` -- confirm.
    """

    # Morphological analyzer shared by all calls; built lazily on first use.
    _morph = None

    def __init__(self, server, database, username, driver, group=None):
        """Forward the connection settings to ``Data`` and remember *group*."""
        super().__init__(server, database, username, driver)
        self.group = group

    def get_all_goods(self):
        """Return a DataFrame with columns ``p_1`` and ``p_2`` for ``self.group``."""
        # NOTE(review): the group value is interpolated directly into the SQL
        # text. If it can ever come from untrusted input, pass it through the
        # ``params`` argument of ``read_sql_query`` instead -- the placeholder
        # syntax depends on the driver behind ``self.hndl``.
        query = f''' SELECT [p_1], [p_2]
                     FROM [table] WHERE [group] = '{self.group}' '''
        goods_table = pd.read_sql_query(query, self.hndl)
        return goods_table

    def data_preprocessing(self):
        """Return the goods table with a ``desc`` column: ``p_1 + ' ' + p_2``."""
        data_prepared = self.get_all_goods()
        data_prepared['desc'] = data_prepared[['p_1', 'p_2']].apply(
            ' '.join, axis=1)
        return data_prepared

    @staticmethod
    def data_cleaning(str):
        """Split a description into words that are longer than one character.

        Tokens containing a dot and digit groups joined by an underscore are
        blanked out first; the remainder is split on whitespace and
        punctuation.  The parameter keeps its original name ``str`` so that
        existing keyword calls continue to work.
        """
        text = re.sub(r"(\w*(\.\w*))", ' ', str)
        text = re.sub(r"\d*\_\d*", ' ', text)
        # The hyphen is escaped on purpose: written as ",-," inside a
        # character class it forms a range and is never treated as a
        # separator itself.
        parts = re.split(r'[;,.\s:\-+()=/«»#!?"_*]', text)
        return [word for word in parts if len(word) > 1]

    def data_morphy(self, text):
        """Return the unique normal forms of the words in *text*, space-joined."""
        if Save._morph is None:
            # Constructing a MorphAnalyzer is expensive, so it is done once
            # and reused instead of once per processed row.
            Save._morph = pymorphy2.MorphAnalyzer()
        morph = Save._morph
        tokens = [morph.parse(token)[0].normal_form
                  for token in self.data_cleaning(text)]
        # dict.fromkeys removes duplicates while keeping first-seen order.
        tokens = list(dict.fromkeys(tokens))
        return ' '.join(tokens)

    def data_final(self):
        """Return the preprocessed frame with ``desc`` passed through ``data_morphy``."""
        data_final = self.data_preprocessing()
        data_final['desc'] = data_final['desc'].apply(lambda x: self.data_morphy(x))
        return data_final
But when I run the code, it never finishes: I waited for half an hour and it still had not completed. The methods get_all_goods and data_preprocessing work well on their own. Where did I make a mistake?
EDIT:
There is an error now, when I use cls.data_final():
Traceback (most recent call last):
File "C:\Program Files\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3296, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-8-66905faafdf3>", line 103, in <module>
cls.data_final()
File "<ipython-input-8-66905faafdf3>", line 68, in data_final
data_final['desc'] = data_final['desc'].apply(lambda x: self.data_morphy(x))
TypeError: 'method' object is not subscriptable
The name data_final is shared as both the name of the dataframe and the name of the function. When trying to use data_final['desc'] as a pandas dataframe, the code thinks you're trying to call the data_final() method. This is why you're getting the TypeError: 'method' object is not subscriptable
Solution - change the name of your pandas frame to something that isn't the function name, eg:
def data_final(self):
    """Return the preprocessed frame with its ``desc`` column normalised.

    The frame is bound to a local name that differs from the method's own
    name, and every ``desc`` value is replaced by ``self.data_morphy(value)``.
    """
    frame = self.data_preprocessing()
    frame['desc'] = frame['desc'].apply(self.data_morphy)
    return frame
:)
I have the following MongoEngine models:
from app import db
from datetime import datetime
from mongoengine import signals
class PathEmbedded(db.EmbeddedDocument):
    """Path data intended to be embedded inside another document."""

    _id = db.ObjectIdField(required=False)
    distance = db.IntField(required=False, min_value=0, default=0)

    # Inheritance is enabled so that other document classes can derive
    # from this one.
    meta = {"allow_inheritance": True}

    def __unicode__(self):
        """Return a short description containing the id and the distance."""
        template = "Path '%s': %d m"
        return template % (self.id, self.distance)
class Path2(PathEmbedded, db.Document):
    """Standalone variant of ``PathEmbedded``, stored in its own collection."""

    _id = db.ObjectIdField()
    orig = db.ObjectIdField(required=True)
    dest = db.ObjectIdField(required=True)
    updateStamp = db.DateTimeField(required=True)
    ok_to_use = db.BooleanField(required=True, default=False)
    meta = {
        'indexes': [
            {
                'fields': ['ok_to_use', 'orig', 'dest'],
                'cls': False,  # does this affect performance?!
            },
        ],
    }

    @classmethod
    def pre_save(cls, sender, document, **kwargs):
        """Signal handler: stamp *document* with the current UTC time."""
        document.updateStamp = datetime.utcnow()

    def to_embedded(self):
        """Convert this standalone instance into an embeddable ``PathEmbedded``."""
        import json
        temp = json.loads(self.to_json())
        # Drop the "_cls" entry; if it stays, the output is built as the
        # class recorded there instead of as a PathEmbedded instance.
        temp.pop('_cls')
        return PathEmbedded().from_json(json.dumps(temp))

    def get_from_gmaps(self):
        """Assign a distance and save the document.

        Meant to fetch the distance from the Google Maps directions API; in
        this reduced example a fixed value is assigned instead.

        Returns:
            False if assigning or saving raised an exception (the message
            is printed), True otherwise.
        """
        try:
            # NOTE: the trailing comma makes the right-hand side the tuple
            # (10,) rather than the int 10, so validation of the IntField
            # fails when save() runs.
            self.distance = 10,
            self.save()
        except Exception as e:
            print(str(e))
            return False
        else:
            return True


# connect event hooks:
signals.pre_save.connect(Path2.pre_save, sender=Path2)
So, at some point I'm updating a path instance by calling get_from_gmaps():
from app.models.Path2 import Path2 as P
from bson import ObjectId
p=P(orig=ObjectId(), dest=ObjectId())
p.save()
p.get_from_gmaps()
which raises:
>>> p.get_from_gmaps()
ValidationError (Path2:54d34b97362499300a6ec3be) (10 could not be converted to int: ['distance'])
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "[...]app/models/Path2/get_from_gmaps.py", line 18, in get_from_gmaps
self.save()
File "[...]venv/local/lib/python2.7/site-packages/mongoengine/document.py", line 224, in save
self.validate(clean=clean)
File "[...]venv/local/lib/python2.7/site-packages/mongoengine/base/document.py", line 323, in validate
raise ValidationError(message, errors=errors)
ValidationError: ValidationError (Path2:54d34b97362499300a6ec3be) (10 could not be converted to int: ['distance'])
Originally I was storing an integer parsed from some JSON and converted to int, and thought something was wrong there, but I replaced it with an int value for debugging and now get this. I really don't know where to start o.O
EDIT: expanded code to provide complete [non]working example.
There's an extra comma after the 10:
self.distance = 10,
^
You are setting distance to a tuple containing an int, instead of an int.
HINT: The reason why you are seeing such an unhelpful message is that MongoEngine is using the %s format string improperly. In fact, the result of "%s" % something depends on the type of something, because tuples are special-cased. Compare:
>>> '%s' % 10
'10'
>>> '%s' % (10,)
'10'
>>> '%s' % (10, 11)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: not all arguments converted during string formatting
>>> '%s' % ((10,),) # This is the correct way of formatting strings
'(10,)' # when the type of the argument is unknown.
This is MongoEngine's problem, of course, but if you want to avoid the same kind of mistake in your own code, remember to always use a tuple on the right-hand side of the % operator, or even better, use the .format() method.
Are you sure the self model you send is the right one?
This ValidationError is thrown when you have declared a ReferenceField in a document and you try to save this document before saving the referenced document (MongoEngine represents a reference field in MongoDB as a dictionary containing the class and the ObjectId of the reference).
I'm testing the following function:
def getDataMapOfFirstLine(line):
    """Build a dictionary from the ``^``-separated header fields in *line*.

    Each field is split on ``^`` and its nodes are inserted one after the
    other with ``setdefault``, starting at the top-level dictionary; the
    value stored for a node is its position within the field.

    NOTE: ``setdefault`` returns the stored integer, which then becomes the
    container for the next node, so any field with more than one node raises
    ``AttributeError``.
    """
    datamap = {}
    for item in line:
        cursor = datamap
        for depth, node in enumerate(item.split('^')):
            cursor = cursor.setdefault(node, depth)
    return datamap
It should create a dictionary out of the first line of a csv-file, that looks like this:
nummer;such;ans;bverb^konum;bverb^namebspr;bverb^bank^iident;
1213;HANS;Hans Dominik;111000222;Hans' account; DE2145432523534232;
1444555;DIRK;Dirk Daniel;13300002;Dirk's account; DE2134634565462352;
As you can see, the circumflex signs in each semicolon-separated string work something like a join in SQL. If I execute the function, I get this error:
Traceback (most recent call last):
File "./importtool.py", line 173, in <module>
main()
File "./importtool.py", line 38, in main
analyseImportFile(importfile, parser, options)
File "./importtool.py", line 119, in analyseImportFile
datamap = getDataMapOfFirstLine(line)
File "./importtool.py", line 149, in getDataMapOfFirstLine
partialmap = partialmap.setdefault(node, i)
AttributeError: 'int' object has no attribute 'setdefault'
If I replace the i in the setdefault-function by {} I get no error:
{'bverb': {'namebspr': {}, 'konum': {}, 'bank': {'iident': {}}}, 'such': {}, 'ans': {}}
This is nearly what I want, but instead of the {} I would like to get a column number.
I just don't get what is wrong. I tried this in interactive mode:
>>> mydict = {'foo': "Hallo", 'bar': 5}
>>> mydict.setdefault("sth", 12)
12
>>> print mydict
{'sth': 12, 'foo': 'Hallo', 'bar': 5}
As you see, this works...
I appreciate every help. Thanks in advance!
Your problem is this line:
partialmap = partialmap.setdefault(node, i)
dict.setdefault returns the thing that was set (or what was already there). In this case, it's an integer so you're setting partialmap to an int. You can probably just not grab the return value (which is what you've done in the interactive terminal BTW):
partialmap.setdefault(node, i)
I am building an algorithm for sentiment analysis that should segment a .txt corpus into sentences and words, but there is a problem in the code that I don't know how to resolve.
class Splitter(object):
    """Sentence splitter and word tokenizer built on NLTK."""

    # NOTE: the name has single underscores, so Python does not run this
    # method when ``Splitter()`` is called; the attributes below exist only
    # after an explicit call.
    def _init_(self):
        self.nltk_splitter = nltk.data.load('tokenizers/punkt/english/pickle')
        self.nltk_tokenizer = nltk.tokenize.TreebankWordTokenizer()

    def split(self, text):
        """Split *text* into sentences and each sentence into words.

        Returns a list with one list of words per sentence, e.g.
        ``[['this', 'is'], ['life', 'worth', 'living']]``.
        """
        words_per_sentence = []
        for sentence in self.nltk_splitter.tokenize(text):
            words_per_sentence.append(self.nltk_tokenizer.tokenize(sentence))
        return words_per_sentence
and then I did the following things
>>> f = open('amazonshoes.txt')
>>> raw = f.read()
>>> text = nltk.Text(raw)
>>> splitter = Splitter()
>>> splitted_sentences = splitter.split(text)
and the error is
Traceback (most recent call last):
File "<pyshell#21>", line 1, in <module>
splitted_sentences = splitter.split(text)
File "<pyshell#14>", line 9, in split
sentences = self.nltk_splitter.tokenize(text)
AttributeError: 'Splitter' object has no attribute 'nltk_splitter'
The constructor of the class Splitter should be called __init__, with two leading and trailing underscores.
Currently the _init_ method (single underscores) is never executed automatically, so the Splitter object you create (by calling Splitter()) never acquires the attribute nltk_splitter.
I'm trying to get results from a SOAP service called Chrome ADS (for vehicle data). They provided php and Java samples, but I need python (our site is in Django). My question is:
What should I be passing as a request to the SOAP service when using wsdl2py-generated classes?
Following the examples I'm using a DataVersionsRequest object as the request parameter, but the code generated by wsdl2py seems to want a getDataVersions object, and there's something like that defined at the bottom of the generated _client.py file. But that too seems to throw an error. So I'm not sure what I should be passing as the request obj. Any suggestions?
$sudo apt-get install python-zsi
$wsdl2py http://platform.chrome.com/***********
$python
>>> url = "http://platform.chrome.com/***********"
>>> from AutomotiveDescriptionService6_client import *
>>> from AutomotiveDescriptionService6_types import *
>>> locator = AutomotiveDescriptionService6Locator()
>>> service = locator.getAutomotiveDescriptionService6Port()
>>> locale = ns0.Locale_Def('locale')
>>> locale._country="US"
>>> locale._language="English"
>>> acctInfo = ns0.AccountInfo_Def('accountInfo')
>>> acctInfo._accountNumber=*****
>>> acctInfo._accountSecret="*****"
>>> acctInfo._locale = locale
>>> dataVersionsRequest = ns0.DataVersionsRequest_Dec()
>>> dataVersionsRequest._accountInfo = acctInfo
>>> service.getDataVersions(dataVersionsRequest)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "AutomotiveDescriptionService6_client.py", line 36, in getDataVersions
raise TypeError, "%s incorrect request type" % (request.__class__)
TypeError: <class 'AutomotiveDescriptionService6_types.DataVersionsRequest_Dec'> incorrect request type
>>> dataVersionsRequest = getDataVersions
>>> dataVersionsRequest._accountInfo = acctInfo
>>> service.getDataVersions(dataVersionsRequest)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "AutomotiveDescriptionService6_client.py", line 36, in getDataVersions
raise TypeError, "%s incorrect request type" % (request.__class__)
AttributeError: class DataVersionsRequest_Holder has no attribute '__class__'
>>> quit()
$cat AutomotiveDescriptionService6_client.py
.....
# Locator
class AutomotiveDescriptionService6Locator:
    """Hands out port bindings for the AutomotiveDescriptionService6 endpoint."""

    AutomotiveDescriptionService6Port_address = "http://platform.chrome.com:80/AutomotiveDescriptionService/AutomotiveDescriptionService6"

    def getAutomotiveDescriptionService6PortAddress(self):
        """Return the default endpoint address stored on the class."""
        default_address = AutomotiveDescriptionService6Locator.AutomotiveDescriptionService6Port_address
        return default_address

    def getAutomotiveDescriptionService6Port(self, url=None, **kw):
        """Return a SOAP binding for *url*, or for the default address if *url* is falsy."""
        address = url or AutomotiveDescriptionService6Locator.AutomotiveDescriptionService6Port_address
        return AutomotiveDescriptionService6BindingSOAP(address, **kw)
# Methods
class AutomotiveDescriptionService6BindingSOAP:
    """Binding object exposing the service's SOAP operations as methods."""

    def __init__(self, url, **kw):
        """Create the underlying ``client.Binding`` for *url*."""
        # Reader and writer classes default to None unless the caller set them.
        for option in ("readerclass", "writerclass"):
            kw.setdefault(option, None)
        # no resource properties
        self.binding = client.Binding(url=url, **kw)
        # no ws-addressing

    # op: getDataVersions
    def getDataVersions(self, request, **kw):
        """Send *request* and return the response that is received.

        Raises TypeError unless *request* is an instance of the module-level
        ``getDataVersions`` class.
        """
        if not isinstance(request, getDataVersions):
            raise TypeError("%s incorrect request type" % (request.__class__))
        # no input wsaction
        self.binding.Send(None, None, request, soapaction="", **kw)
        # no output wsaction
        return self.binding.Receive(getDataVersionsResponse.typecode)
.....
getDataVersions = GED("urn:description6.kp.chrome.com", "DataVersionsRequest").pyclass
Also, as an aside, I'm not sure that the strings I'm passing to the pname parameter are correct, I assume that those are the ones I see inside the XML when I explore the service with SOAP UI, right?
It looks like you might be passing a class to service.getDataVersions() the second time instead of an instance (it can't be an instance if it doesn't have __class__).
What's happening is isinstance() returns false, and in the process of trying to raise a type error, an attribute error gets raised instead because it's trying to access __class__ which apparently doesn't exist.
What happens if you try:
>>> dataVersionsRequest = getDataVersions()
>>> dataVersionsRequest._accountInfo = acctInfo
>>> service.getDataVersions(dataVersionsRequest)
?
Based on the line:
if isinstance(request, getDataVersions) is False:
raise TypeError, "%s incorrect request type" % (request.__class__)
it definitely looks like you should be passing an instance of getDataVersions, so you're probably on the right track.
You probably need to be instantiating your definition objects and then populating them. Look for type == pyclass_type objects associated with the request you're wanting to make and instantiate them.
e.g. (just guessing)
>>> versionrequest = getDataVersions()
>>> versionrequest.AccountInfo = versionrequest.new_AccountInfo()
>>> versionrequest.AccountInfo.accountNumber = "123"
>>> versionrequest.AccountInfo.accountSecret = "shhhh!"
>>> service.getDataVersions(versionrequest)
I found that the code generated by wsdl2py was too slow for my purposes. Good luck.