On exercise 48 of Learn Python the Hard Way, I'm asked to create a module to be tested by this one, lexicon_tests.py:
from nose.tools import *
from ex48 import lexicon
def test_directions():
    assert_equal(lexicon.scan("north"), [('direction', 'north')])
    result = lexicon.scan("north south east")
    assert_equal(result, [('direction', 'north'),
                          ('direction', 'south'),
                          ('direction', 'east')])

def test_verbs():
    assert_equal(lexicon.scan("go"), [('verb', 'go')])
    result = lexicon.scan("go kill eat")
    assert_equal(result, [('verb', 'go'),
                          ('verb', 'kill'),
                          ('verb', 'eat')])

def test_stops():
    assert_equal(lexicon.scan("the"), [('stop', 'the')])
    result = lexicon.scan("the in of")
    assert_equal(result, [('stop', 'the'),
                          ('stop', 'in'),
                          ('stop', 'of')])

def test_nouns():
    assert_equal(lexicon.scan("bear"), [('noun', 'bear')])
    result = lexicon.scan("bear princess")
    assert_equal(result, [('noun', 'bear'),
                          ('noun', 'princess')])

def test_numbers():
    assert_equal(lexicon.scan("1234"), [('number', 1234)])
    result = lexicon.scan("3 91234")
    assert_equal(result, [('number', 3),
                          ('number', 91234)])

def test_errors():
    assert_equal(lexicon.scan("ASDFADFASDF"), [('error', 'ASDFADFASDF')])
    result = lexicon.scan("bear IAS princess")
    assert_equal(result, [('noun', 'bear'),
                          ('error', 'IAS'),
                          ('noun', 'princess')])
So I created the module, lexicon.py, to be tested here:
def scan(words):
    directions = ['north', 'south', 'east', 'west', 'down', 'up', 'left', 'right', 'back']
    verbs = ['go', 'stop', 'kill', 'eat']
    stop_words = ['the', 'in', 'of', 'from', 'at', 'it']
    nouns = ['door', 'bear', 'princess', 'cabinet']
    lex = words.split()
    list1 = []
    for i in lex:
        if i in directions:
            list1.append(('direction', i))
        elif i in verbs:
            list1.append(('verb', i))
        elif i in stop_words:
            list1.append(('stop-word', i))
        elif i in nouns:
            list1.append(('noun', i))
        elif i.isdigit():
            list1.append(('number', convert_number(i)))
        else:
            list1.append(('error', i))
    print list1

def convert_number(s):
    try:
        return int(s)
    except ValueError:
        return None
However, when I run nosetests in PowerShell I get this AssertionError:
Traceback (most recent call last):
  File "G:\Python27\lib\site-packages\nose\case.py", line 197, in runTest
    self.test(*self.arg)
  File "G:\Users\Charles\dropbox\programming\lexicon_test\skeleton\tests\lexicon_tests.py", line 6, in test_directions
    assert_equal(lexicon.scan("north"), [('direction', 'north')])
AssertionError: None != [('direction', 'north')]
-------------------- >> begin captured stdout << ---------------------
[('direction', 'north')]
That's the same error message I get for each test run, six of them for the six functions in lexicon_tests.py. What does this error mean? It's been irritating me for a while now. Thanks in advance.
The assert_equal function takes two arguments and raises an error if the arguments aren't equal to each other. In this case, the result of lexicon.scan("north") is None, and since that isn't equal to [('direction', 'north')], the assertion fails.
In other words, your lexicon.scan function isn't working properly: it prints list1 (which is why the captured stdout shows the expected list) but has no return statement, so it implicitly returns None.
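Here's a tiny demonstration of the difference between printing and returning (a hypothetical shout function, written in Python 2 to match the question):

def shout():
    print "hello"    # writes to stdout; execution then falls off the end

result = shout()     # prints: hello
print result         # prints: None -- a function with no return statement returns None

Changing the final print list1 in scan to return list1 should make the tests pass.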
I have a dataframe where every row is a list of tuples, i.e. tuple = (word, pos_tag). In each row, I want to change the word of some tuples by marking it and then update the tuple with the marked word. For example:
Initial dataframe row :
[('This', 'DET'), ('is', 'VERB'), ('an', 'DET'), ('example', 'NOUN'), ('text', 'NOUN'), ('that', 'DET'), ('I', 'PRON'), ('use', 'VERB'), ('in', 'ADP'), ('order', 'NOUN'), ('to', 'PART'), ('get', 'VERB'), ('an', 'DET'), ('answer', 'NOUN')]
Updated words :
updated_word : <IN>example</IN>
updated_word : <TAR>answer</TAR>
Desired output :
[('This', 'DET'), ('is', 'VERB'), ('an', 'DET'), ('<IN>example</IN>', 'NOUN'), ('text', 'NOUN'), ('that', 'DET'), ('I', 'PRON'), ('use', 'VERB'), ('in', 'ADP'), ('order', 'NOUN'), ('to', 'PART'), ('get', 'VERB'), ('an', 'DET'), ('<TAR>answer</TAR>', 'NOUN')]
But I get the error TypeError: 'tuple' object is not callable. Can someone help? Here's the code:
for idx, row in df.iterrows():
    doc = nlp(row['title'])
    pos_tags = [(token.text, token.pos_) for token in doc if not token.pos_ == "PUNCT"]
    for position, tuple in enumerate(pos_tags, start=1):
        word = tuple[0]
        spacy_pos_tag = tuple[1]
        word = re.sub(r'[^\w\s]', '', word)
        for col in cols:
            if position in row[col]:
                word = f'<{col.upper()}>{word}</{col.upper()}>'
            else:
                continue
        tuple = tuple(word, spacy_pos_tag)
        print(tuple)
>>>> Traceback (most recent call last):
>>>> tuple = tuple(word, spacy_pos_tag)
>>>> TypeError: 'tuple' object is not callable
Updated question
I have replaced tuple with tuple_ as suggested, but I still can't get back the desired output, which is a list of tuples in every row. Can someone help me update the dataframe rows? Here's the updated code:
for idx, row in df.iterrows():
    doc = nlp(row['title'])
    pos_tags = [(token.text, token.pos_) for token in doc if not token.pos_ == "PUNCT"]
    # print(idx, "tokens, pos : ", pos_tags, "\n")
    for position, tuple_ in enumerate(pos_tags, start=1):
        word = tuple_[0]
        spacy_pos_tag = tuple_[1]
        word = re.sub(r'[^\w\s]', '', word)
        for col in cols:
            if position in row[col]:
                word = f'<{col.upper()}>{word}</{col.upper()}>'
            else:
                continue
        tuple_ = (word, spacy_pos_tag)
        pos_tags.append(' '.join(position, tuple_))
        # pos_tags.append(' '.join(tuple_))
    print(idx, "tokens, pos : ", pos_tags, "\n")
>>>> Traceback (most recent call last):
>>>> pos_tag(df=df_matched)
>>>> pos_tags.append(' '.join(position, tuple_))
>>>> TypeError: join() takes exactly one argument (2 given)
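(As an aside, the join() failure is a separate problem from the naming: str.join() takes a single iterable of strings, so the two values would have to be packed into one sequence and converted with str() first. A minimal, hypothetical sketch:

position, tuple_ = 4, ('example', 'NOUN')
# ' '.join(position, tuple_)                    # TypeError: join() takes exactly one argument (2 given)
print(' '.join([str(position), str(tuple_)]))   # prints: 4 ('example', 'NOUN')

Though to get back a list of tuples per row, appending (word, spacy_pos_tag) to a separate output list, rather than joining strings, would be closer to the desired output.)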
Do not use tuple as a variable name, as it is a built-in Python type name. Try the following instead:
for position, tuple_ in enumerate(pos_tags, start=1):
    word = tuple_[0]
    spacy_pos_tag = tuple_[1]
    word = re.sub(r'[^\w\s]', '', word)
    for col in cols:
        if position in row[col]:
            word = f'<{col.upper()}>{word}</{col.upper()}>'
        else:
            continue
    tuple_ = (word, spacy_pos_tag)
    print(tuple_)
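To see why the original error occurs, here is a minimal, standalone demonstration (hypothetical values): the loop target rebinds the name tuple, so the later call goes to a tuple instance rather than the type. Note also that even the real builtin takes a single iterable, not two arguments:

tuple = ('go', 'VERB')           # rebinds the name, exactly as the loop variable did
try:
    tuple('eat', 'VERB')         # calls the tuple INSTANCE, not the type
except TypeError as e:
    print(e)                     # 'tuple' object is not callable
del tuple                        # removes the shadowing binding; the builtin is visible again
print(tuple(['eat', 'VERB']))    # the real builtin takes one iterable: ('eat', 'VERB')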
Don't use "tuple" as name of a variable. It's a type name
I'm trying to lemmatize a string according to the part of speech, but at the final stage I'm getting an error. My code:
import nltk
from nltk.stem import *
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import wordnet
wordnet_lemmatizer = WordNetLemmatizer()
text = word_tokenize('People who help the blinging lights are the way of the future and are heading properly to their goals')
tagged = nltk.pos_tag(text)
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return ''

for word in tagged: print(wordnet_lemmatizer.lemmatize(word,pos='v'), end=" ")
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-40-afb22c78f770> in <module>()
----> 1 for word in tagged: print(wordnet_lemmatizer.lemmatize(word,pos='v'), end=" ")
E:\Miniconda3\envs\uol1\lib\site-packages\nltk\stem\wordnet.py in lemmatize(self, word, pos)
38
39 def lemmatize(self, word, pos=NOUN):
---> 40 lemmas = wordnet._morphy(word, pos)
41 return min(lemmas, key=len) if lemmas else word
42
E:\Miniconda3\envs\uol1\lib\site-packages\nltk\corpus\reader\wordnet.py in _morphy(self, form, pos)
1710
1711 # 1. Apply rules once to the input to get y1, y2, y3, etc.
-> 1712 forms = apply_rules([form])
1713
1714 # 2. Return all that are in the database (and check the original too)
E:\Miniconda3\envs\uol1\lib\site-packages\nltk\corpus\reader\wordnet.py in apply_rules(forms)
1690 def apply_rules(forms):
1691 return [form[:-len(old)] + new
-> 1692 for form in forms
1693 for old, new in substitutions
1694 if form.endswith(old)]
E:\Miniconda3\envs\uol1\lib\site-packages\nltk\corpus\reader\wordnet.py in <listcomp>(.0)
1692 for form in forms
1693 for old, new in substitutions
-> 1694 if form.endswith(old)]
1695
1696 def filter_forms(forms):
I want to be able to lemmatize that string based on each word's part of speech all at once. Please help.
Firstly, try not to mix top-level, absolute and relative imports like these:
import nltk
from nltk.stem import *
from nltk import pos_tag, word_tokenize
This would be better:
from nltk import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet as wn
(See Absolute vs. explicit relative import of Python module)
The error you're getting is most probably because you are feeding the output of pos_tag as the input to WordNetLemmatizer.lemmatize(), i.e.:
>>> from nltk import pos_tag
>>> from nltk.stem import WordNetLemmatizer
>>> wnl = WordNetLemmatizer()
>>> sent = 'People who help the blinging lights are the way of the future and are heading properly to their goals'.split()
>>> pos_tag(sent)
[('People', 'NNS'), ('who', 'WP'), ('help', 'VBP'), ('the', 'DT'), ('blinging', 'NN'), ('lights', 'NNS'), ('are', 'VBP'), ('the', 'DT'), ('way', 'NN'), ('of', 'IN'), ('the', 'DT'), ('future', 'NN'), ('and', 'CC'), ('are', 'VBP'), ('heading', 'VBG'), ('properly', 'RB'), ('to', 'TO'), ('their', 'PRP$'), ('goals', 'NNS')]
>>> pos_tag(sent)[0]
('People', 'NNS')
>>> first_word = pos_tag(sent)[0]
>>> wnl.lemmatize(first_word)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/nltk/stem/wordnet.py", line 40, in lemmatize
lemmas = wordnet._morphy(word, pos)
File "/usr/local/lib/python2.7/dist-packages/nltk/corpus/reader/wordnet.py", line 1712, in _morphy
forms = apply_rules([form])
File "/usr/local/lib/python2.7/dist-packages/nltk/corpus/reader/wordnet.py", line 1694, in apply_rules
if form.endswith(old)]
AttributeError: 'tuple' object has no attribute 'endswith'
The input to WordNetLemmatizer.lemmatize() should be str not a tuple, so if you do:
>>> tagged_sent = pos_tag(sent)
>>> def penn2morphy(penntag, returnNone=False):
... morphy_tag = {'NN':wn.NOUN, 'JJ':wn.ADJ,
... 'VB':wn.VERB, 'RB':wn.ADV}
... try:
... return morphy_tag[penntag[:2]]
... except:
... return None if returnNone else ''
...
>>> for word, tag in tagged_sent:
... wntag = penn2morphy(tag)
... if wntag:
... print wnl.lemmatize(word, pos=wntag)
... else:
... print word
...
People
who
help
the
blinging
light
be
the
way
of
the
future
and
be
head
properly
to
their
goal
Or if you like an easy way out:
pip install pywsd
Then:
>>> from pywsd.utils import lemmatize, lemmatize_sentence
>>> sent = 'People who help the blinging lights are the way of the future and are heading properly to their goals'
>>> lemmatize_sentence(sent)
['people', 'who', 'help', 'the', u'bling', u'light', u'be', 'the', 'way', 'of', 'the', 'future', 'and', u'be', u'head', 'properly', 'to', 'their', u'goal']
Given an input sentence that has BIO chunk tags:
[('What', 'B-NP'), ('is', 'B-VP'), ('the', 'B-NP'), ('airspeed', 'I-NP'), ('of', 'B-PP'), ('an', 'B-NP'), ('unladen', 'I-NP'), ('swallow', 'I-NP'), ('?', 'O')]
I would need to extract the relevant phrases, e.g. if I want to extract 'NP', I would need to extract the fragments of tuples that contain B-NP and I-NP.
[out]:
[('What', '0'), ('the airspeed', '2-3'), ('an unladen swallow', '5-6-7')]
(Note: the numbers in the extracted tuples represent the token indices.)
I have tried extracting it using the following code:
def extract_chunks(tagged_sent, chunk_type):
    current_chunk = []
    current_chunk_position = []
    for idx, word_pos in enumerate(tagged_sent):
        word, pos = word_pos
        if '-'+chunk_type in pos: # Append the word to the current_chunk.
            current_chunk.append((word))
            current_chunk_position.append((idx))
        else:
            if current_chunk: # Flush the full chunk when out of an NP.
                _chunk_str = ' '.join(current_chunk)
                _chunk_pos_str = '-'.join(map(str, current_chunk_position))
                yield _chunk_str, _chunk_pos_str
                current_chunk = []
                current_chunk_position = []
    if current_chunk: # Flush the last chunk.
        yield ' '.join(current_chunk), '-'.join(current_chunk_position)

tagged_sent = [('What', 'B-NP'), ('is', 'B-VP'), ('the', 'B-NP'), ('airspeed', 'I-NP'), ('of', 'B-PP'), ('an', 'B-NP'), ('unladen', 'I-NP'), ('swallow', 'I-NP'), ('?', 'O')]
print (list(extract_chunks(tagged_sent, chunk_type='NP')))
But when I have adjacent chunks of the same type:
tagged_sent = [('The', 'B-NP'), ('Mitsubishi', 'I-NP'), ('Electric', 'I-NP'), ('Company', 'I-NP'), ('Managing', 'B-NP'), ('Director', 'I-NP'), ('ate', 'B-VP'), ('ramen', 'B-NP')]
print (list(extract_chunks(tagged_sent, chunk_type='NP')))
It outputs this:
[('The Mitsubishi Electric Company Managing Director', '0-1-2-3-4-5'), ('ramen', '7')]
Instead of the desired:
[('The Mitsubishi Electric Company', '0-1-2-3'), ('Managing Director', '4-5'), ('ramen', '7')]
How can this be resolved from the above code?
Other than how it's done from the code above, is there a better solution to extract the desired chunks of a specific chunk_type?
Try this; it will extract all types of chunks with the indices of their respective words.
def extract_chunks(tagged_sent, chunk_type='NP'):
    out_sen = []
    for idx, word_pos in enumerate(tagged_sent):
        word, bio = word_pos
        boundary, tag = bio.split("-") if "-" in bio else ('', 'O')
        if tag != chunk_type:
            continue
        if boundary == "B":
            out_sen.append([word, str(idx)])
        elif boundary == "I":
            out_sen[-1][0] += " " + word
            out_sen[-1][-1] += "-" + str(idx)
        else:
            out_sen.append([word, str(idx)])
    return out_sen
Demo:
>>> tagged_sent = [('The', 'B-NP'), ('Mitsubishi', 'I-NP'), ('Electric', 'I-NP'), ('Company', 'I-NP'), ('Managing', 'B-NP'), ('Director', 'I-NP'), ('ate', 'B-VP'), ('ramen', 'B-NP')]
>>> output_sent = extract_chunks(tagged_sent)
>>> print map(tuple, output_sent)
[('The Mitsubishi Electric Company', '0-1-2-3'), ('Managing Director', '4-5'), ('ramen', '7')]
def extract_chunks(tagged_sent, chunk_type):
    grp1, grp2, chunk_type = [], [], "-" + chunk_type
    for ind, (s, tp) in enumerate(tagged_sent):
        if tp.endswith(chunk_type):
            if not tp.startswith("B"):
                grp2.append(str(ind))
                grp1.append(s)
            else:
                if grp1:
                    yield " ".join(grp1), "-".join(grp2)
                grp1, grp2 = [s], [str(ind)]
    yield " ".join(grp1), "-".join(grp2)
Output:
In [2]: l = [('The', 'B-NP'), ('Mitsubishi', 'I-NP'), ('Electric', 'I-NP'), ('Company', 'I-NP'), ('Managing', 'B-NP'),
...: ('Director', 'I-NP'), ('ate', 'B-VP'), ('ramen', 'B-NP')]
In [3]: list(extract_chunks(l, "NP"))
Out[3]:
[('The Mitsubishi Electric Company', '0-1-2-3'),
('Managing Director', '4-5'),
('ramen', '7')]
In [4]: l = [('What', 'B-NP'), ('is', 'B-VP'), ('the', 'B-NP'), ('airspeed', 'I-NP'), ('of', 'B-PP'), ('an', 'B-NP'), ('unladen', 'I-NP'), ('swallow', 'I-NP'), ('?', 'O')]
In [5]: list(extract_chunks(l, "NP"))
Out[5]: [('What', '0'), ('the airspeed', '2-3'), ('an unladen swallow', '5-6-7')]
I would do it like this:
import re

def extract_chunks(tagged_sent, chunk_type):
    # compiles the expression we want to match
    regex = re.compile(chunk_type)
    # filters matched items in a dictionary whose keys are the matched indexes
    first_step = {index_: tag[0] for index_, tag in enumerate(tagged_sent) if regex.findall(tag[1])}
    # builds list of lists following output format
    second_step = []
    for key_ in sorted(first_step.keys()):
        if second_step and int(second_step[len(second_step)-1][1].split('-')[-1]) == key_ - 1:
            second_step[len(second_step)-1][0] += ' {0}'.format(first_step[key_])
            second_step[len(second_step)-1][1] += '-{0}'.format(str(key_))
        else:
            second_step.append([first_step[key_], str(key_)])
    # builds output in final format
    return [tuple(item) for item in second_step]
You can adapt it to use generators instead of building the whole output in memory like I am doing, and refactor it for better performance (I'm in a hurry, so the code is far from optimal).
Hope it helps!
I've spent all day trying to solve the test_errors() function in "Exercise 48: Advanced User Input" of the book Learn Python The Hard Way.
assert_equal(), a function in the tests, expects the tuples in order, and I haven't been able to code it that way.
My loops always return the nouns first and the error tuples last. I don't know how to break the loop so it starts again with the right values to continue, or whatever else is necessary to sort these tuples into the order they should be in.
Here's the code:
class Lexicon(object):
    def scan(self, stringo):
        vocabulary = [[('direction', 'north'), ('direction', 'south'), ('direction', 'east'), ('direction', 'west')],
                      [('verb', 'go'), ('verb', 'kill'), ('verb', 'eat')],
                      [('stop', 'the'), ('stop', 'in'), ('stop', 'of')],
                      [('noun', 'bear'), ('noun', 'princess')],  # Remember numbers
                      [('error', 'ASDFADFASDF'), ('error', 'IAS')],
                      [('number', '1234'), ('number', '3'), ('number', '91234')]]
        self.stringo = stringo
        got_word = ''
        value = []
        rompe = self.stringo.split()  # split() breaks the string at the spaces
        for asigna in vocabulary:
            for encuentra in asigna:
                if encuentra[1] in rompe:
                    value.append(encuentra)
        return value

eLexicon = Lexicon()
from nose.tools import *
from ex48.ex48 import eLexicon
def test_directions():
    assert_equal(eLexicon.scan("north"), [('direction', 'north')])
    result = eLexicon.scan("north south east")
    assert_equal(result, [('direction', 'north'),
                          ('direction', 'south'),
                          ('direction', 'east')])

def test_verbs():
    assert_equal(eLexicon.scan("go"), [('verb', 'go')])
    result = eLexicon.scan("go kill eat")
    assert_equal(result, [('verb', 'go'),
                          ('verb', 'kill'),
                          ('verb', 'eat')])

def test_stops():
    assert_equal(eLexicon.scan("the"), [('stop', 'the')])
    result = eLexicon.scan("the in of")
    assert_equal(result, [('stop', 'the'),
                          ('stop', 'in'),
                          ('stop', 'of')])

def test_nouns():
    assert_equal(eLexicon.scan("bear"), [('noun', 'bear')])
    result = eLexicon.scan("bear princess")
    assert_equal(result, [('noun', 'bear'),
                          ('noun', 'princess')])

#def test_numbers():
#    assert_equal(lexicon.scan("1234"), [('number', 1234)])
#    result = lexicon.scan("3 91234")
#    assert_equal(result, [('number', 3),
#                          ('number', 91234)])

def test_errors():
    assert_equal(eLexicon.scan("ASDFADFASDF"), [('error', 'ASDFADFASDF')])
    result = eLexicon.scan("bear IAS princess")
    assert_equal(result, [('noun', 'bear'),
                          ('error', 'IAS'),
                          ('noun', 'princess')])
======================================================================
FAIL: tests.ex48_tests.test_errors
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/nose/case.py", line 197, in runTest
    self.test(*self.arg)
  File "/home/totoro/Desktop/Python/projects/ex48/tests/ex48_tests.py", line 43, in test_errors
    ('noun', 'princess')])
AssertionError: Lists differ: [('noun', 'bear'), ('noun', 'p... != [('noun', 'bear'), ('error', '...
First differing element 1:
('noun', 'princess')
('error', 'IAS')
- [('noun', 'bear'), ('noun', 'princess'), ('error', 'IAS')]
+ [('noun', 'bear'), ('error', 'IAS'), ('noun', 'princess')]
----------------------------------------------------------------------
Ran 5 tests in 0.006s
Many thanks in advance for taking the time.
The words in the test come out in the same order they went in. As such, you need to re-order your for-loops to iterate over the input first:
value = []
for rompe in stringo.split():
    for asigna in vocabulary:
        for encuentra in asigna:
            if encuentra[1] == rompe:
                value.append(encuentra)
This will return the encuentras in the correct order.
Note 1: You should not be hard-coding the numbers or errors.
Note 2: You can drastically reduce the complexity of this algorithm by using a dictionary or two.
Example:
vocabulary = {
    'direction': 'north east south west up down left right back'.split(),
    'noun': 'bear princess door cabinet'.split(),
    'stop': 'the in of from at it'.split(),
    'verb': 'go kill eat stop'.split(),
}

'''
This creates a lookup using a dictionary-comprehension:
{'at': 'stop',
 # [...]
 'up': 'direction',
 'west': 'direction'}
'''
classifications = {i: k for k, v in vocabulary.iteritems() for i in v}

def classify(word):
    try:
        return 'number', int(word)
    except ValueError:
        return classifications.get(word, 'error'), word

def scan(words):
    return [classify(word) for word in words.split()]
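For example, a quick check of the dictionary-based version above (Python 2 to match iteritems(); output shown as a comment):

print scan("bear IAS princess 1234")
# [('noun', 'bear'), ('error', 'IAS'), ('noun', 'princess'), ('number', 1234)]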
for word in self.stringo.split():
    for pair in vocabulary:
        if pair[0][1] == word:
            value.append(pair[0])
        elif pair[1][1] == word:
            value.append(pair[1])
        elif pair[2][1] == word:
            value.append(pair[2])
        elif pair[3][1] == word:
            value.append(pair[3])
I just finished this exercise, hope this gives some new ideas to some of you.
This is my solution:
# Set up the data structure
direction = ["north", "east", "south", "west", "up", "right", "down", "left", "back"]
verb = ["go", "stop", "kill", "eat"]
stop = ["the", "in", "of", "from", "at", "it"]
noun = ["door", "bear", "princess", "cabinet"]
vocabulary = [(direction, 'direction'), (verb, 'verb'), (stop, 'stop'), (noun, 'noun')]

def scan(sentence):
    # Searches for each word in the data structure; if not found, checks whether
    # it is an integer, and otherwise records an error tuple.
    results = []
    words = sentence.split()
    for word in words:
        found = False
        for category in vocabulary:
            if word.lower() in category[0]:
                results.append((category[1], word))
                found = True
            else:
                pass
        if found is False and isInt_str(word) is True:
            results.append(('number', int(word)))
        elif found is False and isInt_str(word) is False:
            results.append(('error', word))
        elif found is True:
            pass
        else:
            print("I'm terribly sorry, but something you entered is neither a word nor a number.")
    return results

def isInt_str(string):
    # Returns True or False depending on whether the string represents an integer
    # (e.g. '2' -> True, '-2' -> True, '2.5' -> False).
    string = str(string).strip()
    return string == '0' or (string if string.find('..') > -1 else string.lstrip('-+').rstrip('0').rstrip('.')).isdigit()
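If that one-liner is hard to follow, a simpler alternative (a sketch, not part of the original solution) just asks int() directly:

def is_int(string):
    # True if int() can parse the string (handles '-2' and '+3' too), else False.
    try:
        int(string)
        return True
    except ValueError:
        return False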
I have been attempting to work my way through Learn Python the Hard Way, and on Exercise 48 I continue to get an error when I run nosetests. I am using code that other people on the site have verified to work, but no matter what, I continue to get this error:
======================================================================
ERROR: tests.ex48_tests.test_directions
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nose/case.py", line 197, in runTest
    self.test(*self.arg)
  File "/Users/AlexanderMariona/Documents/Home/Programming/Python/Projects/Exercise 48/tests/ex48_tests.py", line 6, in test_directions
    assert_equal(lexicon.scan("north"), [('direction', 'north')])
AttributeError: 'module' object has no attribute 'scan'
I get this error 6 times, one for each of my test functions.
Here is what I'm using for my code:
lexicon.py:
class Lexicon(object):
    directions = ['north', 'south', 'east', 'west', 'down', 'up', 'down', 'right']
    verbs = ['go', 'stop', 'kill', 'eat']
    stops = ['the', 'in', 'at', 'of', 'from', 'at', 'it']
    nouns = ['door', 'bear', 'princess', 'cabinet']

    def scan(thewords):
        thewords = thewords.split()
        sentence = []
        for i in thewords:
            if i in directions:
                sentence.append(('direction', i))
            elif i in verbs:
                sentence.append(('verb', i))
            elif i in stops:
                sentence.append(('stop', i))
            elif i in nouns:
                sentence.append(('noun', i))
            elif i.isdigit():
                sentence.append(('number', convert_number(i)))
            else:
                sentence.append(('error', i))
        return sentence

def convert_number(s):
    try:
        return int(s)
    except ValueError:
        return None

lexicon = Lexicon()
(This was written by Dairylee.)
ex48_tests.py:
from nose.tools import *
from ex48 import lexicon
def test_directions():
    assert_equal(lexicon.scan("north"), [('direction', 'north')])
    result = lexicon.scan("north south east")
    assert_equal(result, [('direction', 'north'),
                          ('direction', 'south'),
                          ('direction', 'east')])

def test_verbs():
    assert_equal(lexicon.scan("go"), [('verb', 'go')])
    result = lexicon.scan("go kill eat")
    assert_equal(result, [('verb', 'go'),
                          ('verb', 'kill'),
                          ('verb', 'eat')])

def test_stops():
    assert_equal(lexicon.scan("the"), [('stop', 'the')])
    result = lexicon.scan("the in of")
    assert_equal(result, [('stop', 'the'),
                          ('stop', 'in'),
                          ('stop', 'of')])

def test_nouns():
    assert_equal(lexicon.scan("bear"), [('noun', 'bear')])
    result = lexicon.scan("bear princess")
    assert_equal(result, [('noun', 'bear'),
                          ('noun', 'princess')])

def test_numbers():
    assert_equal(lexicon.scan("1234"), [('number', 1234)])
    result = lexicon.scan("3 91234")
    assert_equal(result, [('number', 3),
                          ('number', 91234)])

def test_errors():
    assert_equal(lexicon.scan("ASDFADFASDF"), [('error', 'ASDFADFASDF')])
    result = lexicon.scan("bear IAS princess")
    assert_equal(result, [('noun', 'bear'),
                          ('error', 'IAS'),
                          ('noun', 'princess')])
(This is copied verbatim from LPTHW.)
setup.py:
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

config = {
    'name': 'Excercise 48',
    'description': 'LPTHW',
    'version': '0.1',
    'author': 'My Name',
    'author_email': 'My E-Mail',
    'url': 'None',
    'download_url': 'None',
    'packages': ['ex48'],
    'scripts': [],
    'install_requires': ['nose']
}

setup(**config)
And here is the directory of the package:
Exercise 48/
    bin/
    docs/
    ex48/
        __init__.py
        lexicon.py
    setup.py
    tests/
        __init__.py
        ex48_tests.py
What exactly is causing this error?
This error happens because there's no function scan in the module lexicon; there's a method scan in the class Lexicon, so it would have to be called as a method on an instance (and note that its definition is also missing the self argument).
On the other hand, Lexicon does not have to exist as a class at all: scan can be a module-level function.
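A minimal sketch of that second, module-level option (keeping the same word lists, with the duplicates removed), so that from ex48 import lexicon followed by lexicon.scan("north") behaves as the tests expect:

# lexicon.py -- sketch with scan as a module-level function
directions = ['north', 'south', 'east', 'west', 'down', 'up', 'left', 'right']
verbs = ['go', 'stop', 'kill', 'eat']
stops = ['the', 'in', 'at', 'of', 'from', 'it']
nouns = ['door', 'bear', 'princess', 'cabinet']

def convert_number(s):
    try:
        return int(s)
    except ValueError:
        return None

def scan(thewords):
    sentence = []
    for word in thewords.split():
        if word in directions:
            sentence.append(('direction', word))
        elif word in verbs:
            sentence.append(('verb', word))
        elif word in stops:
            sentence.append(('stop', word))
        elif word in nouns:
            sentence.append(('noun', word))
        elif word.isdigit():
            sentence.append(('number', convert_number(word)))
        else:
            sentence.append(('error', word))
    return sentence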