How can I write automatic tests for this Python code? - python

My script core.py which is found in the folder preprocessing takes a string and cleans it. It is part of a bigger model (see the last import, but it's unimportant). The dict_english, found in app/core/preprocessing/constants, is just a dictionary of uncommon English words that I replace with other words.
import string
from app.core.preprocessing.constants import dict_english
from app.core.generic.step import Step
from typing import Optional
from app.api.model.my_project_parameters import MyProjectParameters
class TextPreprocessingBase(Step[str, str]):
    """Pipeline step that removes quote characters and non-printable characters."""

    def process(self, input_value: str, parameters: Optional[MyProjectParameters] = None) -> str:
        """Return ``input_value`` without quotes and without non-printable characters.

        Single and double quotes are deleted, then every character that is not
        in ``string.printable`` is dropped. Whitespace is kept as-is; nothing is
        stripped from the ends.

        ``parameters`` is accepted for interface compatibility and is not used.
        """
        unquoted = input_value.replace("'", '').replace("\"", '')
        printable = set(string.printable)
        # Build the result once; the filter result must be joined to have any effect.
        return ''.join(ch for ch in unquoted if ch in printable)
class TextPreprocessingEnglish(TextPreprocessingBase):
    """Base cleaning followed by replacement of the words listed in ``dict_english``."""

    def process(self, input_value: str, parameters: Optional[MyProjectParameters] = None) -> str:
        """Clean ``input_value`` with the base step, then apply ``dict_english``.

        Each key of ``dict_english`` is lowercased and every occurrence of it in
        the cleaned text is replaced by the mapped value. This is plain
        substring replacement: the text itself is not lowercased, so
        capitalized occurrences are not matched, and a key that appears inside
        a longer word is replaced there too.
        """
        cleaned = super().process(input_value, parameters)
        for word, replacement in dict_english.items():
            cleaned = cleaned.replace(word.lower(), replacement)
        return cleaned
It's easy to test:
# Quick manual check: the quotes and the check mark (which is not in
# string.printable) are removed by the preprocessing step.
string_example= """ Random 'text' ✓"""
a = TextPreprocessingEnglish()
output = a.process(string_example)
print(output)
It prints:
Random text
But I want to write some automatic tests. I thought:
import pytest
from app.core.preprocessing.core import TextPreprocessingBase, TextPreprocessingEnglish
class TestEnglishPreprocessing:
    # Unfinished stub: the fixture body still has to be written.
    @pytest.fixture(scope='class')
    def english_preprocessing(self):
        ...
But I'm stuck here. I just want to test my code on several various strings that I manually write. Is it possible to do this like that or do I just write it like the simple test example above?

This sounds like something you could solve by parametrizing a test, for example:
import pytest
from process import TextPreprocessingEnglish
@pytest.mark.parametrize(
    "raw_text,expected",
    [
        # process() deletes the quotes and the non-printable check mark but
        # never strips whitespace, so the leading space and the space before
        # the check mark remain in the result.
        # Assumes dict_english has no entry matching these words — TODO confirm.
        (""" Random 'text' ✓""", " Random text "),
        (""" Some other 'text' ✓""", " Some other text "),
    ],
)
def test_process(raw_text, expected):
    """Run each (raw_text, expected) pair through TextPreprocessingEnglish.process."""
    preprocessor = TextPreprocessingEnglish()
    output = preprocessor.process(raw_text)
    assert output == expected

Related

Pytest how to change printing behavior with @pytest.mark.parametrize

I have a parameterized test that receives a long string as an argument, and the problem is that when executing pytest it prints the entire string
@pytest.mark.parametrize(
    "long_string_p1, long_string_p2",
    # One test case per pair of strings taken from the two lists in lockstep.
    list(zip(list_str_1, list_str_2)),
)
def test_with_long_strings(long_string_p1, long_string_p2):
    # Logic
    assert long_string_p1 != long_string_p2
pytest
test_example.py::test_with_long_strings[looooooooong string1-loooooooooong string2] PASSED
I would like it to only show the last part of the string, somehow change the logic so that it doesn't show the whole string but only a part
You should be able to use pytest.param to enhance the id values used. Could make a helper to determine the name based on your custom logic.
Here's an example where it takes the last space-separated word of each string and joins them with a dash:
import pytest
list_str_1 = ["looooooooong string1"]
list_str_2 = ["loooooooooong string2"]
def make_param(args):
    """Wrap one tuple of string arguments in ``pytest.param`` with a short id.

    The id is the last space-separated word of each string, joined with "-".
    """
    return pytest.param(*args, id='-'.join(s.split(" ")[-1] for s in args))
@pytest.mark.parametrize(
    "long_string_p1, long_string_p2",
    # Each zipped pair becomes a pytest.param carrying its own shortened id.
    list(map(make_param, zip(list_str_1, list_str_2))),
)
def test_with_long_strings(long_string_p1, long_string_p2):
    assert long_string_p1 != long_string_p2
You can also supply a function as the ids argument to format each entry individually:
import pytest
list_str_1 = ["looooooooong string1"]
list_str_2 = ["loooooooooong string2"]
def id_function(val):
    """Return the last space-separated word of ``val`` for use as a test id.

    pytest calls an ``ids`` callable once per argument value. For values that
    are not strings this returns None, which tells pytest to fall back to its
    automatically generated id instead of failing on ``.split``.
    """
    if not isinstance(val, str):
        return None
    return val.split(" ")[-1]
@pytest.mark.parametrize(
    "long_string_p1, long_string_p2",
    [*zip(list_str_1, list_str_2)],
    # Called once per argument value to build each part of the test id.
    ids=id_function,
)
def test_with_long_strings(long_string_p1, long_string_p2):
    assert long_string_p1 != long_string_p2

Creating a decorator to mock input() using monkeypatch in pytest

End goal: I want to be able to quickly mock the input() built-in function in pytest, and replace it with an iterator that generates a (variable) list of strings. This is my current version, which works:
from typing import Callable
import pytest
def _create_patched_input(str_list: list[str]) -> Callable:
str_iter = iter(str_list.copy())
def patched_input(prompt: str) -> str: # has the same signature as input
val = next(str_iter)
print(prompt + val, end="\n"),
return val
return patched_input
@pytest.fixture
def _mock_input(monkeypatch, input_string_list: list[str]):
    """Replace ``builtins.input`` with a function replaying ``input_string_list``.

    ``input_string_list`` is resolved by pytest by name, so the test using this
    fixture must provide an argument (e.g. via parametrize) with that exact name.
    """
    patched_input = _create_patched_input(input_string_list)
    monkeypatch.setattr("builtins.input", patched_input)
def mock_input(f):
    """Decorator that marks test ``f`` as using the ``_mock_input`` fixture."""
    return pytest.mark.usefixtures("_mock_input")(f)
# Beginning of test code
def get_name(prompt: str) -> str:
    """Ask for a name with ``prompt`` and return whatever ``input`` yields."""
    return input(prompt)
@mock_input
@pytest.mark.parametrize(
    "input_string_list",
    (["Alice", "Bob", "Carol"], ["Dale", "Evie", "Frank", "George"]),
)
def test_get_name(input_string_list):
    # Every simulated answer must come back from get_name in the same order.
    for name in input_string_list:
        assert get_name("What is your name?") == name
However, this feels incomplete for a few reasons:
It requires the parameter name in the parameterize call to be input_string_list, which feels brittle.
If I move the fixtures into another function, I need to import both mock_input and _mock_input.
What would feel correct to me is to have a decorator (factory) that can be used like @mock_input(strings), such that you could use it like
@mock_input(["Alice", "Bob", "Carol"])
def test_get_name():
    ...
or, more in line with my use case,
@pytest.mark.parametrize(
    "input_list",  # can be named whatever
    (["Alice", "Bob", "Carol"], ["Dale", "Evie", "Frank", "George"]),
)
# Hypothetical syntax: `input_list` is not a defined name at decoration time.
@mock_input(input_list)
def test_get_name():
    ...
The latter I don't think you can do, as pytest won't recognize it as a fixture. What's the best way to do this?
I'd use indirect parametrization for mock_input, since it cannot work without receiving parameters. Also, I would refactor mock_input into a fixture that passes through the arguments it receives, performing the mocking on the way. For example, when using unittest.mock.patch():
import pytest
from unittest.mock import patch
@pytest.fixture
def inputs(request):
    """Patch ``builtins.input`` to replay the indirectly parametrized values.

    Yields the same list so the test can compare against it; the patch is
    undone when the ``with`` block exits after the test.
    """
    # `request` is pytest's built-in fixture; `.param` holds this case's value.
    texts = request.param  # ["Alice", "Bob", "Carol"] etc
    with patch('builtins.input', side_effect=texts):
        yield texts
Or, if you want to use monkeypatch, the code gets a bit more complex:
@pytest.fixture
def inputs(monkeypatch, request):
    """Replace ``builtins.input`` with a function replaying the parametrized values.

    Yields the same list so the test can compare against it.
    """
    # `request` is pytest's built-in fixture; `.param` holds this case's value.
    texts = request.param
    it = iter(texts)

    def fake_input(prefix=''):
        # The prompt is optional for the real input(), so it is optional here too.
        return next(it)

    monkeypatch.setattr('builtins.input', fake_input)
    yield texts
Now use inputs as test argument and parametrize it indirectly:
@pytest.mark.parametrize(
    'inputs',
    (["Alice", "Bob", "Carol"], ["Dale", "Evie", "Frank", "George"]),
    # Route each value through the `inputs` fixture instead of passing it directly.
    indirect=True,
)
def test_get_name(inputs):
    for name in inputs:
        assert get_name("What is your name?") == name

Mock two methods for the same function python

I want to mock two methods (predict_proba and classes_) of a sklearn model. I have a function that receives a template and text, and returns a label and a score.
import numpy as np
from unittest.mock import MagicMock
def model_predict_proba(model, text):
    """Return ``(label, score)`` for ``text`` according to ``model``.

    ``score`` is the largest value returned by ``model.predict_proba([text])``
    and ``label`` is the entry of ``model.classes_`` at the position of that
    largest value.
    """
    pred_proba_model = model.predict_proba([text])
    score = pred_proba_model.max()
    # argmax without an axis works on the flattened result, which lines up
    # with classes_ because only a single text is passed in.
    label = model.classes_[np.argmax(pred_proba_model)]
    return label, score
def test_model_predict_proba():
    mock_model = MagicMock()
    mock_model.predict_proba.return_value = np.array([0.90, 0.23])
    # NOTE: model_predict_proba indexes `classes_` (classes_[...]) and never
    # calls it, so this return_value is not the value the function receives.
    mock_model.classes_.return_value= np.array(['FOOD', 'DRINK'])
    text = 'Apple pie'
    expected = ("FOOD", 0.90)
    result = model_predict_proba(mock_model, text)
    assert result == expected
When I run this test, I get the following error message:
Can someone help me?
This should do the trick:
def model_predict_proba(model, text):
    """Return ``(label, score)`` for ``text`` according to ``model``.

    ``score`` is the largest value returned by ``model.predict_proba([text])``
    and ``label`` is the entry of ``model.classes_`` at the position of that
    largest value.
    """
    pred_proba_model = model.predict_proba([text])
    score = pred_proba_model.max()
    label = model.classes_[np.argmax(pred_proba_model)]
    return label, score
def test_model_predict_proba():
    mock_model = MagicMock()
    # The function calls .max() on whatever predict_proba returns ...
    mock_model.predict_proba.return_value.max.return_value = 0.90
    # ... and indexes classes_, so __getitem__ is what has to be configured.
    mock_model.classes_.__getitem__.return_value ='FOOD'
    text = 'Apple pie'
    expected = ("FOOD", 0.90)
    result = model_predict_proba(mock_model, text)
    assert result == expected
Note that since you're mocking your model, this test is not actually testing the model in any useful way -- I'm assuming you're writing this function just as an exercise to understand how MagicMock works. The purpose of mocking is usually to simulate the inputs or dependencies of the thing you're testing, rather than the thing itself.

Line 34: TypeError: __init__() takes exactly 3 arguments (1 given)

class Solution:
    """Compare a guess against a secret, position by position."""

    def __init__(self, secret, guess):
        # Both arguments are required: Solution() with no arguments raises TypeError.
        self.secret = secret
        self.guess = guess

    def getHint(self):
        """Return ``"<right>A<wrong>B"`` for the stored secret and guess.

        ``right`` counts positions where both strings hold the same character;
        ``wrong`` counts the remaining positions of the secret. Positions of
        the secret that the guess is too short to cover count as wrong, and
        extra characters in the guess are ignored.

        NOTE(review): ``wrong`` counts every non-matching position, not only
        characters that occur elsewhere in the secret — confirm this is the
        intended meaning of "B".
        """
        secret_chars = list(self.secret)
        right = sum(1 for s, g in zip(secret_chars, self.guess) if s == g)
        wrong = len(secret_chars) - right
        return str(right) + "A" + str(wrong) + "B"
# Python 2 script driver: read two lines from standard input and print the hint.
guessG = raw_input("")
secretS = raw_input("")
# NOTE(review): the arguments are passed in (guess, secret) order while
# __init__ declares (secret, guess) — confirm this is intended.
print Solution(str(guessG), str(secretS)).getHint()
When I run the code in pycharm, it works. However, when I submit the code to leetcode, it warns "runtime error", see the picture.
This is a leetcode service issue
In contrast to the popular format where communication between the test driver and your program is done through standard I/O, this service works differently.
At the beginning you are given a code snippet that implicitly defines the communication interface. You should not change it, i.e. do not change the method signatures, including the constructor signature. Your input is then passed as arguments to the snippet's method, and the result is expected as that method's return value.
How it all works, and how the strange error is generated
The code you enter in the online editor is enriched with some additional code that produces this behaviour.
To check this idea I've written a program that prints its own code (by modifying the given snippet):
class Solution(object):
    def getHint(self, secret, guess):
        """
        :type secret: str
        :type guess: str
        :rtype: str
        """
        # Diagnostic only: dump how the program was started and the full text of
        # the file being executed. print(...) with one argument behaves the same
        # on Python 2 and Python 3.
        import sys
        print(sys.argv)  ## this prints program arguments
        ## argv[0] is the name of file being executed. Just print it!
        with open(sys.argv[0], 'r') as fin:
            print(fin.read())
The result of execution on the leetcode service was
['/usr/lib/gcc/x86_64-linux-gnu/4.9.1/.cache/prog_joined.py', '-recursion_limit', '8100']
# coding: utf-8
from precompiled.__serializer__ import __Serializer__
from precompiled.__deserializer__ import __Deserializer__
from precompiled.__utils__ import __Utils__
from precompiled.listnode import ListNode
from precompiled.interval import Interval
from precompiled.treenode import TreeNode
from precompiled.treelinknode import TreeLinkNode
from precompiled.undirectedgraphnode import UndirectedGraphNode
from precompiled.randomlistnode import RandomListNode
from precompiled.point import Point
import precompiled.__settings__
import array
import bisect
import collections
import copy
import heapq
import itertools
import math
import operator
import re
import sets
import string
# user submitted code insert below
class Solution(object):
def getHint(self, secret, guess):
"""
:type secret: str
:type guess: str
:rtype: str
"""
import sys
print sys.argv
with open(sys.argv[0], 'r') as fin:
print fin.read()
import sys
def _driver():
SEPARATOR = "\x1b\x09\x1d"
f = open("user.out", "w", 0)
lines = __Utils__().read_lines()
while True:
try:
line = lines.next()
param_1 = __Deserializer__().to_string(line)
line = lines.next()
param_2 = __Deserializer__().to_string(line)
ret = Solution().getHint(
param_1, param_2
)
out = __Serializer__().serialize(ret)
print >> f, "%s" % out
sys.stdout.write(SEPARATOR)
except StopIteration:
break
if __name__ == '__main__':
_driver()
This shows the whole program being executed. As you can note it is a bit expanded version of your own code. The version has the following lines:
ret = Solution().getHint(
param_1, param_2
)
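They are the source of your problem: the driver constructs Solution() with no arguments and passes secret and guess to getHint, whereas your __init__ requires both.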

How to extract functions used in a python code file?

I would like to create a list of all the functions used in a code file. For example if we have following code in a file named 'add_random.py'
`
import numpy as np
from numpy import linalg
def foo():
print np.random.rand(4) + np.random.randn(4)
print linalg.norm(np.random.rand(4))
`
I would like to extract the following list:
[numpy.random.rand, np.random.randn, np.linalg.norm, np.random.rand]
The list contains the functions used in the code with their actual name in the form of 'module.submodule.function'. Is there something built in python language that can help me do this?
You can extract all call expressions with:
import ast
class CallCollector(ast.NodeVisitor):
    """Collect the dotted names of the functions called in an AST.

    After ``visit(tree)``, ``calls`` holds one string such as
    ``'np.random.rand'`` per ``Call`` node that was reached. Only the function
    expression of a call is traversed; its arguments are not, so calls nested
    inside another call's arguments are not collected.
    """

    def __init__(self):
        self.calls = []
        # Name being assembled for the call currently traced; None while the
        # visitor is outside a call's function expression.
        self.current = None

    def visit_Call(self, node):
        # new call, trace the function expression
        self.current = ''
        self.visit(node.func)
        self.calls.append(self.current)
        self.current = None

    def generic_visit(self, node):
        if self.current is not None:
            # print(...) with one argument behaves the same on Python 2 and 3.
            print("warning: {} node in function expression not supported".format(
                node.__class__.__name__))
        super(CallCollector, self).generic_visit(node)

    # record the func expression
    def visit_Name(self, node):
        if self.current is None:
            return
        self.current += node.id

    def visit_Attribute(self, node):
        if self.current is None:
            # Attribute access outside a call: just keep walking. Without the
            # return, the code below would try to append to None and raise
            # TypeError.
            self.generic_visit(node)
            return
        self.visit(node.value)
        self.current += '.' + node.attr
Use this with an ast parse tree:
# `yoursource` is a placeholder for the Python source text to analyse.
tree = ast.parse(yoursource)
cc = CallCollector()
cc.visit(tree)
# cc.calls now holds the dotted names recorded by visit_Call.
print cc.calls
Demo:
>>> tree = ast.parse('''\
... def foo():
... print np.random.rand(4) + np.random.randn(4)
... print linalg.norm(np.random.rand(4))
... ''')
>>> cc = CallCollector()
>>> cc.visit(tree)
>>> cc.calls
['np.random.rand', 'np.random.randn', 'linalg.norm']
The above walker only handles names and attributes; if you need more complex expression support, you'll have to extend this.
Note that collecting names like this is not a trivial task. Any indirection would not be handled. You could build a dictionary in your code of functions to call and dynamically swap out function objects, and static analysis like the above won't be able to track it.
In general, this problem is undecidable; consider for example getattr(random, "random")().
If you want static analysis, the best there is now is jedi
If you accept dynamic solutions, then coverage is your best friend. Note that it will show all functions that were executed, not only the ones referenced directly.
Finally you can always roll your own dynamic instrumentation along the lines of:
import random
import logging
class Proxy(object):
    """Stand-in for the ``random`` module that logs every attribute lookup."""

    def __getattr__(self, name):
        # Python calls __getattr__ only when normal lookup fails; the proxy has
        # no attributes of its own, so every use of random.<name> lands here.
        logging.debug("tried to use random.%s", name)
        # getattr is the builtin for dynamic attribute access.
        return getattr(_random, name)


# Keep a handle on the real module before the name `random` is rebound.
_random = random
random = Proxy()

Categories