Where am I going wrong with patching a function with mock_open? - python

I have a function that calls a sub-function to open up a file. I am trying to test the parent function, but I want to patch the sub-function and have it return the data I pass in (as if it read from a file).
tests.py
# Read in the sample data
__SAMPLE_LOG = os.path.join(settings.BASE_DIR, "apps/tests/log_viewer/sample_logs/sample_manager_log.log")
sample_data = []
for line in reversed_lines(open(__SAMPLE_LOG)):
    sample_data.append(line)
sample_data = ''.join(sample_data)
class ReadLog(TestCase):
    @patch('apps.log_viewer.utils.reversed_lines', new_callable = mock_open, read_data = sample_data)
    def test_returnsDictionaryContainingListOfDictionaries(self, mock_file):
        activity = read_log()
        # Make sure the sample data was read ==> this fails.
        self.assertEqual(open(settings.ACTIVITY_LOG_FILE).read(), sample_data)
utils.py
def read_log():
    # This is the line I am trying to patch
    for line in reversed_lines(open(settings.ACTIVITY_LOG_FILE)):
        # process data
        ...

# see: https://stackoverflow.com/questions/260273/most-efficient-way-to-search-the-last-x-lines-of-a-file-in-python/260433#260433
def reversed_lines(file):
    "Generate the lines of file in reverse order."
    part = ''
    for block in reversed_blocks(file):
        for c in reversed(block):
            if c == '\n' and part:
                yield part[::-1]
                part = ''
            part += c
    if part:
        yield part[::-1]
def reversed_blocks(file, blocksize=4096):
    "Generate blocks of file's contents in reverse order."
    file.seek(0, os.SEEK_END)
    here = file.tell()
    while 0 < here:
        delta = min(blocksize, here)
        here -= delta
        file.seek(here, os.SEEK_SET)
        yield file.read(delta)
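For reference, these two helpers read the file from the end in blocks and yield complete lines in reverse order; a quick standalone sketch (not from the original code) of how they are driven:

with open(settings.ACTIVITY_LOG_FILE) as f:
    for line in reversed_lines(f):
        print(line, end='')  # each yielded line keeps its trailing newline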
The error
I am trying to patch reversed_lines() in utils.py, which read_log() calls, but read_log() is still reading from the actual log file, indicating that I am not patching reversed_lines() correctly.
When I change
@patch('apps.log_viewer.utils.reversed_lines', new_callable = mock_open, read_data = sample_data)
to
@patch('builtins.open', new_callable = mock_open, read_data = sample_data)
I get
======================================================================
ERROR: test_returnsDictionaryContainingListOfDictionaries (tests.log_viewer.test_utils.ReadLog)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/unittest/mock.py", line 1209, in patched
    return func(*args, **keywargs)
  File "/webapp/apps/tests/log_viewer/test_utils.py", line 32, in test_returnsDictionaryContainingListOfDictionaries
    activity = read_log()
  File "/webapp/apps/log_viewer/utils.py", line 64, in read_log
    for line in reversed_lines(open(settings.ACTIVITY_LOG_FILE)):
  File "/webapp/apps/log_viewer/utils.py", line 173, in reversed_lines
    for block in reversed_blocks(file):
  File "/webapp/apps/log_viewer/utils.py", line 164, in reversed_blocks
    while 0 < here:
TypeError: '<' not supported between instances of 'int' and 'MagicMock'
Where am I going wrong?

Following the example from the docs at https://docs.python.org/3.3/library/unittest.mock.html#mock-open I think you want
@patch('builtins.open', mock_open(read_data = sample_data), create=True)
However, reading through the source of mock_open (https://github.com/python/cpython/blob/3.7/Lib/unittest/mock.py#L2350), it appears that the tell method of the file handle is not implemented by the mock. The only supported methods are read, readline, readlines, write and iteration over the contents, so the file.tell() call in reversed_blocks() returns a plain MagicMock, which is exactly what the TypeError is complaining about. You'll need to set up the mock for the tell method manually. Note also that when you pass the mock in directly as the second argument to patch like this, it is not injected into the test method, so sticking with new_callable = mock_open keeps the mock_file argument. This is not a general implementation, but it will work in your specific case:
class ReadLog(TestCase):
    @patch('builtins.open', new_callable = mock_open, read_data = sample_data)
    def test_returnsDictionaryContainingListOfDictionaries(self, mock_file):
        # mock_open() does not implement tell(); stub it so reversed_blocks()
        # sees the length of the fake file
        mock_file.return_value.tell.return_value = len(sample_data)
        ...
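If you need something closer to a real file object, here is a minimal sketch (seekable_mock_open is a made-up helper name, not part of unittest.mock) that tracks a current position so that seek(), tell() and read() stay consistent with each other:

from unittest.mock import mock_open
import os

def seekable_mock_open(read_data=''):
    # mock_open variant with just enough seek/tell/read state for code
    # like reversed_blocks() that walks a file backwards.
    m = mock_open(read_data=read_data)
    handle = m.return_value
    pos = {'offset': 0}  # mutable closure state for the current position

    def _seek(offset, whence=os.SEEK_SET):
        if whence == os.SEEK_END:
            pos['offset'] = len(read_data) + offset
        elif whence == os.SEEK_CUR:
            pos['offset'] += offset
        else:
            pos['offset'] = offset

    def _read(size=-1):
        start = pos['offset']
        data = read_data[start:] if size is None or size < 0 else read_data[start:start + size]
        pos['offset'] += len(data)
        return data

    handle.seek.side_effect = _seek
    handle.tell.side_effect = lambda: pos['offset']
    handle.read.side_effect = _read
    return m

read_log() only touches seek(), tell() and read(), so patching with @patch('builtins.open', seekable_mock_open(read_data = sample_data)) should let reversed_blocks() walk the fake file from end to front.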

Related

String issue for Python multiprocessing

I'm facing some issues with string parsing and the multiprocessing library. Here is my code; I also outline the function calls and the error below.
def semi_func(tile):
    with open(tile, 'rb') as f:
        img = Image.open(BytesIO(f.read()))
    resized_im, seg_map = MODEL.run(img)
    vis_segmentation_tiles(str(tile), resized_im, seg_map)
    x = np.unique(seg_map)
    x = x.tolist()
    print("THIS IS X", x)
    ans_tiles[str(tile)] = x
    print(x)
    return ans_tiles
def split_tiles_new(image_path, tiledir):
    print("1")
    pool = Pool(processes=5)
    print("2")
    num_tiles = 9
    tiles = image_slicer.slice(image_path, num_tiles, save=False)
    print("3")
    print(tiles)
    image_slicer.save_tiles(tiles, directory=tiledir)
    print(tiles)
    print("TILES ABOVE")
    onlytiles = [os.path.join(tiledir, f) for f in listdir(tiledir) if isfile(join(tiledir, f))]
    ans_tiles = {}
    print(onlytiles)
    onlytiles = list(map(str, onlytiles))
    for t in onlytiles:
        print(t)
    for tile in onlytiles:
        print(tile)
        pool.map(semi_func, tile)
    pool.close()
    pool.join()
    print(ans_tiles)
    return ans_tiles
Here's what I'm feeding in terms of my functions:
ans_tiles = split_tiles_new(local_jpg, tiledir)
local_jpg = 'wheat044146108.jpg'
tiledir = 'tiles044146108'
Inside tiledir (the directory), there's a bunch of tiled images:
['tiles044146108/_03_02.png', 'tiles044146108/_03_01.png', 'tiles044146108/_02_02.png', 'tiles044146108/_01_01.png', 'tiles044146108/_03_03.png', 'tiles044146108/_01_02.png', 'tiles044146108/_02_01.png', 'tiles044146108/_02_03.png', 'tiles044146108/_01_03.png']
That's what is in the variable 'onlytiles'.
But my issue is this error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
  File "/usr/lib/python3.7/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "serve_wh.py", line 128, in semi_func
    with open(tile, 'rb') as f:
FileNotFoundError: [Errno 2] No such file or directory: 't'
"""
I am not sure why it is slicing the string further. Any idea how I can make it grab each file from the 'onlytiles' list separately?
Your iterable is a single filename string, which is why it ends up trying to open a file named 't': Pool.map iterates over its second argument, and iterating over a string yields its characters. Check Pool.map's second argument. You wrote
pool.map(semi_func, tile)
inside the for loop. Drop the loop and pass the whole list:
pool.map(semi_func, onlytiles)
so that it iterates over the list rather than over a string.
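Note also that ans_tiles lives in the parent process; each worker gets its own copy, so the entries added inside semi_func never show up in the parent's dict. Since semi_func already returns ans_tiles, a minimal sketch (split_tiles_results is a made-up name) that collects the per-tile results pool.map hands back:

from multiprocessing import Pool

def split_tiles_results(onlytiles):
    # pool.map returns one small dict per tile, in input order
    with Pool(processes=5) as pool:
        partials = pool.map(semi_func, onlytiles)
    ans_tiles = {}
    for partial in partials:
        ans_tiles.update(partial)
    return ans_tiles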

Getting an AttributeError on something I explicitly defined in __init__

I'm trying to make a config file manager that stores dictionaries in a pickle file, in a config folder in the same directory. I'm having issues with a pesky AttributeError that claims I don't have an attribute '_name', which I define in the first line of __init__().
The main program has two classes, one inheriting from Exception (the error class) and the other inheriting from dict (the main class). __init__ takes a name and checks whether the file exists. If it doesn't, an empty dictionary is written to the given file.
I receive the error when I:
1. open an existing file to print the keys
2. write a key and value pair to an existing file
3. try and get a key from the dictionary
I've tried not calling dict.__setitem__ in __init__ and nothing changed; I still get the error. I tried just loading the file while doing absolutely nothing else to it, and still couldn't get it to work.
import os
import pickle

class ConfigDict(dict):
    '''
    This class is responsible for all main functions.
    Pass a string as an argument without a file type.
    Keywords such as 'database' and 'aiconstructer' work well.
    '''
    def __init__(self, name):
        self._name = name + '.pickle'
        if not os.path.isfile(self._name):
            with open(self._name, 'wb') as f:
                pickle.dump('{}', f)
        with open(self._name, 'rb') as f:
            obj = pickle.load(f)
        if len(obj) > 2:
            for k in obj.keys():
                dict.__setitem__(self, k, obj[k])

    def __getitem__(self, key):
        if not key in self.keys():
            raise ConfigKeyError(self, key)
        return dict.__getitem__(key)

    def __setitem__(self, key, val):
        dict.__setitem__(self, key, val)
        with open(self._name, 'wb') as f:
            pickle.dump(self, f)
The Traceback is as follows:
Traceback (most recent call last):
  File "interface.py", line 12, in <module>
    test = ConfigDict('alpha')
  File "/home/bear/Desktop/Config/confdict.py", line 35, in __init__
    obj = pickle.load(f)
  File "/home/bear/Desktop/Config/confdict.py", line 47, in __setitem__
    with open(self._name, 'wb') as f:
AttributeError: 'ConfigDict' object has no attribute '_name'
The code for interface.py that initiates this is:
import sys
from confdict import ConfigDict, ConfigKeyError

test = ConfigDict('alpha')
if len(sys.argv) == 3:
    key = sys.argv[1]
    val = sys.argv[2]
    print('Writing')
    test[key] = val
    print('Done')
elif len(sys.argv) == 2:
    key = sys.argv[1]
    print('{}:{}'.format(key, test[key]))
else:
    print('Keys : Values')
    print('-----------------')
    for k in test.keys():
        print('{} : {}'.format(k, test[k]))
I expect to be able to load the contents of the pickle file into self, but instead I get the AttributeError. Is it something I'm doing wrong syntactically, or is there a rule that I forgot to follow? Thank you very much for any help in advance.
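The traceback shows what is going on. Once a key has been written, the pickle file no longer contains a plain dict: your __setitem__ does pickle.dump(self, f), so it stores a ConfigDict. When pickle.load() rebuilds a dict subclass, it creates the instance with __new__ (skipping __init__ entirely) and then calls the subclass's __setitem__ for every stored key, and your overridden __setitem__ touches self._name before that attribute exists. Two smaller bugs compound this: pickle.dump('{}', f) stores the two-character string '{}' rather than an empty dict, and dict.__getitem__(key) is missing self. A minimal sketch of the relevant fixes, pickling a plain dict so that loading never goes through ConfigDict.__setitem__:

def __init__(self, name):
    self._name = name + '.pickle'
    if not os.path.isfile(self._name):
        with open(self._name, 'wb') as f:
            pickle.dump({}, f)  # an empty dict, not the string '{}'
    with open(self._name, 'rb') as f:
        obj = pickle.load(f)
    for k in obj:
        dict.__setitem__(self, k, obj[k])

def __getitem__(self, key):
    if key not in self.keys():
        raise ConfigKeyError(self, key)
    return dict.__getitem__(self, key)

def __setitem__(self, key, val):
    dict.__setitem__(self, key, val)
    with open(self._name, 'wb') as f:
        # Dump a plain dict copy: unpickling a plain dict never calls
        # ConfigDict.__setitem__, so _name is not needed at load time.
        pickle.dump(dict(self), f)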

Parsing large XML file using 'xmltodict' module results in OverflowError

I have a fairly large XML file, about 3GB in size, that I want to parse in streaming mode using the 'xmltodict' utility. My code iterates through each item, forms a dictionary item, and appends it to an in-memory dictionary that is eventually dumped to a file as JSON.
I have the following working perfectly on a small xml data set:
import xmltodict, json
import io

output = []

def handle(path, item):
    # do stuff
    return

doc_file = open("affiliate_partner_feeds.xml", "r")
doc = doc_file.read()
xmltodict.parse(doc, item_depth=2, item_callback=handle)

f = open('jbtest.json', 'w')
json.dump(output, f)
On a large file, I get the following:
Traceback (most recent call last):
  File "jbparser.py", line 125, in <module>
    xmltodict.parse(doc, item_depth=2, item_callback=handle)
  File "/usr/lib/python2.7/site-packages/xmltodict.py", line 248, in parse
    parser.Parse(xml_input, True)
OverflowError: size does not fit in an int
The exact location of the exception inside xmltodict.py is:
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
          namespace_separator=':', **kwargs):
    handler = _DictSAXHandler(namespace_separator=namespace_separator,
                              **kwargs)
    if isinstance(xml_input, _unicode):
        if not encoding:
            encoding = 'utf-8'
        xml_input = xml_input.encode(encoding)
    if not process_namespaces:
        namespace_separator = None
    parser = expat.ParserCreate(
        encoding,
        namespace_separator
    )
    try:
        parser.ordered_attributes = True
    except AttributeError:
        # Jython's expat does not support ordered_attributes
        pass
    parser.StartElementHandler = handler.startElement
    parser.EndElementHandler = handler.endElement
    parser.CharacterDataHandler = handler.characters
    parser.buffer_text = True
    try:
        parser.ParseFile(xml_input)
    except (TypeError, AttributeError):
        parser.Parse(xml_input, True)
    return handler.item
Any way to get around this? AFAIK, the expat parser object is not exposed for me to play around with and change 'int' to 'long'. More importantly, what is really going on here?
Would really appreciate any leads on this. Thanks!
Instead of reading the whole file into memory and then parsing it as one huge string, give xmltodict a stream. When xml_input is a file-like object, the parse() source you quoted calls parser.ParseFile(xml_input), which feeds expat in chunks and never hits the int-sized limit that parser.Parse() runs into on a 3GB string.
Here is an example:
>>> def handle_artist(_, artist):
...     print artist['name']
...     return True
>>>
>>> xmltodict.parse(GzipFile('discogs_artists.xml.gz'),
...     item_depth=2, item_callback=handle_artist)
A Perfect Circle
Fantômas
King Crimson
Chris Potter
...
xmltodict can also be run in streaming mode from the command line, marshalling each parsed item to stdout; the consuming script then reads the records back with marshal.load(sys.stdin):
import sys, marshal

while True:
    _, article = marshal.load(sys.stdin)
    print article['title']
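Applied to the code in the question, that means handing parse() the open file object instead of the string from doc_file.read(). A minimal sketch (note the callback must return a truthy value, otherwise xmltodict aborts the streaming parse):

import xmltodict, json

output = []

def handle(path, item):
    # do stuff with each depth-2 item, e.g. collect it
    output.append(item)
    return True  # a falsy return aborts the streaming parse

# Passing the file object makes xmltodict use parser.ParseFile(),
# which reads the 3GB document in chunks instead of one huge string.
with open("affiliate_partner_feeds.xml", "rb") as doc_file:
    xmltodict.parse(doc_file, item_depth=2, item_callback=handle)

with open("jbtest.json", "w") as f:
    json.dump(output, f)

Collecting every item in output will of course still use a lot of memory for a 3GB input; if that becomes a problem, write each item out as you go.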

Is it possible to print the next line of code?

Is it possible to make a method which prints the next line of the code it was called from?
def print_next_line():
    sth

import fxx

print 'XXX'
print_next_line()
file.split('/')
....

>>> 'XXX'
>>> 'file.split('/')'
I was thinking that it could be somewhere in the stack, but I'm not sure, because it is the next line, not a previous one.
Straight approach: I use the inspect module to determine the file and line where print_next_line() was called, then read that file to find the next line. You might want to add some error handling here (what if there is no next line in the file? and so on).
def print_next_line():
    def get_line(f, lineno):
        with open(f) as fp:
            lines = fp.readlines()
        return lines[lineno - 1]

    import inspect
    callerframerecord = inspect.stack()[1]
    frame = callerframerecord[0]
    info = inspect.getframeinfo(frame)
    line_ = info.lineno
    file_ = info.filename
    print get_line(file_, line_ + 1)
print 'XXX'
a = 1
print_next_line()
b = a*2
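Running this prints XXX followed by b = a*2, the source text of the line after the call.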
All you need is a profiling tool or just a debugger.
Use Python's inspect module:
import inspect

def print_next_line():
    lineno = inspect.currentframe().f_back.f_lineno
    with open(__file__) as f:
        print(f.readlines()[lineno].rstrip())
Well, you could open() your .py file and iterate through it to find the specific line, then print it.

Python, basic question: How do I download multiple URLs with urllib.request.urlretrieve

I have the following fully functional, working code:
import urllib.request
import zipfile

url = "http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2sop"
filename = "C:/test/archive.zip"
destinationPath = "C:/test"

urllib.request.urlretrieve(url, filename)
sourceZip = zipfile.ZipFile(filename, 'r')
for name in sourceZip.namelist():
    sourceZip.extract(name, destinationPath)
sourceZip.close()
It works perfectly a few times, but because the server I am retrieving the file from has some limits, I get this error once I reach the daily limit:
Traceback (most recent call last):
  File "script.py", line 11, in <module>
    urllib.request.urlretrieve(url,filename)
  File "C:\Python32\lib\urllib\request.py", line 150, in urlretrieve
    return _urlopener.retrieve(url, filename, reporthook, data)
  File "C:\Python32\lib\urllib\request.py", line 1591, in retrieve
    block = fp.read(bs)
ValueError: read of closed file
How do I alter the script so that it includes a list of multiple URLs instead of one single URL, and keeps trying to download from the list until one succeeds, then continues with the unzip? I just need one successful download.
Apologies for being very new to Python, but I can't figure this one out. I'm assuming I have to change the variable to look something like this:
url = {
"http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2soe",
"http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2sod",
"http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2soc",
"http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2sob",
"http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2soa",
}
and then changing this line into some sort of loop:
urllib.request.urlretrieve(url,filename)
You want to put your urls in a list, then loop through that list and try each one. You catch but ignore exceptions they throw, and break the loop once one succeeds. Try this:
import urllib.request
import zipfile

urls = ["http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2sop", "other url", "another url"]
filename = "C:/test/test.zip"
destinationPath = "C:/test"

for url in urls:
    try:
        urllib.request.urlretrieve(url, filename)
        sourceZip = zipfile.ZipFile(filename, 'r')
        break
    except ValueError:
        pass

for name in sourceZip.namelist():
    sourceZip.extract(name, destinationPath)
sourceZip.close()
import urllib.request
import zipfile

urllist = ("http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2sop",
           "another",
           "yet another",
           "etc")
filename = "C:/test/test.zip"
destinationPath = "C:/test"

for url in urllist:
    try:
        urllib.request.urlretrieve(url, filename)
    except ValueError:
        continue
    sourceZip = zipfile.ZipFile(filename, 'r')
    for name in sourceZip.namelist():
        sourceZip.extract(name, destinationPath)
    sourceZip.close()
    break
This will work assuming you just want to try them each once until one works, then stop.
For full-fledged distributed tasks you can check out Celery and its retry mechanism (Celery-retry), or you can have a look at a retry decorator.
Example:
import math
import time

# Retry decorator with exponential backoff
def retry(tries, delay=3, backoff=2):
    """Retries a function or method until it returns True.

    delay sets the initial delay, and backoff sets how much the delay should
    lengthen after each failure. backoff must be greater than 1, or else it
    isn't really a backoff. tries must be at least 0, and delay greater than
    0."""
    if backoff <= 1:
        raise ValueError("backoff must be greater than 1")
    tries = math.floor(tries)
    if tries < 0:
        raise ValueError("tries must be 0 or greater")
    if delay <= 0:
        raise ValueError("delay must be greater than 0")

    def deco_retry(f):
        def f_retry(*args, **kwargs):
            mtries, mdelay = tries, delay  # make mutable
            rv = f(*args, **kwargs)  # first attempt
            while mtries > 0:
                if rv == True:  # Done on success
                    return True
                mtries -= 1  # consume an attempt
                time.sleep(mdelay)  # wait...
                mdelay *= backoff  # make future wait longer
                rv = f(*args, **kwargs)  # Try again
            return False  # Ran out of tries :-(
        return f_retry  # true decorator -> decorated function
    return deco_retry  # @retry(arg[, ...]) -> true decorator
urls = [
    "http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2soe",
    "http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2sod",
    "http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2soc",
    "http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2sob",
    "http://url.com/archive.zip?key=7UCxcuCzFpYeu7tz18JgGZFAAgXQ2soa",
]

for u in urls:
    urllib.request.urlretrieve(u, filename)
    ... rest of code ...
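To tie the two together, a minimal sketch (try_download is a made-up helper; it assumes the retry decorator and the imports above) that retries each URL a few times with backoff and stops at the first success:

@retry(tries=4, delay=3, backoff=2)
def try_download(url, filename):
    # One attempt; return True on success so the decorator stops retrying.
    try:
        urllib.request.urlretrieve(url, filename)
        return True
    except ValueError:
        return False

for u in urls:
    if try_download(u, filename):
        break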
