using generators and cStringIO in python to stream strings - python

I'm trying to read a very large string stream using cStringIO in a python dictionary:
def stream_read(self, path):
    """Yield the string stored at ``path`` in chunks of ``self.buffer_size``.

    The value found at ``self._my_dict[path]`` is wrapped in a ``StringIO``
    stream and read back one buffer at a time.

    This is a generator, so nothing runs until the first chunk is requested;
    a missing ``path`` therefore surfaces as ``IOError`` on the first
    iteration rather than at call time.

    Raises:
        IOError: if ``path`` is not a key of ``self._my_dict``.
    """
    try:
        # create a string stream from the contents at 'path'
        # note: the string at self._my_dict[path] is 7MB in size
        stream = StringIO.StringIO(self._my_dict[path])
    except KeyError:
        raise IOError("Could not get content")
    while True:
        # buffer size is 128kB, or 128 * 1024
        buf = stream.read(self.buffer_size)
        if not buf:
            # Finish with a plain return: raising StopIteration inside a
            # generator body is converted to RuntimeError under PEP 479
            # (the default behaviour from Python 3.7 on).
            return
        yield buf
And in my test suite, I'm testing this function by first testing stream_write, asserting that the data exists at that path, and then calling stream_read:
def test_stream(self):
filename = self.gen_random_string()
# test 7MB
content = self.gen_random_string(7 * 1024 * 1024)
# test stream write
io = StringIO.StringIO(content)
self._storage.stream_write(filename, io)
io.close()
self.assertTrue(self._storage.exists(filename))
# test read / write
data = ''
for buf in self._storage.stream_read(filename):
data += buf
self.assertEqual(content, data)
Yet in my test suite, I'm catching an AssertionError:
======================================================================
FAIL: test_stream (test_swift_storage.TestSwiftStorage)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/bacongobbler/.../test/test_local_storage.py", line 44, in test_stream
self.assertEqual(content, data)
AssertionError: '[squelched]' != '<cStringIO.StringI object at 0x3148e70>'
----------------------------------------------------------------------
Ran 28 tests in 20.495s
FAILED (failures=1)
It looks related to an issue I posted last week, but I'm still not quite sure I understand why stream is getting set to the Generator as a string in this case.
If anyone wants to take a closer look at the source code, it's all up at https://github.com/bacongobbler/docker-registry/blob/106-swift-storage/test/utils/mock_swift_storage.py

You store just the StringIO object when calling self._storage.stream_write(filename, io):
def put_content(self, path, content, chunk=None):
path = self._init_path(path)
try:
self._swift_container[path] = content
except Exception:
raise IOError("Could not put content")
where content is the io object you passed in.
Later on, you pass that file object to StringIO again:
stream = StringIO.StringIO(self.get_content(path))
This calls str() on self.get_content(path), storing the string representation of a cStringIO.StringI() instance:
>>> from cStringIO import StringIO
>>> str(StringIO('test data'))
'<cStringIO.StringI object at 0x1074ea470>'
Your reading code works fine, it is your writing mock that needs to actually take the data out of the StringIO object.
A .read() call will do here:
def put_content(self, path, content, chunk=None):
    """Store the data read from ``content`` under ``path``.

    ``content`` is a file-like object; the result of its ``read()`` call,
    not the object itself, is what ends up in ``self._swift_container``.
    ``chunk`` is accepted but not used here.

    Raises:
        IOError: if reading ``content`` or storing the data fails.
    """
    key = self._init_path(path)
    try:
        data = content.read()
        self._swift_container[key] = data
    except Exception:
        raise IOError("Could not put content")

Related

Upload file to Databricks DBFS with Python API

I'm following the Databricks example for uploading a file to DBFS (in my case .csv):
import json
import requests
import base64
DOMAIN = '<databricks-instance>'
TOKEN = '<your-token>'
BASE_URL = 'https://%s/api/2.0/dbfs/' % (DOMAIN)
def dbfs_rpc(action, body):
    """Send one DBFS API request and return the decoded JSON response.

    ``action`` is appended to ``BASE_URL`` to form the endpoint, ``body`` is
    sent as the JSON payload, and ``TOKEN`` is passed as a bearer token in
    the ``Authorization`` header.
    """
    endpoint = BASE_URL + action
    auth_headers = {'Authorization': 'Bearer %s' % TOKEN}
    reply = requests.post(endpoint, headers=auth_headers, json=body)
    return reply.json()
# Create a handle that will be used to add blocks
handle = dbfs_rpc("create", {"path": "/temp/upload_large_file", "overwrite": "true"})['handle']
with open('/a/local/file') as f:
while True:
# A block can be at most 1MB
block = f.read(1 << 20)
if not block:
break
data = base64.standard_b64encode(block)
dbfs_rpc("add-block", {"handle": handle, "data": data})
# close the handle to finish uploading
dbfs_rpc("close", {"handle": handle})
When using the tutorial as is, I get an error:
Traceback (most recent call last):
File "db_api.py", line 65, in <module>
data = base64.standard_b64encode(block)
File "C:\Miniconda3\envs\dash_p36\lib\base64.py", line 95, in standard_b64encode
return b64encode(s)
File "C:\Miniconda3\envs\dash_p36\lib\base64.py", line 58, in b64encode
encoded = binascii.b2a_base64(s, newline=False)
TypeError: a bytes-like object is required, not 'str'
I tried doing with open('./sample.csv', 'rb') as f: before passing the blocks to base64.standard_b64encode but then getting another error:
TypeError: Object of type 'bytes' is not JSON serializable
This happens when the encoded block data is being sent into the API call.
I tried skipping encoding entirely and just passing the blocks into the post call. In this case the file gets created in the DBFS but has 0 bytes size.
At this point I'm trying to make sense of it all. It doesn't want a string but it doesn't want bytes either. What am I doing wrong? Appreciate any help.
In Python we have strings and bytes, which are two different entities. Note that there is no implicit conversion between them, so you need to know when to use which and how to convert when necessary. This answer provides a nice explanation.
With the code snippet I see two issues:
This you already got - open by default reads the file as text. So your block is a string, while standard_b64encode expects bytes and returns bytes. To read bytes from file it needs to be opened in binary mode:
with open('/a/local/file', 'rb') as f:
Only strings can be encoded as JSON. There's no source code available for dbfs_rpc (or I can't find it), but apparently it expects a string, which it internally encodes. Since your data is bytes, you need to convert it to string explicitly and that's done using decode:
dbfs_rpc("add-block", {"handle": handle, "data": data.decode('utf8')})

Error handling: how to properly prioritize exceptions

There are two functions: one downloads the excel file (ExcelFileUploadView(APIView)) and the other processes the downloaded file(def parse_excel_rfi_sheet).
Function parse_excel_rfi_sheet is called inside ExcelFileUploadView(APIView)
class ExcelFileUploadView(APIView):
parser_classes = (MultiPartParser, FormParser)
permission_classes = (permissions.AllowAny,)
def put(self, request, format=None):
if 'file' not in request.data:
raise ParseError("Empty content")
f = request.data['file']
filename = f.name
if filename.endswith('.xlsx'):
try:
file = default_storage.save(filename, f)
r = parse_excel_rfi_sheet(file)
status = 200
except:
raise Exception({"general_errors": ["Error during file upload"]})
finally:
default_storage.delete(file)
else:
status = 406
r = {"general_errors": ["Please upload only xlsx files"]}
return Response(r, status=status)
def parse_excel_rfi_sheet(file):
workbook = load_workbook(filename=file)
sheet = workbook["RFI"]
curent_module_coordinate = []
try:
....
curent_module_coordinate.append(sheet['E688'].value)
curent_module_coordinate.append(sheet['E950'].value)
if check_exel_rfi_template_structure(structure=curent_module_coordinate):
file_status = True
else:
file_status = False
except:
raise Exception({"general_errors": ["Error during excel file parsing. Unknown module cell"]})
The problem is that when an error occurs inside the parse_excel_rfi_sheet, I do not see a call of {"general_errors": ["Error during excel file parsing. Unknown module cell"]}
Instead, I always see the call
{"general_errors": ["Error during file upload"]}
That's why I can't understand at what stage the error occurred: at the moment of downloading the file or at the moment of processing.
How to change this?
Since you are calling parse_excel_rfi_sheet from ExcelFileUploadView, whenever the exception {"general_errors": ["Error during excel file parsing. Unknown module cell"]} is raised inside parse_excel_rfi_sheet, the try block in ExcelFileUploadView fails, control passes to its except clause, and that clause raises the exception {"general_errors": ["Error during file upload"]} instead.
You can verify this by printing the exception raised by the ExcelFileUploadView function.
Change the try block to the following:
try:
file = default_storage.save(filename, f)
r = parse_excel_rfi_sheet(file)
status = 200
except Exception as e:
print("Exception raised ", e)
raise Exception({"general_errors": ["Error during file upload"]})
Your problem comes from catching absolutely all exceptions, first in parse_excel_rfi_sheet, then once again in your put method. Both bare except clauses (except: whatever_code_here) and large try blocks are antipatterns - you only want to catch the exact exceptions you're expecting at a given point (using except (SomeExceptionType, AnotherExceptionType, ...) as e:), and have as little code as possible in your try blocks so you are confident you know where the exception comes from.
The only exception (no pun intended) to this rule is the case of "catch all" handlers at a higher level, which are used to catch unexpected errors, log them (so you have a trace of what happened), and present a friendly error message to the user - but even then, you don't want a bare except clause but an except Exception as e.
TL;DR: never assume anything about which exception was raised, where and why, and never pass exceptions silently (at least log them - and check your logs).
raise Exception(...) generates a new Exception instance and raises that one.
This means, the try ... except in put effectively throws away the exception it caught and replaces it with a new one with message "Error during file upload", which is why you always see the same message.
A clean way to handle this would be to define a custom subclass of Exception (e.g., InvalidFormatException) and raise that one in parse_excel_rfi_sheet, having two different except cases in put:
class InvalidFormatException(Exception):
pass
[...]
def parse_excel_rfi_sheet(file):
workbook = load_workbook(filename=file)
sheet = workbook["RFI"]
curent_module_coordinate = []
try:
....
curent_module_coordinate.append(sheet['E688'].value)
curent_module_coordinate.append(sheet['E950'].value)
if check_exel_rfi_template_structure(structure=curent_module_coordinate):
file_status = True
else:
file_status = False
except:
raise InvalidFormatException({"general_errors": ["Error during excel file parsing. Unknown module cell"]})
Your put then becomes:
def put(self, request, format=None):
    """Save an uploaded ``.xlsx`` file, parse it, and return the parse result.

    Responds with status 406 and a ``general_errors`` payload when the
    uploaded file name does not end in ``.xlsx``. Otherwise the file is
    saved through ``default_storage``, handed to ``parse_excel_rfi_sheet``,
    and deleted again whether or not parsing succeeded.

    Raises:
        ParseError: if the request carries no ``'file'`` entry.
        InvalidFormatException: passed through unchanged from
            ``parse_excel_rfi_sheet``.
        Exception: with the "Error during file upload" payload for any other
            failure while saving or parsing.
    """
    if 'file' not in request.data:
        raise ParseError("Empty content")
    f = request.data['file']
    filename = f.name
    if not filename.endswith('.xlsx'):
        r = {"general_errors": ["Please upload only xlsx files"]}
        return Response(r, status=406)

    # Stays None until the save succeeds, so the cleanup below never touches
    # an unbound name (which would hide the real error behind a NameError).
    file = None
    try:
        file = default_storage.save(filename, f)
        r = parse_excel_rfi_sheet(file)
    except InvalidFormatException:
        raise  # pass on the exception
    except Exception:
        # ``except Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not rewritten as upload errors.
        raise Exception({"general_errors": ["Error during file upload"]})
    finally:
        if file is not None:
            default_storage.delete(file)
    return Response(r, status=200)
Warning: As pointed out in the comments to this answer, note that -although not directly inquired- the OP's code should be further modified to remove the bare except: clause, as this is probably not the expected behaviour.

Porting pickle py2 to py3 strings become bytes

I have a pickle file that was created with python 2.7 that I'm trying to port to python 3.6. The file is saved in py 2.7 via pickle.dumps(self.saved_objects, -1)
and loaded in python 3.6 via loads(data, encoding="bytes") (from a file opened in rb mode). If I try opening in r mode and pass encoding=latin1 to loads I get UnicodeDecode errors. When I open it as a byte stream it loads, but literally every string is now a byte string. Every object's __dict__ keys are all b"a_variable_name" which then generates attribute errors when calling an_object.a_variable_name because __getattr__ passes a string and __dict__ only contains bytes. I feel like I've tried every combination of arguments and pickle protocols already. Apart from forcibly converting all objects' __dict__ keys to strings I'm at a loss. Any ideas?
** Skip to 4/28/17 update for better example
-------------------------------------------------------------------------------------------------------------
** Update 4/27/17
This minimum example illustrates my problem:
From py 2.7.13
import pickle
class test(object):
def __init__(self):
self.x = u"test ¢" # including a unicode str breaks things
t = test()
dumpstr = pickle.dumps(t)
>>> dumpstr
"ccopy_reg\n_reconstructor\np0\n(c__main__\ntest\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nVtest \xa2\np7\nsb."
From py 3.6.1
import pickle
class test(object):
def __init__(self):
self.x = "xyz"
dumpstr = b"ccopy_reg\n_reconstructor\np0\n(c__main__\ntest\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nS'x'\np6\nVtest \xa2\np7\nsb."
t = pickle.loads(dumpstr, encoding="bytes")
>>> t
<__main__.test object at 0x040E3DF0>
>>> t.x
Traceback (most recent call last):
File "<pyshell#15>", line 1, in <module>
t.x
AttributeError: 'test' object has no attribute 'x'
>>> t.__dict__
{b'x': 'test ¢'}
>>>
-------------------------------------------------------------------------------------------------------------
Update 4/28/17
To re-create my issue I'm posting my actual raw pickle data here
The pickle file was created in python 2.7.13, windows 10 using
with open("raw_data.pkl", "wb") as fileobj:
pickle.dump(library, fileobj, protocol=0)
(protocol 0 so it's human readable)
To run it you'll need classes.py
# classes.py
class Library(object): pass
class Book(object): pass
class Student(object): pass
class RentalDetails(object): pass
And the test script here:
# load_pickle.py
import pickle, sys, itertools, os
raw_pkl = "raw_data.pkl"
is_py3 = sys.version_info.major == 3
read_modes = ["rb"]
encodings = ["bytes", "utf-8", "latin-1"]
fix_imports_choices = [True, False]
files = ["raw_data_%s.pkl" % x for x in range(3)]
def py2_test():
with open(raw_pkl, "rb") as fileobj:
loaded_object = pickle.load(fileobj)
print("library dict: %s" % (loaded_object.__dict__.keys()))
return loaded_object
def py2_dumps():
library = py2_test()
for protcol, path in enumerate(files):
print("dumping library to %s, protocol=%s" % (path, protcol))
with open(path, "wb") as writeobj:
pickle.dump(library, writeobj, protocol=protcol)
def py3_test():
# this test iterates over the different options trying to load
# the data pickled with py2 into a py3 environment
print("starting py3 test")
for (read_mode, encoding, fix_import, path) in itertools.product(read_modes, encodings, fix_imports_choices, files):
py3_load(path, read_mode=read_mode, fix_imports=fix_import, encoding=encoding)
def py3_load(path, read_mode, fix_imports, encoding):
from traceback import print_exc
print("-" * 50)
print("path=%s, read_mode = %s fix_imports = %s, encoding = %s" % (path, read_mode, fix_imports, encoding))
if not os.path.exists(path):
print("start this file with py2 first")
return
try:
with open(path, read_mode) as fileobj:
loaded_object = pickle.load(fileobj, fix_imports=fix_imports, encoding=encoding)
# print the object's __dict__
print("library dict: %s" % (loaded_object.__dict__.keys()))
# consider the test a failure if any member attributes are saved as bytes
test_passed = not any((isinstance(k, bytes) for k in loaded_object.__dict__.keys()))
print("Test %s" % ("Passed!" if test_passed else "Failed"))
except Exception:
print_exc()
print("Test Failed")
input("Press Enter to continue...")
print("-" * 50)
if is_py3:
py3_test()
else:
# py2_test()
py2_dumps()
Put all 3 in the same directory and run c:\python27\python load_pickle.py first, which will create 1 pickle file for each of the 3 protocols. Then run the same command with python 3 and notice that it converts the __dict__ keys to bytes. I had it working for about 6 hours, but for the life of me I can't figure out how I broke it again.
In short, you're hitting bug 22005 with datetime.date objects in the RentalDetails objects.
That can be worked around with the encoding='bytes' parameter, but that leaves your classes with __dict__ containing bytes:
>>> library = pickle.loads(pickle_data, encoding='bytes')
>>> dir(library)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: '<' not supported between instances of 'str' and 'bytes'
It's possible to manually fix that based on your specific data:
def fix_object(obj):
    """Decode obj.__dict__ containing bytes keys

    Every ``bytes`` key of ``obj.__dict__`` is decoded as ASCII; keys that
    are already ``str`` are kept as they are, so the function can safely be
    applied more than once (or to an object with mixed keys).
    """
    obj.__dict__ = {
        (k.decode("ascii") if isinstance(k, bytes) else k): v
        for k, v in obj.__dict__.items()
    }
def fix_library(library):
    """Apply ``fix_object`` to the library and every object reachable from it.

    The library itself is fixed first so that ``students`` and ``books`` are
    reachable as attributes; likewise each book is fixed before its
    ``rentals`` are visited.
    """
    fix_object(library)
    for member in library.students:
        fix_object(member)
    for volume in library.books:
        fix_object(volume)
        for loan in volume.rentals:
            fix_object(loan)
But that's fragile and enough of a pain you should be looking for a better option.
1) Implement __getstate__/__setstate__ that maps datetime objects to a non-broken representation, for instance:
class Event(object):
    """Example class working around datetime pickling bug"""

    def __init__(self):
        # Held as a datetime.date while the object is live.
        self.date = datetime.date.today()

    def __getstate__(self):
        """Return a copy of the instance state with ``date`` as an ordinal int."""
        ordinal = self.__dict__["date"].toordinal()
        return dict(self.__dict__, date=ordinal)

    def __setstate__(self, state):
        """Restore the instance state, rebuilding ``date`` from its ordinal."""
        self.__dict__.update(state)
        ordinal = self.date
        self.date = datetime.date.fromordinal(ordinal)
2) Don't use pickle at all. Along the lines of __getstate__/__setstate__, you can just implement to_dict/from_dict methods or similar in your classes for saving their content as json or some other plain format.
A final note, having a backreference to library in each object shouldn't be required.
You should treat pickle data as specific to the (major) version of Python that created it.
(See Gregory Smith's message w.r.t. issue 22005.)
The best way to get around this is to write a Python 2.7 program to read the pickled data, and write it out in a neutral format.
Taking a quick look at your actual data, it seems to me that an SQLite database is appropriate as an interchange format, since the Books contain references to a Library and RentalDetails. You could create separate tables for each.
Question: Porting pickle py2 to py3 strings become bytes
The given encoding='latin-1' below, is ok.
Your Problem with b'' are the result of using encoding='bytes'.
This will result in dict-keys being unpickled as bytes instead of as str.
The Problem data are the datetime.date values '\x07á\x02\x10', starting at line 56 in raw-data.pkl.
It's a known issue, as already pointed out.
Unpickling python2 datetime under python3
http://bugs.python.org/issue22005
For a workaround, I have patched pickle.py and got unpickled object, e.g.
book.library.books[0].rentals[0].rental_date=2017-02-16
This will work for me:
t = pickle.loads(dumpstr, encoding="latin-1")
Output:
<main.test object at 0xf7095fec>
t.__dict__={'x': 'test ¢'}
test ¢
Tested with Python:3.4.2

Python exception handling - line number

I'm using python to evaluate some measured data. Because there are many possible results and combinations, it is difficult to handle all of them. Sometimes an error happens during the evaluation. It is usually an index error because I go out of range of the measured data.
It is very difficult to find out on which place in code the problem happened. It would help a lot if I knew on which line the error was raised. If I use following code:
try:
result = evaluateData(data)
except Exception, err:
print ("Error: %s.\n" % str(err))
Unfortunately this only tells me that there is an index error. I would like to know more details about the exception (line in code, variable etc.) to find out what happened. Is it possible?
Thank you.
Solution, printing filename, linenumber, line itself and exception description:
import linecache
import sys
def PrintException():
    """Print file name, line number, source line and message of the exception being handled.

    Must be called while an exception is being handled (inside an ``except``
    block), because everything is taken from ``sys.exc_info()``. The location
    reported is that of the first traceback entry (``tb`` itself, not
    ``tb.tb_next``); the source text is looked up through ``linecache``.
    """
    _, exc_obj, tb = sys.exc_info()
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    # Drop a stale cache entry in case the file changed on disk.
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj))
try:
print 1/0
except:
PrintException()
Output:
EXCEPTION IN (D:/Projects/delme3.py, LINE 15 "print 1/0"): integer division or modulo by zero
To simply get the line number you can use sys, if you would like to have more, try the traceback module.
import sys
try:
[][2]
except IndexError:
print("Error on line {}".format(sys.exc_info()[-1].tb_lineno))
prints:
Error on line 3
Example from the traceback module documentation:
import sys, traceback
def lumberjack():
bright_side_of_death()
def bright_side_of_death():
return tuple()[0]
try:
lumberjack()
except IndexError:
exc_type, exc_value, exc_traceback = sys.exc_info()
print "*** print_tb:"
traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
print "*** print_exception:"
traceback.print_exception(exc_type, exc_value, exc_traceback,
limit=2, file=sys.stdout)
print "*** print_exc:"
traceback.print_exc()
print "*** format_exc, first and last line:"
formatted_lines = traceback.format_exc().splitlines()
print formatted_lines[0]
print formatted_lines[-1]
print "*** format_exception:"
print repr(traceback.format_exception(exc_type, exc_value,
exc_traceback))
print "*** extract_tb:"
print repr(traceback.extract_tb(exc_traceback))
print "*** format_tb:"
print repr(traceback.format_tb(exc_traceback))
print "*** tb_lineno:", exc_traceback.tb_lineno
I use the traceback which is simple and robust:
import traceback
try:
raise ValueError()
except:
print(traceback.format_exc()) # or: traceback.print_exc()
Out:
Traceback (most recent call last):
File "catch.py", line 4, in <module>
raise ValueError()
ValueError
The simplest way is just to use:
import traceback
try:
<blah>
except IndexError:
traceback.print_exc()
or if using logging:
import logging
try:
<blah>
except IndexError as e:
logging.exception(e)
Gives you file, lineno, and exception for the last item in the call stack
from sys import exc_info
from traceback import format_exception
def print_exception():
    """Print the last two formatted traceback entries of the exception being handled.

    Must be called from inside an ``except`` block, since the exception is
    taken from ``exc_info()``.
    """
    formatted = format_exception(*exc_info())
    # For a plain exception the last two entries are the innermost frame
    # and the "Type: message" line.
    info, error = formatted[-2:]
    print(f'Exception in:\n{info}\n{error}')
try:
1 / 0
except:
print_exception()
prints
Exception in:
File "file.py", line 12, in <module>
1 / 0
ZeroDivisionError: division by zero
I would suggest using the python logging library, it has two useful methods that might help in this case.
logging.findCaller()
findCaller(stack_info=False) - Reports just the line number for the previous caller leading to the exception raised
findCaller(stack_info=True) - Reports the line number & stack for the previous caller leading to the exception raised
logging.logException()
Reports the line & stack within the try/except block that raised the exception
For more info checkout the api https://docs.python.org/3/library/logging.html
There are many answers already posted here that show how to get the line number, but it's worth noting that if you want variables containing the "raw data," so to speak, of the stack trace so that you can have more granular control of what you display or how you format it, using the traceback module you can step through the stack frame by frame and look at what's stored in the attributes of the frame summary objects. There are several simple and elegant ways to manipulate the frame summary objects directly. Let's say for example that you want the line number from the last frame in the stack (which tells you which line of code triggered the exception), here's how you could get it by accessing the relevant frame summary object:
Option 1:
import sys
import traceback
try:
# code that raises an exception
except Exception as exc:
exc_type, exc_value, exc_tb = sys.exc_info()
stack_summary = traceback.extract_tb(exc_tb)
end = stack_summary[-1] # or `stack_summary.pop(-1)` if you prefer
Option 2:
import sys
import traceback
try:
# code that raises an exception
except Exception as exc:
tbe = traceback.TracebackException(*sys.exc_info())
end = tbe.stack[-1] # or `tbe.stack.pop(-1)` if you prefer
In either of the above examples, end will be a frame summary object:
>>> type(end)
<class 'traceback.FrameSummary'>
which was in turn taken from a stack summary object:
>>> type(stack_summary) # from option 1
<class 'traceback.StackSummary'>
>>> type(tbe.stack) # from option 2
<class 'traceback.StackSummary'>
The stack summary object behaves like a list and you can iterate through all of the frame summary objects in it however you want in order to trace through the error. The frame summary object (end, in this example), contains the line number and everything else you need to locate where in the code the exception occurred:
>>> print(end.__doc__)
A single frame from a traceback.
- :attr:`filename` The filename for the frame.
- :attr:`lineno` The line within filename for the frame that was
active when the frame was captured.
- :attr:`name` The name of the function or method that was executing
when the frame was captured.
- :attr:`line` The text from the linecache module for the
of code that was running when the frame was captured.
- :attr:`locals` Either None if locals were not supplied, or a dict
mapping the name to the repr() of the variable.
And if you capture the exception object (either from except Exception as exc: syntax or from the second object returned by sys.exc_info()), you will then have everything you need to write your own highly customized error printing/logging function:
err_type = type(exc).__name__
err_msg = str(exc)
Putting it all together:
from datetime import datetime
import sys
import traceback
def print_custom_error_message():
    """Print a three-line, human-readable report about the exception being handled.

    Must be called from inside an ``except`` block. The report names the
    exception type and message, plus the file, function, line number and
    source text of the last entry in the extracted traceback.
    """
    exc_value, exc_tb = sys.exc_info()[1:]
    # The last extracted entry is the frame in which the exception was raised.
    end = traceback.extract_tb(exc_tb)[-1]
    err_type, err_msg = type(exc_value).__name__, str(exc_value)
    date = datetime.now().strftime("%B %d, %Y at precisely %I:%M %p")
    print(f"On {date}, a {err_type} occured in {end.filename} inside {end.name} on line {end.lineno} with the error message: {err_msg}.")
    print(f"The following line of code is responsible: {end.line!r}")
    print("Please make a note of it.")
def do_something_wrong():
try:
1/0
except Exception as exc:
print_custom_error_message()
if __name__ == "__main__":
do_something_wrong()
Let's run it!
user#some_machine:~$ python example.py
On August 25, 2022 at precisely 01:31 AM, a ZeroDivisionError occured in example.py inside do_something_wrong on line 21 with the error message: division by zero.
The following line of code is responsible: '1/0'
Please make a note of it.
At this point you can see how you could print this message for any place in the stack: end, beginning, anywhere in-between, or iterate through and print it for every frame in the stack.
Of course, the formatting functionality already provided by the traceback module covers most debugging use cases, but it's useful to know how to manipulate the traceback objects to extract the information you want.
I always use this snippet
import sys, os
try:
raise NotImplementedError("No error")
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
print(exc_type, fname, exc_tb.tb_lineno)
for different views and possible issues you can refer When I catch an exception, how do I get the type, file, and line number?
All the solutions answer the OPs problem, however, if one is holding onto a specific error instance and the last traceback stack won't do it they will not suffice —a corner scenario example given below.
In this case, the magic attribute __traceback__ of a raised exception instance may be used. This is a system traceback object (exposed as types.TracebackType, see Types), not the traceback module. This traceback instance behaves just like one would expect (but it is always nice to check):
from collections import deque
from typing import (List, )
errors: List[Exception] = deque([], 5)
def get_raised(error_cls: type, msg: str) -> Exception:
    """Return an ``error_cls(msg)`` instance that has actually been raised.

    For debugging: an exception instance that was never raised has ``None``
    as its ``__traceback__``, so the instance is raised and caught here
    before being handed back.
    """
    caught = None
    try:
        raise error_cls(msg)
    except Exception as error:
        # Keep a reference: the ``as`` name is unbound when the handler ends.
        caught = error
    return caught
error = get_raised(NameError, 'foo')
errors.append(error)
error = get_raised(ValueError, 'bar')
errors.append(error)
try:
raise get_raised(TypeError, 'bar')
except Exception as error:
errors.append(error)
Now, I can check out the first error's details:
import types
from typing import Optional
traceback = errors[0].__traceback__ # inadvisable name due to module
line_no: int = traceback.tb_lineno
frame: types.FrameType = traceback.tb_frame
# Optional[...] is used here because ``Union`` is never imported in this
# example, and an unresolved name in a module-level annotation raises
# NameError when the annotation is evaluated.
previous: Optional[types.TracebackType] = traceback.tb_next
filename: str = frame.f_code.co_filename
The traceback's previous is None for the second error despite a preceding error as expected, but for the third wherein an error is raised twice it is not.
This is just a test which makes no sense contextually. A case where it is useful is when an exception is raised in a view of a webapp (a 500 status kind of incident) and gets caught and stored for the admin to inspect, akin to Sentry.io (but for free). Here is a minimal example where the home page / will raise an error that gets caught, and the route /errors will list the stored errors. This is using Pyramid in a very concentrated way (multifile is way better) with no logging or authentication, and the error logging could be better for the admin to inspect, similar to Sentry.io.
from pyramid.config import Configurator
from waitress import serve
from collections import deque
# just for typehinting:
from pyramid.request import Request
from pyramid.traversal import DefaultRootFactory
from pyramid.router import Router
import types
from typing import (List, )
def home_view(context: DefaultRootFactory, request: Request) -> dict:
raise NotImplementedError('I forgot to fill this')
return {'status': 'ok'} # never reached.
def caught_view(error: Exception, request: Request) -> dict:
    """
    Record ``error`` in the ``error_buffer`` setting and return an error payload.

    The response status is set to 500 and the exception is appended to
    ``request.registry.settings['error_buffer']``.

    Exception above is just type hinting.
    This is controlled by the context argument in
    either the ``add_exception_view`` method of config,
    or the ``exception_view_config`` decorator factory (callable class)
    """
    # this below is a simplification as URLDecodeError is an attack (418)
    request.response.status = 500
    # Go through the request's registry, as error_view does, rather than the
    # module-level ``config`` name, so the view does not depend on a global.
    request.registry.settings['error_buffer'].append(error)
    #logging.exception(error) # were it set up.
    #slack_admin(format_error(error)) # ditto
    return {'status': 'error', 'message': 'The server crashed!!'}
def format_error(error: Exception) -> str:
    """Return a one-line description of ``error``.

    The result is ``"<Type>: <message> at line <n> in file <path>"``, where
    the location comes from the first entry of ``error.__traceback__``. An
    exception that carries no traceback (``__traceback__`` is ``None``) is
    rendered as just ``"<Type>: <message>"``.
    """
    summary = f'{type(error).__name__}: {error}'
    tb = error.__traceback__
    if tb is None:
        # No traceback attached, so there is no location to report.
        return summary
    frame: types.FrameType = tb.tb_frame
    # The separating space before "at line" was missing, which glued the
    # message and the location together.
    return f'{summary} at line {tb.tb_lineno} in file {frame.f_code.co_filename}'
def error_view(context: DefaultRootFactory, request: Request) -> dict:
print(request.registry.settings['error_buffer'])
return {'status': 'ok',
'errors':list(map(format_error, request.registry.settings['error_buffer']))
}
with Configurator(settings=dict()) as config:
config.add_route('home', '/')
config.add_route('errors', '/errors')
config.add_view(home_view, route_name='home', renderer='json')
config.add_view(error_view, route_name='errors', renderer='json')
config.add_exception_view(caught_view, context=Exception, renderer='json')
config.registry.settings['error_buffer']: List[Exception] = deque([], 5)
# not in config.registry.settings, not JSON serialisable
# config.add_request_method
app : Router = config.make_wsgi_app()
port = 6969
serve(app, port=port)

python StringIO with urllib2.urlopen mocked with unittest

I have an interesting problem. I am mocking urllib2.urlopen with the python mock library as follows:
def mock_url_open_conn_for_json_feed():
json_str = """
{"actions":[{"causes":[{"shortDescription":"Started by user anonymous","userId":null,"userName":"anonymous"}]}],"artifacts":[],"building":false,"description":null,"duration":54,"estimatedDuration":54,
"fullDisplayName":"test3#1",
"id":"2012-08-24_14-10-34","keepLog":false,"number":1,"result":"SUCCESS","timestamp":1345842634000,
"url":"http://localhost:8080/job/test3/1/","builtOn":"","changeSet":{"items":[],"kind":null},"culprits":[]}
"""
return StringIO(json_str)
def test_case_foo(self):
io = mock_url_open_conn_for_json_feed()
io.seek(0)
mylib.urllib2.urlopen = Mock(return_value=io)
test_obj.do_your_thing()
def test_case_foo_bar(self)
io = mock_url_open_conn_for_json_feed()
io.seek(0)
mylib.urllib2.urlopen = Mock(return_value=io)
test_obj.param = xyz
test_obj.do_your_thing()
class ObjUnderTest():
def do_your_thing(self):
conn = urllib2.urlopen(url)
simplejson.load(conn)
the first unit test "test_case_foo" runs without a problem. But simplejson.load closes the StringIO, so "test_case_foo_bar" calls on do_your_thing() and it tries to simplejson.load the same StringIO object (even though I return the constructor of StringIO), and it's already been closed. I get the following error:
json = simplejson.load(conn)
File "/Users/sam/Library/Python/2.7/lib/python/site-packages/simplejson/__init__.py", line 391, in load
return loads(fp.read(),
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/StringIO.py", line 127, in read
_complain_ifclosed(self.closed)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/StringIO.py", line 40, in _complain_ifclosed
raise ValueError, "I/O operation on closed file"
ValueError: I/O operation on closed file
I have two questions:
1) Why is the StringIO constructor not returning a new object?
2) Is there a work around for this? Or a better way to achieve what I'm trying to achieve?

Categories