python udf error in pig - python

I am trying to run below python udf in Pig
#outputSchema("word:chararray")
def get(s):
out = s.lower()
return out;
I am getting below error :
File "/home/test.py", line 3, in get
out = s.lower()
AttributeError: 'NoneType' object has no attribute 'lower'

You should handle the case when s is none. In most of the examples such as:
from pig_util import outputSchema
#outputSchema('decade:chararray')
def decade(year):
"""
Get the decade, given a year.
e.g. for 1998 -> '1990s'
"""
try:
base_decade_year = int(year) - (int(year) % 10)
decade_str = '%ss' % base_decade_year
print 'input year: %s, decade: %s' % (year, decade_str)
return decade_str
except ValueError:
return None
You need to handle the case when the value is None. So, one possible fix would be to try:
#outputSchema("word:chararray")
def get(s):
if s is None:
return None
return str(s).lower()

Related

Python automatically adds singlequotes around float value in List [duplicate]

This question already has answers here:
Possible to enforce type hints?
(2 answers)
Closed 12 months ago.
I struggle with Python(3.9.7) automatically adding singlequotes around a float-value, which leads to a failing assertion.
My goal is to use pytest to assert an expected output of a parsing-function. The parser takes a json-object (shortened code sample below) and returns a list of Signal objects. I copied the print-out from logging.debug(f"{signal_list}") inside the parsing function, and assigned it to expected_result in my test-function:
#Arrange
message = json.dumps({"data":{"name":"battery_volt","alias":"Volt","unit":"volt","value":12.0,"time":1644587969}})
expected_result = [Signal(id=None, name='battery_volt', description=None, value=12.0, type=None, unit='volt', time='2022-02-11T13:59:29')]
print(expected_result)
p = Parser(message)
#Act
result = p.parse_message()
#Assert
assert result == expected_result
Irritatingly, pytest -vv throws an AssertionError:
E Full diff:
E - [Signal(id=None, name='battery_volt', description=None, value='12.0', type=None, unit='volt', time='2022-02-11T13:59:29')]
E ? - -
E + [Signal(id=None, name='battery_volt', description=None, value=12.0, type=None, unit='volt', time='2022-02-11T13:59:29')]
The upper line seems to be the value of expected_result, because print(expected_result)
also adds the singlequotes around the 12.0
I assume the copied output from logging.debug(f"{signal_list}") isn't the same as the real value of result. I tried typecasting expected_result as list, str()-converting both result and expected_result inside the test, but expected_result always has '12.0' and result has 12.0.
I desperatly need a hint how to do this kind of assertion the correct way.
EDIT:
Here is the parsing function:
def parse_message(self):
message = json.loads(self.message)
#logging.debug(f"message is: {message}")
message_data = message.get('data', {})
parsed_data = []
try:
device = message_data.get('device', None)
if device is not None:
vehicle = self.parse_vehicle(device)
parsed_data.append(vehicle)
else:
logging.error("'device' was None!")
except Exception as e:
logging.error(e)
signals = []
try:
data = message_data.get('data', None)
if data is not None:
signals = self.parse_signals(data)
gps = message_data.get('gps', None)
if gps is not None:
gps_signal = self.parse_gps(gps)
signals.append(gps_signal)
parsed_data.append(signals)
except Exception as e:
logging.error(e)
return parsed_data
if __name__ == "__main__":
setup_logging()
message = json.dumps({"consumerid":redacted,"data":{"device":{"time":1644587969,"imei":"redacted","man_id":redacted,"car_id":999,"vin":"redacted"},"data":[{"name":"battery_volt","alias":"Volt","unit":"volt","value":12.0,"time":1644587969}],"gps":{"lat":51.437515,"lon":6.9281199,"dir":252,"alt":88,"sat":19,"time":1644587969}},"event":"redacted","itemid":redacted,"itemtype":1,"senderip":"redacted"})
p = Parser(message)
signal_list = p.parse_message()
logging.debug(f"{signal_list}")
Please note that the passed json-objects are more complex than the code-sample in the original post.
class Signal(BaseModel):
id: int = None
name: str = None
description: str = None
value: str = None
type: str = None
unit: str = None
time: str = None
EDIT2 - Assignment of Signal.value happens here:
def parse_signals(self, data):
signals = []
#logging.debug(f"data is : {data}")
for data_object in data:
signal = Signal()
try:
signal.name = data_object.get('name', None)
#get dtc-count value as seperate return-element, needed by the controller to handle dtc-codes
if signal.name == 'dtc_count':
self.dtc_count == data_object.get('value', None)
signal.description = data_object.get('description', None)
signal.value = data_object.get('value', None)
signal.time = datetime.datetime.utcfromtimestamp(data_object.get('time', None)).isoformat()
signal.unit = data_object.get('unit', None)
if signal.unit == "dtc":
signal.type = "1"
if signal.name is not None:
signals.append(signal)
#logging.debug(signal.__dict__)
except Exception as e:
logging.error(f"While parsing signal {data_object}, the following error occured: {e}")
return signals
When parse_message is called as __name__ == "main":, the testcode beneath outputs value=12.0
Ok, turns out python type hints don't enforce like I expected:
When parse_message is called by way of __name__ == "main":, the testcode beneath outputs value=12.0 Apparently, despite the type-hint :str for Signal.value, 12.0 was assigned as float. When I tried to sys.stdout.write(signal_list), I got a TypeError.
I now simply str()-convert in parse_signals() like this
signal.value = str(data_object.get('value', None))
resulting in having my value in single quotes consistently.

Beginning date is bigger than the end date of a calendaristic period

I am trying to write a function that validates a package that contains 2 dates (beginning and end), a destination and a price
Initially, I tried to write a function that "creates" dates and puts them in a list and then compares them to find out if the end date was lower than the beginning but I figured that was too complicated so I resorted to the datetime inbuilt module
However, when I try to run the test function, it fails and it outputs this error message
File "C:\Users\Anon\Desktop\Fac\FP\pythonProject\main.py", line 65, in valideaza_pachet
raise Exception(err)
Exception: wrong dates!
I assume that I must have messed up a condition in the valideaza_pachet() function, but I don't understand what I did wrong
The code:
import time
import calendar
from datetime import date, datetime
def creeaza_pachet(data_i, data_s, dest, pret):
# function that creates a tourism package, data_i = beginning date, data_s = end date, dest = destination and pret = price
return {
"data_i": data_i,
"data_s": data_s,
"dest": dest,
"pret": pret
}
def get_data_i(pachet):
# functie that returns the beginning date of the package
return pachet["data_i"]
def get_data_s(pachet):
# functie that returns the end date of the package
return pachet["data_s"]
def get_destinatie(pachet):
# functie that returns the destination of the package
return pachet["dest"]
def get_pret(pachet):
# functie that returns the price of the package
# input: pachet - un pachet
# output: pretul float > 0 al pachetului
return pachet["pret"]
def valideaza_pachet(pachet):
#functie that validates if a package was correctly introduced or not
#it raises an Exception as ex if any of the conditions aren't met
err = ""
if get_data_i(pachet) < get_data_s(pachet):
err += "wrong dates!" # if the end date is lower than the beginning date
if get_destinatie(pachet) == " ":
err += "wrong destination!"
if get_pret(pachet) <= 0:
err += "wrong price!"
if len(err) > 0:
raise Exception(err)
def test_valideaza_pachet():
pachet = creeaza_pachet(datetime.strptime('24/08/2021',"%d/%m/%Y"), datetime.strptime('26/08/2021',"%d/%m/%Y"), "Galati", 9000.1)
valideaza_pachet(pachet)
pachet_gresit = creeaza_pachet(datetime.strptime('24/08/2021',"%d/%m/%Y"), datetime.strptime('22/08/2021',"%d/%m/%Y"), "Galati", 9000.1)
try:
valideaza_pachet(pachet_gresit)
assert False
except Exception as ex:
assert (str(ex) == "wrong dates!\n")
alt_pachet = creeaza_pachet(datetime.strptime('24/08/2021',"%d/%m/%Y"), datetime.strptime('22/08/2021',"%d/%m/%Y"), " ", -904)
try:
valideaza_pachet(alt_pachet)
assert False
except Exception as ex:
assert(str(ex) == "wrong dates!\nwrong destination!\nwrong price!\n")
def test_creeaza_pachet():
data_i_string = '24/08/2021'
data_i = datetime.strptime(data_i_string, "%d/%m/%Y")
data_s_string = '26/08/2021'
data_s = datetime.strptime(data_s_string, "%d/%m/%Y")
dest = "Galati"
pret = 9000.1
pachet = creeaza_pachet(data_i,data_s,dest,pret)
assert (get_data_i(pachet) == data_i)
assert (get_data_s(pachet) == data_s)
assert (get_destinatie(pachet) == dest)
assert (abs(get_pret(pachet) - pret) < 0.0001)
def run_teste():
test_creeaza_pachet()
test_valideaza_pachet()
def run():
pass
def main():
run()
run_teste()
main()
This is more of a code review and kind of off-topic here but... First,
drop all assert False - these won't do anything useful
drop the getter functions, that just makes things convoluted (just use dict[key] in the code as long as you don't use custom class)
drop unnecessary imports like calendar
you might also want to drop the run and main functions (again convoluted) - just call the functions you need
Then you could change valideaza_pachet to raise specific value errors:
def valideaza_pachet(pachet):
if pachet["data_i"] >= pachet["data_s"]:
raise ValueError("start date must be < end date")
if pachet["dest"] == " ":
raise ValueError("destination must not be empty")
if pachet["pret"] <= 0:
raise ValueError("price must be >= 0")
# returns None if no exception was raised
Now to test a valid package, you would just do
valideaza_pachet(pachet) # no exception expected
Testing an invalid package is a bit more complicated without a class that can be unit-tested (see e.g. here) - but you can catch the exception and use the else clause of the try/except to raise an AssertionError that says you wanted an exception:
try:
valideaza_pachet(pachet_gresit)
except Exception as ex:
print(f"successfully received exception: {ex}")
else:
raise AssertionError("validation should have raised an Exception")
or even
assert str(ex) == "start date must be < end date", f"got '{ex}', expected 'start date must be < end date'"
in place of the print statement.

W602 raise ValueError - How has the message to look like?

I am very new to python and my first task is to check older code (not mine!) to convert it according to pep8.
I have the following code block and I should change raise ValueError to raise ValueError("Message"). How has the syntax of the message to look like, something like 'could not find %c in %s' % (ch,str)?
def sort_key(self, string):
collation_elements = []
lookup_key = [ord(ch) for ch in string]
while lookup_key:
value, lookup_key = self.table.find_prefix(lookup_key)
if not value:
# ###
raise ValueError, map(hex, lookup_key)
collation_elements.extend(value)
sort_key = []
for level in range(4):
if level:
sort_key.append(0) # level separator
for element in collation_elements:
ce_l = int(element[1][level], 16)
if ce_l:
sort_key.append(ce_l)
return tuple(sort_key)

Custom date widget in django?

In my current application, which is based on python and django, I created a custom widget for date.
from datetime import date
from django.forms import widgets
class DateSelectorWidget(widgets.MultiWidget):
def __init__(self, attrs=None):
# create choices for months, years
# example below, the rest snipped for brevity.
years = [(year, year) for year in range(1945, 2016)]
months = [(1,'Jan'),(2,'Feb')]
_widgets = (
widgets.Select(attrs=attrs, choices=months),
widgets.Select(attrs=attrs, choices=years),
)
super(DateSelectorWidget, self).__init__(_widgets, attrs)
def decompress(self, value):
if value:
return [value.month, value.year]
return [None, None]
def format_output(self, rendered_widgets):
return u''.join(rendered_widgets)
def value_from_datadict(self, data, files, name):
datelist = [
widget.value_from_datadict(data, files, name + '_%s' % i)
for i, widget in enumerate(self.widgets)]
try:
D = date(day=1, month=int(datelist[0]),
year=int(datelist[1]))
except ValueError:
return ''
else:
return str(D)
It's working fine when form is being loaded (return date object), but when I submit the form and left some of fields as empty in the form then I am getting the following error.
Caught AttributeError while rendering: 'str' object has no attribute 'month'
Request Method: POST
Request URL:
Django Version: 1.3.1
Exception Type: TemplateSyntaxError
Exception Value:
Caught AttributeError while rendering: 'str' object has no attribute 'month'
Exception Location: /var/www/stacks/django-apps/kkk/apps/oooomonth_year.py in decompress, line 21
You are getting the error here,
def decompress(self, value):
if value:
return [value.month, value.year]
return [None, None]
value object has no attribute month. if you want to know the value attributes print dir(value). In this value is a string.
Update:
check the attribute hasattr(value, 'month'). I hope this helps you.
Its came into existence when post data bind into form. (Not stored into DB due validation message or any other issue). since
def decompress(self, value):
if value:
return [value.month, value.year]
return [None, None]
above method is handling date only when date is storing into DB. but when form is not submitted successfully then its behaves like "Str object". so you need to update above method by below defined method:
def decompress(self, value):
if value:
import types
try:
if type(value) == types.StringType:
from datetime import datetime as dt
tmp_date = dt.strptime(value, '%Y-%m-%d')
return [tmp_date.month, tmp_date.year]
else:
return [value.month, value.year]
except:
raise
else:
return [None, None]
Now this method will handle both case either form submitted or not.

Unbound method TypeError

I've just been reading an article that talks about implementing a parser in python:
http://effbot.org/zone/simple-top-down-parsing.htm
The general idea behind the code is described in this paper: http://mauke.hopto.org/stuff/papers/p41-pratt.pdf
Being fairly new to writing parsers in python so I'm trying to write something similar as a learning exercise. However when I attempted to try to code up something similar to what was found in the article I am getting an TypeError: unbound method TypeError. This is the first time I've encountered such an error and I've spent all day trying to figure this out but I haven't solved the issue. Here is a minimal code example (in it's entirety) that has this problem:
import re
class Symbol_base(object):
""" A base class for all symbols"""
id = None # node/token type name
value = None #used by literals
first = second = third = None #used by tree nodes
def nud(self):
""" A default implementation for nud """
raise SyntaxError("Syntax error (%r)." % self.id)
def led(self,left):
""" A default implementation for led """
raise SyntaxError("Unknown operator (%r)." % self.id)
def __repr__(self):
if self.id == "(name)" or self.id == "(literal)":
return "(%s %s)" % (self.id[1:-1], self.value)
out = [self.id, self.first, self.second, self.third]
out = map(str, filter(None,out))
return "(" + " ".join(out) + ")"
symbol_table = {}
def symbol(id, bindingpower=0):
""" If a given symbol is found in the symbol_table return it.
If the symblo cannot be found theni create the appropriate class
and add that to the symbol_table."""
try:
s = symbol_table[id]
except KeyError:
class s(Symbol_base):
pass
s.__name__ = "symbol:" + id #for debugging purposes
s.id = id
s.lbp = bindingpower
symbol_table[id] = s
else:
s.lbp = max(bindingpower,s.lbp)
return s
def infix(id, bp):
""" Helper function for defining the symbols for infix operations """
def infix_led(self, left):
self.first = left
self.second = expression(bp)
return self
symbol(id, bp).led = infix_led
#define all the symbols
infix("+", 10)
symbol("(literal)").nud = lambda self: self #literal values must return the symbol itself
symbol("(end)")
token_pat = re.compile("\s*(?:(\d+)|(.))")
def tokenize(program):
for number, operator in token_pat.findall(program):
if number:
symbol = symbol_table["(literal)"]
s = symbol()
s.value = number
yield s
else:
symbol = symbol_table.get(operator)
if not symbol:
raise SyntaxError("Unknown operator")
yield symbol
symbol = symbol_table["(end)"]
yield symbol()
def expression(rbp = 0):
global token
t = token
token = next()
left = t.nud()
while rbp < token.lbp:
t = token
token = next()
left = t.led(left)
return left
def parse(program):
global token, next
next = tokenize(program).next
token = next()
return expression()
def __main__():
print parse("1 + 2")
if __name__ == "__main__":
__main__()
When I try to run this with pypy:
Traceback (most recent call last):
File "app_main.py", line 72, in run_toplevel
File "parser_code_issue.py", line 93, in <module>
__main__()
File "parser_code_issue.py", line 90, in __main__
print parse("1 + 2")
File "parser_code_issue.py", line 87, in parse
return expression()
File "parser_code_issue.py", line 81, in expression
left = t.led(left)
TypeError: unbound method infix_led() must be called with symbol:+ instance as first argument (got symbol:(literal) instance instead)
I'm guessing this happens because I don't create an instance for the infix operations but I'm not really wanting to create an instance at that point. Is there some way I can change those methods without creating instances?
Any help explaining why this is happening and what I can do to fix the code is greatly appreciated!
Also is this behaviour going to change in python 3?
You forgot to create an instance of the symbol in your tokenize() function; when not a number, yield symbol(), not symbol:
else:
symbol = symbol_table.get(operator)
if not symbol:
raise SyntaxError("Unknown operator")
yield symbol()
With that one change your code prints:
(+ (literal 1) (literal 2))
You haven't bound new function to the instance of your object.
import types
obj = symbol(id, bp)
obj.led = types.MethodType(infix_led, obj)
See accepted answer to another SO question

Categories