Suppose I have some (simplified) BeautifulSoup code like this, pulling data into a dictionary:
tournament_info = soup.find_all('li')
stats['Date'] = tournament_info[0].text
stats['Location'] = tournament_info[1].text
stats['Prize'] = tournament_info[3].text.split(':')[1].strip()
In the case where the initial find_all raises an exception, I want all the dictionary entries to be 'None'. And in the case of any of the individual dictionary assignments raising an exception, I want 'None' too.
Is there any nice way to write this, other than something horrible like below?
try:
tournament_info = soup.find_all('li')
except:
m_stats['Date'] = 'None'
m_stats['Location'] = 'None'
m_stats['Prize'] = 'None'
try:
m_stats['Date'] = tournament_info[0].text
except:
m_stats['Date'] = 'None'
try:
m_stats['Location'] = tournament_info[1].text
except:
m_stats['Location'] = 'None'
try:
m_stats['Prize'] = tournament_info[3].text.split(':')[1].strip()
except:
m_stats['Prize'] = 'None'
Create own class
class Stats(dict):
    """Dictionary of tournament statistics built from a list of items.

    The 'Date', 'Location' and 'Prize' entries are read from the 'text'
    value of items 0, 1 and 2 of *tournament_info*.  An entry whose source
    item is missing or malformed is stored as None instead of raising.
    """

    # Class-level fallback only; __init__ always replaces it per instance.
    tournament_info = []

    def __init__(self, tournament_info, **kwargs):
        """Populate the dictionary from *tournament_info*.

        Args:
            tournament_info: sequence of mappings that may carry a 'text' key.
            **kwargs: extra initial entries, passed on to dict().
        """
        super(Stats, self).__init__(**kwargs)
        self.tournament_info = tournament_info
        self['Date'] = self.get_tournament_info_text(0)
        self['Location'] = self.get_tournament_info_text(1)
        self['Prize'] = self._parse_prize(self.get_tournament_info_text(2))

    def get_tournament_info_text(self, index):
        """Return the 'text' of item *index*, or None if it cannot be read."""
        try:
            return self.tournament_info[index]['text']
        except (LookupError, TypeError):
            # LookupError: item or 'text' key missing.
            # TypeError: tournament_info (or the item) is not subscriptable.
            return None

    @staticmethod
    def _parse_prize(raw):
        """Return the part of *raw* after the first ':', stripped, or None."""
        if raw is None:
            return None
        try:
            return raw.split(':')[1].strip()
        except (AttributeError, IndexError):
            # Not a string, or no ':' separator present.
            return None
# Sample input standing in for the scraped items; the second item has no
# 'text' entry.
tournament_info = [
    {
        'text': 'aaa'
    },
    {},
    {
        'text': 'bbb:ccc '
    },
]
m_stats = Stats(tournament_info)
# The call form of print works on both Python 2 and Python 3.
print(m_stats)
Here's what I can suggest for your code:
info = soup.find_all('li')
if not info:
m_stats = dict.fromkeys(m_stats, None)
return
mappings = {
'Date': 0,
'Location': 1,
'Prize': 3
}
for key in mappings:
value = None
try:
value = info[mappings[key]].text
if mappings[key] == 3:
value = value.split(':')[1].strip()
except IndexError:
pass
m_stats[key] = value
Alternatively, you can create a function that will handle the exceptions for you:
def get_value(idx):
    """Return the text of ``info[idx]``, or None when that item is missing."""
    try:
        return info[idx].text
    except IndexError:
        return None


m_stats['Date'] = get_value(0)
m_stats['Location'] = get_value(1)
m_stats['Prize'] = get_value(3)
if m_stats['Prize']:
    try:
        # str methods return new strings, so the cleaned value has to be
        # stored back into the dictionary.
        m_stats['Prize'] = m_stats['Prize'].split(':')[1].strip()
    except IndexError:
        # No ':' separator in the text: treat the prize as unavailable.
        m_stats['Prize'] = None
The solution I went for was to create a blank template dictionary (actually a JSON) with all the keys set to 'None'.
Every time the page is scraped, m_stats is first initialised with this blank dictionary (loaded from the JSON). If an exception occurs, it is just simply passed (with some logging), and the value is left as 'None'. There is then no need to explicitly assign 'None' every single time.
Not sure if it's correct to mark this as the "answer", as it is quite specific to my needs, but that's what I did anyway.
Related
This question already has answers here:
Possible to enforce type hints?
(2 answers)
Closed 12 months ago.
I struggle with Python(3.9.7) automatically adding singlequotes around a float-value, which leads to a failing assertion.
My goal is to use pytest to assert an expected output of a parsing-function. The parser takes a json-object (shortened code sample below) and returns a list of Signal objects. I copied the print-out from logging.debug(f"{signal_list}") inside the parsing function, and assigned it to expected_result in my test-function:
#Arrange
message = json.dumps({"data":{"name":"battery_volt","alias":"Volt","unit":"volt","value":12.0,"time":1644587969}})
expected_result = [Signal(id=None, name='battery_volt', description=None, value=12.0, type=None, unit='volt', time='2022-02-11T13:59:29')]
print(expected_result)
p = Parser(message)
#Act
result = p.parse_message()
#Assert
assert result == expected_result
Irritatingly, pytest -vv throws an AssertionError:
E Full diff:
E - [Signal(id=None, name='battery_volt', description=None, value='12.0', type=None, unit='volt', time='2022-02-11T13:59:29')]
E ? - -
E + [Signal(id=None, name='battery_volt', description=None, value=12.0, type=None, unit='volt', time='2022-02-11T13:59:29')]
The upper line seems to be the value of expected_result, because print(expected_result)
also adds the singlequotes around the 12.0
I assume the copied output from logging.debug(f"{signal_list}") isn't the same as the real value of result. I tried typecasting expected_result as list, str()-converting both result and expected_result inside the test, but expected_result always has '12.0' and result has 12.0.
I desperately need a hint on how to do this kind of assertion the correct way.
EDIT:
Here is the parsing function:
def parse_message(self):
message = json.loads(self.message)
#logging.debug(f"message is: {message}")
message_data = message.get('data', {})
parsed_data = []
try:
device = message_data.get('device', None)
if device is not None:
vehicle = self.parse_vehicle(device)
parsed_data.append(vehicle)
else:
logging.error("'device' was None!")
except Exception as e:
logging.error(e)
signals = []
try:
data = message_data.get('data', None)
if data is not None:
signals = self.parse_signals(data)
gps = message_data.get('gps', None)
if gps is not None:
gps_signal = self.parse_gps(gps)
signals.append(gps_signal)
parsed_data.append(signals)
except Exception as e:
logging.error(e)
return parsed_data
if __name__ == "__main__":
setup_logging()
message = json.dumps({"consumerid":redacted,"data":{"device":{"time":1644587969,"imei":"redacted","man_id":redacted,"car_id":999,"vin":"redacted"},"data":[{"name":"battery_volt","alias":"Volt","unit":"volt","value":12.0,"time":1644587969}],"gps":{"lat":51.437515,"lon":6.9281199,"dir":252,"alt":88,"sat":19,"time":1644587969}},"event":"redacted","itemid":redacted,"itemtype":1,"senderip":"redacted"})
p = Parser(message)
signal_list = p.parse_message()
logging.debug(f"{signal_list}")
Please note that the passed json-objects are more complex than the code-sample in the original post.
class Signal(BaseModel):
id: int = None
name: str = None
description: str = None
value: str = None
type: str = None
unit: str = None
time: str = None
EDIT2 - Assignment of Signal.value happens here:
def parse_signals(self, data):
    """Convert the raw signal dictionaries in *data* into Signal objects.

    Args:
        data: iterable of dictionaries read with the keys 'name',
            'description', 'value', 'time' and 'unit'.

    Returns:
        A list of the Signal objects whose name is not None.  An entry that
        raises while being parsed is logged and left out of the list.
    """
    signals = []
    for data_object in data:
        signal = Signal()
        try:
            signal.name = data_object.get('name', None)
            # Keep the dtc-count value as a separate element; the controller
            # needs it to handle dtc-codes.
            if signal.name == 'dtc_count':
                # Assignment, not comparison: '==' here would discard the
                # value without storing it.
                self.dtc_count = data_object.get('value', None)
            signal.description = data_object.get('description', None)
            signal.value = data_object.get('value', None)
            signal.time = datetime.datetime.utcfromtimestamp(data_object.get('time', None)).isoformat()
            signal.unit = data_object.get('unit', None)
            if signal.unit == "dtc":
                signal.type = "1"
            if signal.name is not None:
                signals.append(signal)
        except Exception as e:
            # Best effort: one malformed entry must not abort the whole batch.
            logging.error(f"While parsing signal {data_object}, the following error occured: {e}")
    return signals
When parse_message is called as __name__ == "main":, the testcode beneath outputs value=12.0
Ok, turns out python type hints don't enforce like I expected:
When parse_message is called by way of the `if __name__ == "__main__":` block, the test code beneath outputs value=12.0. Apparently, despite the type hint `: str` for Signal.value, 12.0 was assigned as a float. When I tried to sys.stdout.write(signal_list), I got a TypeError.
I now simply str()-convert in parse_signals() like this
# Convert to str only when a value is present; str(None) would store the
# literal string 'None' instead of leaving the field empty.
raw_value = data_object.get('value', None)
signal.value = None if raw_value is None else str(raw_value)
resulting in having my value in single quotes consistently.
This is more a general programming question than related to the actual code.
I have this ugly code that takes an input from JIRA and converts it from milliseconds to hours written out multiple times like below:
def convertMillis(ms):
hours = ms / 1000 / 60 / 60
return hours
try:
newaccsla_comp = convertMillis(issues.fields.customfield_10705.completedCycles[0].remainingTime.millis)
except:
newaccsla_comp = np.nan
try:
newaccsla_ongoing = convertMillis(issues.fields.customfield_10705.ongoingCycle.remainingTime.millis)
except:
newaccsla_ongoing = np.nan
try:
paymentssla_comp = convertMillis(issues.fields.customfield_10136.completedCycles[0].remainingTime.millis)
except:
paymentssla_comp = np.nan
try:
paymentssla_ongoing = convertMillis(issues.fields.customfield_10136.ongoingCycle.remainingTime.millis)
except:
paymentssla_ongoing = np.nan
try:
modifysla_comp = convertMillis(issues.fields.customfield_10713.completedCycles[0].remainingTime.millis)
except:
modifysla_comp = np.nan
try:
modifysla_ongoing = convertMillis(issues.fields.customfield_10713.ongoingCycle.remainingTime.millis)
except:
modifysla_ongoing = np.nan
try:
MFsla_comp = convertMillis(issues.fields.customfield_10711.completedCycles[0].remainingTime.millis)
except:
MFsla_comp = np.nan
try:
MFsla_ongoing = convertMillis(issues.fields.customfield_10711.ongoingCycle.remainingTime.millis)
except:
MFsla_ongoing = np.nan
try:
closeaccsla_comp = convertMillis(issues.fields.customfield_10140.completedCycles[0].remainingTime.millis)
except:
closeaccsla_comp = np.nan
try:
closeaccsla_ongoing = convertMillis(issues.fields.customfield_10140.ongoingCycle.remainingTime.millis)
except:
closeaccsla_ongoing = np.nan
try:
casla_comp = convertMillis(issues.fields.customfield_10213.completedCycles[0].remainingTime.millis)
except:
casla_comp = np.nan
try:
casla_ongoing = convertMillis(issues.fields.customfield_10213.ongoingCycle.remainingTime.millis)
except:
casla_ongoing = np.nan
try:
at_comp = convertMillis(issues.fields.customfield_10144.completedCycles[0].remainingTime.millis)
except:
at_comp = np.nan
try:
at_ongoing = convertMillis(issues.fields.customfield_10144.ongoingCycle.remainingTime.millis)
except:
at_ongoing = np.nan
try:
modfeesla_comp = convertMillis(issues.fields.customfield_10134.completedCycles[0].remainingTime.millis)
except:
modfeesla_comp = np.nan
try:
modfeesla_ongoing = convertMillis(issues.fields.customfield_10134.ongoingCycle.remainingTime.millis)
except:
modfeesla_ongoing = np.nan
try:
tdsla_comp = convertMillis(issues.fields.customfield_11200.completedCycles[0].remainingTime.millis)
except:
tdsla_comp = np.nan
try:
tdsla_ongoing = convertMillis(issues.fields.customfield_11200.ongoingCycle.remainingTime.millis)
except:
tdsla_ongoing = np.nan
try:
querysla_comp = convertMillis(issues.fields.customfield_10142.completedCycles[0].remainingTime.millis)
except:
querysla_comp = np.nan
try:
querysla_ongoing = convertMillis(issues.fields.customfield_10142.ongoingCycle.remainingTime.millis)
except:
querysla_ongoing = np.nan
try:
recsla_comp = convertMillis(issues.fields.customfield_15600.completedCycles[0].remainingTime.millis)
except:
recsla_comp = np.nan
try:
recsla_ongoing = convertMillis(issues.fields.customfield_15600.ongoingCycle.remainingTime.millis)
except:
recsla_ongoing = np.nan
try:
reportsla_comp = convertMillis(issues.fields.customfield_15601.completedCycles[0].remainingTime.millis)
except:
reportsla_comp = np.nan
try:
reportsla_ongoing = convertMillis(issues.fields.customfield_15601.ongoingCycle.remainingTime.millis)
except:
reportsla_ongoing = np.nan
I would be comfortable doing something like taking all the custom fields, putting them in one list then doing a for over the function like this:
field_list = ['customfield_10705','customfield_10136','customfield_10713','customfield_10711','customfield_10140','customfield_10213','customfield_10144','customfield_10134','customfield_11200','customfield_10142','customfield_15600','customfield_15601']
def get_jira_hours(field):
try:
newaccsla_comp = convertMillis(issues.fields.field.completedCycles[0].remainingTime.millis)
except:
newaccsla_comp = np.nan
try:
newaccsla_ongoing = convertMillis(issues.fields.field.ongoingCycle.remainingTime.millis)
except:
newaccsla_ongoing = np.nan
for field in field_list:
get_jira_hours(field)
However, there are three values linked to each function call that I need to iterate over: the custom field (e.g. customfield_10705) and the two names to save each try/except result to (e.g. newaccsla_comp and newaccsla_ongoing).
Here are the variables in order, i.e. field_list[0] is linked to name_list[0]:
field_list = ['customfield_10705','customfield_10136','customfield_10713','customfield_10711','customfield_10140','customfield_10213','customfield_10144','customfield_10134','customfield_11200','customfield_10142','customfield_15600','customfield_15601']
name_list = ['newaccsla','paymentssla','modifysla','MFsla','closeaccsla','casla','at','modfeesla','tdsla','querysla','recsla','reportssla']
Best way to iterate over these? Thanks.
First, you can turn each of those four-line blocks into a one-liner if you just edit your convertMillis function to return np.nan instead of raising—or, if you can't do that, wrap the function in another one:
def convertMillisOrNan(millis):
    """Return convertMillis(millis), or np.nan if the conversion raises."""
    try:
        return convertMillis(millis)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        return np.nan
newaccsla_comp = convertMillisOrNan(issues.fields.customfield_10705.completedCycles[0].remainingTime.millis)
newaccsla_ongoing = convertMillisOrNan(issues.fields.customfield_10705.ongoingCycle.remainingTime.millis)
# etc.
Or, maybe the exception you're trying to handle comes a bit farther up. You're always calling convertMillis on <something>.remainingTime.millis. What if, say, the field always exists, and always has an ongoingCycle, but that doesn't always have a remainingTime attribute? Then you can push that part into the try:, and also simplify things even further at the same time:
def convertCycle(cycle):
    """Return the hours for cycle.remainingTime.millis, or np.nan on failure.

    The attribute lookups are inside the try, so a cycle without a
    remainingTime (or a remainingTime without millis) yields np.nan.
    """
    try:
        return convertMillis(cycle.remainingTime.millis)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        return np.nan
newaccsla_comp = convertCycle(issues.fields.customfield_10705.completedCycles[0])
newaccsla_ongoing = convertCycle(issues.fields.customfield_10705.ongoingCycle)
If the exception comes even higher up—e.g., if the field doesn't always have an ongoingCycle—obviously you need to push more of the expression inside the try: block; I'm really just making a guess here at what you're trying to handle with that except:.
And, while you're at it, do you really want a bare except:? That will handle any exception, not just an AttributeError or ValueError or whatever kind of exception you were actually expecting.
Meanwhile, your existing jira_hours refactor doesn't work because you can't just use .field when field is a variable holding a string. One way to solve that is:
def get_jira_hours(field):
    """Return ``(comp, ongoing)`` for one SLA custom field.

    ``comp`` comes from the first completed cycle and ``ongoing`` from the
    ongoing cycle, both converted with convertCycle.  When the field has no
    completed cycles, the result for that part is np.nan rather than an
    exception.
    """
    try:
        first_completed = field.completedCycles[0]
    except (AttributeError, IndexError, TypeError):
        # No completedCycles attribute, an empty list, or a field of None.
        comp = np.nan
    else:
        comp = convertCycle(first_completed)
    # A missing ongoingCycle becomes None, which convertCycle turns into
    # np.nan via its own exception handling.
    ongoing = convertCycle(getattr(field, 'ongoingCycle', None))
    return comp, ongoing
newaccsla_comp, newaccsla_ongoing = get_jira_hours(issues.fields.customfield_10705)
paymentssla_comp, paymentssla_ongoing = get_jira_hours(issues.fields.customfield_10136)
# etc.
Another way to solve it is with getattr—which I'll show below.
But you can do even better. Do you really need these all to be independent variables, rather than, say, items in a dict?
# Maps the short name used for the result keys to the JIRA custom field that
# holds its SLA data (ids as used in the question's code).
fieldmap = {
    'newaccsla': 'customfield_10705',
    'paymentssla': 'customfield_10136',
    # etc.
}
values = {}
for fieldname, customfieldname in fieldmap.items():
    # The field name is only known as a string, hence getattr.
    field = getattr(issues.fields, customfieldname)
    comp, ongoing = get_jira_hours(field)
    values[f'{fieldname}_comp'] = comp
    values[f'{fieldname}_ongoing'] = ongoing
Now, instead of using newaccsla_comp, you have to use values['newaccsla_comp']. But I suspect your code is actually going to contain a lot of places where you copy and paste the same thing for each variable, which you can replace with code that just loops over the dict.
But if you really do need these to be independent variables—which, again, you probably don't—you can do the same thing by just using globals() or locals() instead of values.
On the other hand, if you're going to be repeating yourself over comp/ongoing pairs of values, just store the pairs in the dict: values[fieldname] = comp, ongoing.
Also, since all of the custom field names seem to be customfield_NNNNN, you can simplify things even further, by mapping 'newaccsla': 10705, etc., and then doing getattr(issues.fields, f'customfield_{customfield}').
In the function getLink(urls), I have return (cloud,parent,children).
In the main function, I have (cloud,parent,children) = getLink(urls), and I get an error on this line: TypeError: 'NoneType' object is not iterable
parent and children are both lists of http links. Since I am not able to paste them here: parent is a list containing about 30 links; children is a list containing about 30 items, where each item is about 10-100 links separated by ",".
cloud is a list containing about 100 words, like this: ['official store', 'Java Applets Centre', 'About Google', 'Web History'.....]
I do not know why I get this error. Is there anything wrong with how I pass the parameters? Or is it because the lists take too much space?
#crawler url: read webpage and return a list of url and a list of its name
def crawler(url):
try:
m = urllib.request.urlopen(url)
msg = m.read()
....
return (list(set(list(links))),list(set(list(titles))) )
except Exception:
print("url wrong!")
#this is the function has gone wrong: it throw an exception here, also the error I mentioned, also it will end while before len(parent) reach 100.
def getLink(urls):
    """Crawl outwards from *urls*, level by level, collecting link data.

    Each URL of the current level is passed to crawler().  Once the level is
    exhausted, the links found on it become the next level.  Crawling stops
    when enough pages were collected or no unvisited links remain.

    Args:
        urls: list of start URLs.

    Returns:
        A ``(cloud, parent, children)`` tuple: the titles found on all
        crawled pages, the crawled URLs, and for every crawled URL its links
        joined by ",".  A 3-tuple is returned in every case, so the caller
        can always unpack it.
    """
    urls = list(urls or [])
    newUrl = []
    parent = []
    children = []
    cloud = []
    i = 0
    while len(parent) <= 100:
        if i >= len(urls):
            # Current level exhausted: continue with the links found on it.
            urls = list(set(newUrl))
            newUrl = []
            i = 0
            if not urls:
                # Nothing left to visit; stop instead of indexing past the
                # end of an empty list.
                break
        url = urls[i]
        i += 1
        if url in parent:
            continue
        result = crawler(url)
        if result is None:
            # crawler() prints a message and returns None when it fails;
            # skip that page rather than abandoning the whole crawl.
            continue
        links, titles = result
        parent.append(url)
        children.append(",".join(links))
        cloud = cloud + titles
        newUrl = newUrl + links
        print("links: ", links)
    return (cloud, parent, children)
def readfile(file):
#not related, this function will return a list of url
def main():
file='sampleinput.txt'
urls=readfile(file)
(cloud,parent,children) = getLink(urls)
if __name__=='__main__':
main()
There might be a way that your function ends without reaching the explicit return statement.
Look at the following example code.
def get_values(x):
if x:
return 'foo', 'bar'
x, y = get_values(1)
x, y = get_values(0)
When the function is called with 0 as parameter the return is skipped and the function will return None.
You could add an explicit return as the last line of your function. In the example given in this answer it would look like this.
def get_values(x):
if x:
return 'foo', 'bar'
return None, None
Update after seeing the code
When the exception is triggered in getLink, you just print something and fall off the end of the function. There is no return statement on that path, so Python returns None. The calling function then tries to unpack None into three values, and that fails.
Change your exception handling to return a tuple with three values, just as you do when everything is fine. Using None for each value is a good idea because it shows you that something went wrong. Additionally, I wouldn't print anything in the function. Don't mix business logic and input/output.
except Exception:
return None, None, None
Then in your main function use the following:
cloud, parent, children = getLink(urls)
if cloud is None:
print("can not get links")
else:
# do some more work
I have this long list of try except statement:
try:
uri = entry_obj['media$group']['media$content'][0]['url']
except (KeyError, IndexError):
uri = None
try:
position = entry_obj['yt$position']['$t']
except KeyError:
position = None
try:
description = entry_obj['content']['$t']
except KeyError:
description = None
try:
seconds = entry_obj['media$group']['yt$duration']['seconds']
except KeyError:
seconds = None
try:
thumbnails = entry_obj['media$group']['media$thumbnail']
except KeyError:
thumbnails = None
Is there a more concise way to write this?
If you tire of figuring out what to use for default values in get() calls, just write a helper function:
def resolve(root, *keys):
    """Follow *keys* through nested containers, starting at *root*.

    Args:
        root: the outermost dict or list.
        *keys: dict keys and/or list indexes, applied in order.

    Returns:
        The value reached after applying every key, or None as soon as one
        step cannot be taken.
    """
    for key in keys:
        try:
            root = root[key]
        except (KeyError, IndexError, TypeError):
            # TypeError: the value reached so far is not subscriptable (for
            # example None), or the key type does not fit the container.
            return None
    return root
Then you just write, e.g.:
uri = resolve(entry_obj, 'media$group', 'media$content', 0, 'url')
To simplify the calls a little, you might beef up the helper function to take a single string for the keys and split on spaces; that way you don't have to type so many quotes, and we can also add a default value argument:
def resolve(root, keys, default=None):
    """Follow a space-separated key path through nested containers.

    Args:
        root: the outermost dict or list.
        keys: path elements separated by whitespace.  Each element is tried
            as a string key first and, if that fails, as an integer index.
        default: value returned when a step of the path cannot be taken.

    Returns:
        The value found at the end of the path, or *default*.
    """
    for key in keys.split():
        try:
            root = root[key]
        except (TypeError, KeyError):
            try:
                root = root[int(key)]
            except (IndexError, ValueError, KeyError, TypeError):
                # TypeError covers a non-subscriptable value such as None.
                return default
    # Without this return every successful lookup would yield None.
    return root
uri = resolve(entry_obj, 'media$group media$content 0 url', '')
I thought of another good way to do this, not sure how it compares to kindall's method. We first define a method property:
def res(self, property):
try:
return property()
except (KeyError, IndexError):
return None
Then replace the try-except statements with:
url = res(lambda: entry_obj['media$group']['media$content'][0]['url'])
position = res(lambda: entry_obj['yt$position']['$t'])
description = res(lambda: entry_obj['content']['$t'])
duration = res(lambda: entry_obj['media$group']['yt$duration']['seconds'])
thumbnails = res(lambda: entry_obj['media$group']['media$thumbnail'])
Use the get method of dictionaries instead:
# The {} default keeps the chained .get() working when 'yt$position' is
# absent; without it the first call returns None and the second one raises
# AttributeError.
position = entry_obj.get('yt$position', {}).get('$t')
get will handle the case of a key not existing for you, and give you a (changeable) fallback value instead in that case. You'll still need to handle the first IndexError manually, but all the ones that are just except KeyError: will disappear.
Ok, I recently started programming in Python, and I really like it.
However, I have run into a little issue.
I want to be able to define a function to take in some data and assign it to a variable that I designate, rather than have to perform the operation every time I want to submit the value.
Here is a code fragment:
try:
if elem.virtual.tag:
virt = True
temp_asset.set_virtual(True)
except AttributeError:
temp_asset.set_virtual(False)
if virt: #if virtual, get only faction, value, and range for presence
try:
fac = elem.presence.faction #an xml tag (objectified)
except AttributeError:
fac = "faction tag not found"
temp_asset.misload = True
try:
val = elem.presence.value
except AttributeError:
val = "value tag not found"
temp_asset.misload = True
try:
rang = elem.presence.range
except AttributeError:
rang = "range tag not found"
temp_asset.misload = True
#Set presence values
temp_asset.set_presence(fac, val, rang)
The functions set the values, but I want to be able to perform the error checking with something like this:
def checkval(self, variable_to_set, tag_to_use)
try:
variable_to_set = tag_to_use
except AttributeError:
variable_to_set = "tag not found"
temp_asset.misload = True
Is this doable? Let me know if I need to show more code.
Edit: I don't need pointers per se, just anything that works this way and saves typing.
Edit 2: Alternatively, I need a solution of how to check whether an objectified xml node exists (lxml).
Have you tried/looked into the getattr and setattr functions?
For example, assuming these "variables" are object attributes:
def checkval(self, attr, presence, tagstr):
    """Copy the attribute named *tagstr* of *presence* to ``self.<attr>``.

    When *presence* has no such attribute (or it is None), 'tag not found'
    is stored instead and ``self.temp_asset.misload`` is set to True.
    """
    tag = getattr(presence, tagstr, None)
    if tag is None:
        setattr(self, attr, 'tag not found')
        self.temp_asset.misload = True
    else:
        # Compare against None rather than relying on truthiness, so a tag
        # that is present but falsy (0, '') is stored as-is.
        setattr(self, attr, tag)
You call it like,
your_object.checkval('fac', elem.presence, 'faction')
Alternatively, you can pre-define these variables and set them default values before you attempt to look up the tags. For example:
class YourObject(object):
    """Holds presence values that default to a '<tag> tag not found' text."""

    # Attribute name on this object -> attribute name looked up on the
    # presence node.
    _attrmap = {
        'fac': 'faction',
        'val': 'value',
        'rang': 'range',
    }

    def __init__(self):
        # Set default values, so a failed lookup needs no further handling.
        for attr_name, tagstr in self._attrmap.items():
            setattr(self, attr_name, '%s tag not found' % tagstr)

    def checkval(self, attr, presence):
        """Copy every mapped tag found on *presence* onto this object.

        Args:
            attr: not used; all attributes in ``_attrmap`` are processed.
                Kept so existing callers keep working.
            presence: object whose attributes are read.

        A tag that is missing (or None) leaves the default value in place
        and sets ``self.temp_asset.misload`` to True.
        """
        # A separate loop variable, so the 'attr' argument is not silently
        # overwritten.
        for attr_name, tagstr in self._attrmap.items():
            tag = getattr(presence, tagstr, None)
            if tag is not None:
                setattr(self, attr_name, tag)
            else:
                self.temp_asset.misload = True