Loading function parameters from a text file - python

I have the following function:
def request( url, type, headers, simulate = False, data = {}):
I want to be able to load the parameters from a text file and pass them to the function, I tried using evil eval below:
if execute_recovery:
for command in content:
logger.debug("Executing: "+command)
try:
result = eval(utilities.request("{0}").format(command))
if not result["Success"]:
continue_recovery = utilities.query_yes_no("Warning: Previous recovery command failed, attempt to continue recovery?\n")
if not continue_recovery:
break
else:
logger.debug("Command executed successfully...")
except Exception, e:
logger.debug( "Recovery: Eval Error, %s" % str(e) )
Where command would be a line in a text file like:
"http://192.168.1.1/accounts/1/users/1",delete,headers,simulate=False,data={}
This throws me the following error:
'request() takes at least 3 arguments (1 given)'
So presumably this means that it is interpreting the command as a single string instead of different parameters.
Does anybody know how to solve this?

I can't understand what you are trying to do there with eval or format. For one thing, you've put eval around the call to request itself, so it will evaluate the return value rather than call it with some dynamic value.
But you don't need eval at all. You just need to pass the arguments using the * and ** operators:
# Split one text-file line into positional and keyword arguments for
# utilities.request().  Limitation: the line is split on every comma, so a
# value that itself contains a comma (e.g. data={"a": 1, "b": 2}) cannot be
# expressed in this format.
args = []
kwargs = {}
for arg in command.split(','):
    arg = arg.strip()
    if '=' in arg:
        # Split on the first '=' only, so a value that contains '=' itself
        # does not break the two-name unpacking.
        k, v = arg.split('=', 1)
        # literal_eval only accepts Python literals, so nothing from the
        # file is ever executed.
        kwargs[k.strip()] = ast.literal_eval(v.strip())
    else:
        args.append(arg)
result = utilities.request(*args, **kwargs)

Using @BurhanKhalid's suggestion, I decided to store the parameters as a JSON object and load them at run time like so:
Store parameters here:
def request( url, type, headers, simulate = False, data = {}):
if simulate:
recovery_command = {"url":url, "type" : type, "data" : data}
recovery.add_command(json.dumps(recovery_command))
...
Load parameters here:
def recovery():
    """Replay the stored recovery commands, one JSON object per entry.

    Each entry of ``content`` is decoded with ``json.loads`` and passed to
    ``utilities.request``.  When a command reports failure the user is asked
    whether to continue; answering no stops the replay.
    """
    ...
    if execute_recovery:
        for command in content:
            logger.debug("Executing: "+command)
            try:
                recovery_command = json.loads(command)
                result = utilities.request(url = recovery_command["url"], type = recovery_command["type"], headers = headers, simulate = False, data = recovery_command["data"])
                if not result["Success"]:
                    continue_recovery = utilities.query_yes_no("Warning: Previous recovery command failed, attempt to continue recovery?\n")
                    if not continue_recovery:
                        break
                else:
                    logger.debug("Command executed successfully...")
            # "except Exception, e" is Python-2-only syntax; "as e" is valid
            # on Python 2.6+ and Python 3 alike.
            except Exception as e:
                logger.debug( "Recovery: Eval Error, %s" % str(e) )

Related

Python: Handle Missing Object keys in mapping and continue instructions

I'm fairly new to Python so bear with me please.
I have a function that takes two parameters, an api response and an output object, i need to assign some values from the api response to the output object:
def map_data(output, response):
try:
output['car']['name'] = response['name']
output['car']['color'] = response['color']
output['car']['date'] = response['date']
#other mapping
.
.
.
.
#other mapping
except KeyError as e:
logging.error("Key Missing in api Response: %s", str(e))
pass
return output
Now sometimes, the api response is missing some keys i'm using to generate my output object, so i used the KeyError exception to handle this case.
Now my question is, in a case where the 'color' key is missing from the api response, how can i catch the exception and continue to the line after it output['car']['date'] = response['date'] and the rest of the instructions.
I tried the pass instruction but it didn't have any effect.
Ps: i know i can check the existence of the key using:
if response.get('color') is not None:
output['car']['color'] = response['color']
and then assign the values but seeing that i have about 30 values i need to map, is there any other way i can implement ? Thank you
A few immediate ideas
(FYI - I'm not going to explain everything in detail - you can check out the python docs for more info, examples etc - that will help you learn more, rather than trying to explain everything here)
Google 'python handling dict missing keys' for a million methods/ideas/approaches - it's a common use case!
Convert your response dict to a defaultdict. In that case you can have a default value returned (eg None, '', 'N/A' ...whatever you like) if there is no actual value returned.
In this case you could do away with the try and every line would be executed.
from collections import defaultdict
# Wrap the response so that any missing key yields 'NA' instead of raising.
resp = defaultdict(lambda: 'NA', response)
# Read from the wrapper (resp), not from the original dict, otherwise the
# default never applies and a KeyError is still raised.
output['car']['date'] = resp['date'] # will have value 'NA' if 'date' isn't in response
Use the in syntax, perhaps in combination with a ternary else
# A conditional expression always needs an else branch; to skip the
# assignment entirely when the key is absent, use a plain if statement.
if 'color' in response:
    output['car']['color'] = response['color']
# With an else branch the one-line form works and supplies a default.
output['car']['date'] = response['date'] if 'date' in response else 'NA'
Again you can do away with the try block and every line will execute.
Use the dictionary get function, which allows you to specify a default if there is no value for that key:
# Look up the same key that is being assigned ('color'), with a fallback.
output['car']['color'] = response.get('color', 'no color specified')
You can create a utility function that gets the value from the response and if the value is not found, it returns an empty string. See example below:
def get_value_from_response_or_null(response, key, default=""):
    """Return ``response[key]``, or ``default`` when the key is missing.

    A missing key is logged as an error and never propagates as a
    ``KeyError``, so callers can map many fields without a try block
    around each one.

    Args:
        response: Mapping to read from.
        key: Key to look up.
        default: Value returned when ``key`` is absent (empty string by
            default).

    Returns:
        The stored value, or ``default`` if the key is not present.
    """
    try:
        return response[key]
    except KeyError as e:
        logging.error("Key Missing in api Response: %s", str(e))
        return default
def map_data(output, response):
    """Copy the car fields from ``response`` into ``output['car']``.

    Every field is read through ``get_value_from_response_or_null``, so a
    key missing from the response does not stop the remaining assignments.

    Args:
        output: Dict to fill; a ``'car'`` sub-dict is created if absent.
        response: API response mapping.

    Returns:
        The same ``output`` object, updated in place.
    """
    # setdefault avoids an uncaught KeyError when 'car' does not exist yet.
    car = output.setdefault('car', {})
    car['name'] = get_value_from_response_or_null(response, 'name')
    car['color'] = get_value_from_response_or_null(response, 'color')
    car['date'] = get_value_from_response_or_null(response, 'date')
    # other mapping
    # other mapping
    return output

Check that a key from json output exists

I keep getting the following error when trying to parse some json:
Traceback (most recent call last):
File "/Users/batch/projects/kl-api/api/helpers.py", line 37, in collect_youtube_data
keywords = channel_info_response_data['items'][0]['brandingSettings']['channel']['keywords']
KeyError: 'brandingSettings'
How do I make sure that I check my JSON output for a key before assigning it to a variable? If a key isn’t found, then I just want to assign a default value. Code below:
try:
channel_id = channel_id_response_data['items'][0]['id']
channel_info_url = YOUTUBE_URL + '/channels/?key=' + YOUTUBE_API_KEY + '&id=' + channel_id + '&part=snippet,contentDetails,statistics,brandingSettings'
print('Querying:', channel_info_url)
channel_info_response = requests.get(channel_info_url)
channel_info_response_data = json.loads(channel_info_response.content)
no_of_videos = int(channel_info_response_data['items'][0]['statistics']['videoCount'])
no_of_subscribers = int(channel_info_response_data['items'][0]['statistics']['subscriberCount'])
no_of_views = int(channel_info_response_data['items'][0]['statistics']['viewCount'])
avg_views = round(no_of_views / no_of_videos, 0)
photo = channel_info_response_data['items'][0]['snippet']['thumbnails']['high']['url']
description = channel_info_response_data['items'][0]['snippet']['description']
start_date = channel_info_response_data['items'][0]['snippet']['publishedAt']
title = channel_info_response_data['items'][0]['snippet']['title']
keywords = channel_info_response_data['items'][0]['brandingSettings']['channel']['keywords']
except Exception as e:
raise Exception(e)
You can either wrap all your assignment in something like
try:
    keywords = channel_info_response_data['items'][0]['brandingSettings']['channel']['keywords']
except (KeyError, IndexError):
    # KeyError: a key is missing somewhere along the path.
    # IndexError: the 'items' list is empty, so [0] does not exist.
    keywords = "default value"
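or, let's say, test for the key first with `'brandingSettings' in channel_info_response_data['items'][0]` (the older .has_key(...) method exists only in Python 2 and was removed in Python 3). IMHO in your case the first solution is preferable.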
suppose you have a dict, you have two options to handle the key-not-exist situation:
1) get the key with default value, like
d = {}
val = d.get('k', 10)
val will be 10 since there is not a key named k
2) try-except
d = {}
try:
val = d['k']
except KeyError:
val = 10
This way is far more flexible since you can do anything in the except block, even ignore the error with a pass statement if you really don't care about it.
How do I make sure that I check my JSON output
At this point your "JSON output" is just a plain native Python dict
for a key before assigning it to a variable? If a key isn’t found, then I just want to assign a default value
Now you know you have a dict, browsing the official documention for dict methods should answer the question:
https://docs.python.org/3/library/stdtypes.html#dict.get
get(key[, default])
Return the value for key if key is in the dictionary, else default. If default is not given, it defaults to None, so that this method never raises a KeyError.
so the general case is:
var = data.get(key, default)
Now if you have deeply nested dicts/lists where any key or index could be missing, catching KeyErrors and IndexErrors can be simpler:
try:
var = data[key1][index1][key2][index2][keyN]
except (KeyError, IndexError):
var = default
As a side note: your code snippet is filled with repeated channel_info_response_data['items'][0]['statistics'] and channel_info_response_data['items'][0]['snippet'] expressions. Using intermediate variables will make your code more readable, easier to maintain, AND a bit faster too:
# always set a timeout if you don't want the program to hang forever
channel_info_response = requests.get(channel_info_url, timeout=30)
# always check the response status - having a response doesn't
# mean you got what you expected. Here we use the `raise_for_status()`
# shortcut which will raise an exception for any 4xx / 5xx response.
channel_info_response.raise_for_status()
# requests knows how to deal with json:
channel_info_response_data = channel_info_response.json()
# we assume that the response MUST have `['items'][0]`,
# and that this item MUST have "statistics" and "snippet"
item = channel_info_response_data['items'][0]
stats = item["statistics"]
snippet = item["snippet"]
no_of_videos = int(stats.get('videoCount', 0))
no_of_subscribers = int(stats.get('subscriberCount', 0))
no_of_views = int(stats.get('viewCount', 0))
# videoCount defaults to 0 above, so guard the division: a channel without
# videos would otherwise raise ZeroDivisionError.
avg_views = round(no_of_views / no_of_videos, 0) if no_of_videos else 0.0
try:
    photo = snippet['thumbnails']['high']['url']
except KeyError:
    photo = None
description = snippet.get('description', "")
start_date = snippet.get('publishedAt', None)
title = snippet.get('title', "")
try:
    keywords = item['brandingSettings']['channel']['keywords']
except KeyError:
    keywords = ""
You may also want to learn about string formatting (concatenating strings is quite error-prone and barely readable), and how to pass arguments to requests.get()

Getting wrong result from JSON - Python 3

Im working on a small project of retrieving information about books from the Google Books API using Python 3. For this i make a call to the API, read out the variables and store those in a list. For a search like "linkedin" this works perfectly. However when i enter "Google", it reads the second title from the JSON input. How can this happen?
Please find my code below (Google_Results is the class I use to initialize the variables):
import requests
def Book_Search(search_term):
parms = {"q": search_term, "maxResults": 3}
r = requests.get(url="https://www.googleapis.com/books/v1/volumes", params=parms)
print(r.url)
results = r.json()
i = 0
for result in results["items"]:
try:
isbn13 = str(result["volumeInfo"]["industryIdentifiers"][0]["identifier"])
isbn10 = str(result["volumeInfo"]["industryIdentifiers"][1]["identifier"])
title = str(result["volumeInfo"]["title"])
author = str(result["volumeInfo"]["authors"])[2:-2]
publisher = str(result["volumeInfo"]["publisher"])
published_date = str(result["volumeInfo"]["publishedDate"])
description = str(result["volumeInfo"]["description"])
pages = str(result["volumeInfo"]["pageCount"])
genre = str(result["volumeInfo"]["categories"])[2:-2]
language = str(result["volumeInfo"]["language"])
image_link = str(result["volumeInfo"]["imageLinks"]["thumbnail"])
dict = Google_Results(isbn13, isbn10, title, author, publisher, published_date, description, pages, genre,
language, image_link)
gr.append(dict)
print(gr[i].title)
i += 1
except:
pass
return
gr = []
Book_Search("Linkedin")
I am a beginner to Python, so any help would be appreciated!
It does so because there is no publisher entry in volumeInfo of the first entry, thus it raises a KeyError and your except captures it. If you're going to work with fuzzy data you have to account for the fact that it will not always have the expected structure. For simple cases you can rely on dict.get() and its default argument to return a 'valid' default entry if an entry is missing.
Also, there are a few conceptual problems with your function - it relies on a global gr which is bad design, it shadows the built-in dict type and it captures all exceptions guaranteeing that you cannot exit your code even with a SIGINT... I'd suggest you to convert it to something a bit more sane:
def book_search(search_term, max_results=3):
    """Query the Google Books API and build one Google_Results per volume.

    Fields missing from a volume are passed as ``None`` (or left out, for
    ISBNs, author and genre) instead of discarding the whole volume.

    Args:
        search_term: Text to search for.
        max_results: Maximum number of volumes requested from the API.

    Returns:
        A list of ``Google_Results`` objects (possibly empty), or ``None``
        when the server response is not valid JSON.
    """
    parms = {"q": search_term, "maxResults": max_results}
    r = requests.get(url="https://www.googleapis.com/books/v1/volumes", params=parms)
    try:  # just in case the server doesn't return valid JSON
        items = r.json().get("items", [])
    except ValueError:
        return None  # invalid response returned, you may raise an error instead

    # Each identifier carries its own "type", so map by type rather than by
    # position in the list (the order is not guaranteed).
    isbn_fields = {"ISBN_10": "isbn10", "ISBN_13": "isbn13"}
    results = []  # a list to store the results
    for item in items:
        if "volumeInfo" not in item:  # invalid entry - missing volumeInfo
            continue
        info = item["volumeInfo"]  # all the data we're interested in is here
        result_dict = {}  # a dictionary to store our discovered fields

        isbns = info.get("industryIdentifiers", None)  # capture ISBNs
        if isinstance(isbns, list):
            for ident in isbns:
                if not isinstance(ident, dict):
                    continue
                field = isbn_fields.get(ident.get("type"))
                if field is not None:
                    result_dict[field] = ident.get("identifier", None)

        result_dict["title"] = info.get("title", None)

        # The question's str(list)[2:-2] stripped the "['" and "']" of the
        # list's repr; slicing the list itself would drop real authors.
        # Joining the entries gives the intended plain-text value.
        authors = info.get("authors", None)  # capture authors
        if isinstance(authors, list) and authors:
            result_dict["author"] = ", ".join(str(a) for a in authors)

        result_dict["publisher"] = info.get("publisher", None)
        result_dict["published_date"] = info.get("publishedDate", None)
        result_dict["description"] = info.get("description", None)
        result_dict["pages"] = info.get("pageCount", None)

        # Genres live under "categories", not "authors".
        genres = info.get("categories", None)  # capture genres
        if isinstance(genres, list) and genres:
            result_dict["genre"] = ", ".join(str(g) for g in genres)

        result_dict["language"] = info.get("language", None)
        result_dict["image_link"] = info.get("imageLinks", {}).get("thumbnail", None)
        # make sure Google_Results accepts keyword arguments like title, author...
        # and make them optional as they might not be in the returned result
        gr = Google_Results(**result_dict)
        results.append(gr)  # add it to the results list
    return results  # return the results
Then you can easily retrieve as much info as possible for a term:
gr = book_search("Google")
And it will be far more tolerant of data omissions, provided that your Google_Results type makes most of the entries optional.
Following @Coldspeed's recommendation it became clear that missing information in the JSON response caused the exception handler to run. Since I only had a "pass" statement there, it skipped the entire result. Therefore I will have to adapt the try/except statements so that errors do get handled properly.
Thanks for the help guys!

How to use bulk upsert in a loop?

The fields that I have in MongoDB are:
id, website_url, status.
I need to find the website_url and update its status to 3 and add a new field called err_desc.
I have a list of website_urls, its status and its err_desc.
Below is my code.
client = MongoClient('localhost', 9000)
db1 = client['Company_Website_Crawl']
collection1 = db1['All']
posts1 = collection1.posts
bulk = posts1.initialize_ordered_bulk_op()
website_url = ["http://www.example.com","http://example2.com/"]
err_desc = ["error1","error2"]
for i in website_url:
parsed_uri = urlparse(i)
domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
final_url = domain
final_url_strip = domain.rstrip("/")
print i,final_url,final_url_strip,"\n"
try:
k = bulk.find({'website_url':i}).upsert().update({'$push':{'err_desc':err_desc,'status':3}})
k = bulk.execute()
print k
except Exception as e:
print "fail"
print e
Error
fail batch op errors occurred
fail Bulk operations can only be executed once.
Initially I used
k = posts1.update({'website_url':final_url_strip},{'$set':{'err_desc':err_desc,'status':3}},multi=True)
It was too slow for 5M records. So I wanted to use bulk update option. Kindly help me to use bulk upsert for this scenario.
The error message is telling you that you need to re-initialize the batch write operation after calling execute(). But the thing is, you are doing it wrong. In your case, you need to queue the operations inside the loop and call execute() once, after the for loop has finished, like this:
from itertools import count

# Number the queued operations from 1, so `queued` is simply the number of
# operations waiting to be sent (0 means there is nothing to execute).
op_numbers = count(1)
queued = 0
# assumes err_desc[n] is the description belonging to website_url[n] -
# TODO confirm against how the two lists are built
for url, desc in zip(website_url, err_desc):
    ...
    try:
        # $set rather than $push: 'status' is a scalar field, and $push only
        # works on arrays (it would also append instead of overwrite).
        bulk.find({'website_url': url}).upsert().update(
            {'$set': {'err_desc': desc, 'status': 3}})
        queued = next(op_numbers)
    except Exception as e:
        ...
# Send everything in one round trip; a bulk object can only be executed once.
if queued:
    bulk.execute()
Also note that the Bulk() API (initialize_ordered_bulk_op / initialize_unordered_bulk_op) is now deprecated and has been replaced by Collection.bulk_write()

ConfigObj option validation

I am using ConfigObj and Validator to parse a configuration file in python. While I like this tool a lot, I am having trouble with validation using a configSpec file. I am using the option() configSpec type that forces the value to be chosen from a controlled vocabulary:
output_mode = option("Verbose", "Terse", "Silent")
I want my code to know when the user enters an option that's not in the CV. From what I have found, Validator only seems to say which config key failed validation, but not why it failed:
from configobj import ConfigObj, flatten_errors
from validate import Validator
config = ConfigObj('config.ini', configspec='configspec.ini')
validator = Validator()
results = config.validate(validator)
if results != True:
for (section_list, key, _) in flatten_errors(config, results):
if key is not None:
print 'The "%s" key in the section "%s" failed validation' % (key, ', '.join(section_list))
else:
print 'The following section was missing:%s ' % ', '.join(section_list)
That code snippet works but there are any number of reasons why a key might have failed validation, from not being in an integer range to not being in the CV. I don't want to have to interrogate the key name and raise a different kind of exception depending on the failure cases for that key. Is there a cleaner way to handle specific types of validation errors?
Long time stackoverflow reader, first time poster :-)
Update: I think this does what I want to do. The key is that config obj stores errors as Exceptions which can then be checked against those that subclass ValidateError. Then you just have to do one check per subclass rather than one check per parameter value. It might be nicer if validate just threw an exception if validation failed but maybe you would lose other functionality.
self.config = configobj.ConfigObj(configFile, configspec=self.getConfigSpecFile())
validator = Validator()
results = self.config.validate(validator, preserve_errors=True)
for entry in flatten_errors(self.config, results):
[sectionList, key, error] = entry
if error == False:
msg = "The parameter %s was not in the config file\n" % key
msg += "Please check to make sure this parameter is present and there are no mis-spellings."
raise ConfigException(msg)
if key is not None:
if isinstance(error, VdtValueError):
optionString = self.config.configspec[key]
msg = "The parameter %s was set to %s which is not one of the allowed values\n" % (key, self.config[key])
msg += "Please set the value to be in %s" % optionString
raise ConfigException(msg)
OptionString is just a string of the form option("option 1", "option 2") rather than a list so to get this to look nice, you need to grab the substring in the ()'s.
For future reference for anyone interested, you could also check for extraneous data. This can be handled with the get_extra_values function. The complete example shown below hence does:
load the configuration with validator
look for all the validated errors
verify extra values
from configobj import ConfigObj, ConfigObjError, flatten_errors, get_extra_values
from validate import Validator, VdtValueError
def load_config(configfile, configspec, raise_exception=True):
    """Load ``configfile`` and check it according to ``configspec``.

    Every failure reported by ``flatten_errors`` is collected as an error:
    missing sections, missing keys, values outside the allowed set, and any
    other failed check.  Extra sections or values reported by
    ``get_extra_values`` are printed as warnings only.

    Args:
        configfile: Configuration file handed to ``ConfigObj``.
        configspec: Specification file handed to ``ConfigObj``.
        raise_exception: When true (the default), raise if errors were
            found; otherwise print them and still return the config.

    Returns:
        The loaded ``ConfigObj`` instance.

    Raises:
        RuntimeError: If errors were found and ``raise_exception`` is true.
    """
    config = ConfigObj(configfile, file_error=True, configspec=configspec)
    validator = Validator()
    results = config.validate(validator, preserve_errors=True)

    msg = ""
    fatalerr = False
    for sectionList, key, error in flatten_errors(config, results):
        # flatten_errors only yields entries that failed validation.
        fatalerr = True
        section_string = '[' + (']['.join(sectionList) or "TOP LEVEL") + ']'
        if key is None:
            # A missing section is reported with key=None; formatting None
            # with ':>30s' would raise TypeError, so handle it separately.
            msg += f"\nSection {section_string} is missing"
        elif error is False:
            msg += f"\n{key:>30s} missing in section [{']['.join(sectionList)}]"
        else:
            # sectionList is a path of nested section names: walk down to
            # the section that actually holds the failing key.
            section = config
            for section_name in sectionList:
                section = section[section_name]
            if isinstance(error, VdtValueError):
                # presumably each section carries its own spec in
                # `.configspec` - verify against the configobj version in use
                spec = getattr(section, "configspec", None)
                optionString = spec.get(key) if spec is not None else None
                msg += f"\nThe parameter {key} was set to {section.get(key)!r} which is not one of the allowed values\n"
                msg += " Please set the value to be in %s" % optionString
            else:
                # Any other failed check (e.g. a type error) is still an error.
                msg += f"\nThe parameter {key} in section {section_string} failed validation: {error}"

    # verifying extra values below
    wmsg = ""
    for sections, name in get_extra_values(config):
        # this code gets the extra values themselves
        the_section = config
        for section in sections:
            the_section = the_section[section]
        # the_value may be a section or a value
        the_value = the_section[name]
        # Sections are subclasses of dict
        section_or_value = 'section' if isinstance(the_value, dict) else 'value'
        section_string = '[' + (']['.join(sections) or "TOP LEVEL") + ']'
        wmsg += f"\n{name:>30s}: Extra {section_or_value} on section {section_string}"

    if wmsg != "":
        print(f"\nWARNINGS found in configuration file {configfile}")
        print(wmsg)
    if fatalerr:
        print(f"\nERRORS found in configuration file {configfile}")
        if raise_exception:
            raise RuntimeError(msg)
        print("Fatal errors found, but no exception raised, as requested")
        print(msg)
    else:
        # Only claim success when no errors were collected.
        print(f'Configuration {configfile} validated successfully')
    return config
if __name__ == "__main__":
configfile="xt_default.cfg"
configspec="xt_default_spec.cfg"
config = load_config(configfile, configspec)

Categories