There is a script that makes API requests by iterating over a params dictionary.
If the params are incompatible with each other (metrics and dimensions) or contain a mistake, it throws an exception:
googleapiclient.errors.HttpError: "Could not parse content (N/A) of field parameters.filters.">
and the script stops working.
It looks like this:
def yt_return_api_response(yt_params):
    responses = []
    timestamp = []
    try:
        youtubeAnalytics = get_service()
        for k, v in yt_params.items():
            request = execute_api_request(
                youtubeAnalytics.reports().query,
                ids=v['ids'],
                startDate=v['startDate'],
                endDate=v['endDate'],
                metrics=v['metrics'],
                dimensions=v['dimensions'],
                filters=v['filters'],
                maxResults=v['maxResults'],
                sort=v['sort'])
            response = youtube_response(request)
            responses.append(response)
            # get the timestamp
            timestamp_request = dt.datetime.now()
            timestamp_request = timestamp_request.strftime('%Y-%m-%d %H:%M:%S.%f')
            timestamp.append(timestamp_request)
        return responses, timestamp
    except Exception as e:
        logging.error('Check the request params, unsupported query', exc_info=True)
I've tried to change it so that if one iteration fails, the script doesn't crash but keeps working.
With 'while True' it starts and just keeps running without producing any result:
def yt_return_api_response(yt_params, request_ids, filters):
    responses = []
    timestamp = []
    while True:
        try:
With 'finally' it returns empty lists:
def yt_return_api_response(yt_params):
    responses = []
    timestamp = []
    try:
        youtubeAnalytics = get_service()
        for k, v in yt_params.items():
            request = execute_api_request(
                youtubeAnalytics.reports().query,
                ids=v['ids'],
                startDate=v['startDate'],
                endDate=v['endDate'],
                metrics=v['metrics'],
                dimensions=v['dimensions'],
                filters=v['filters'],
                maxResults=v['maxResults'],
                sort=v['sort'])
            response = youtube_response(request)
            responses.append(response)
            # get the timestamp
            timestamp_request = dt.datetime.now()
            timestamp_request = timestamp_request.strftime('%Y-%m-%d %H:%M:%S.%f')
            timestamp.append(timestamp_request)
    except Exception as e:
        logging.error('Check the request params, unsupported query', exc_info=True)
    finally:
        return responses, timestamp
Is there another way to handle exceptions?
You need to skip the failing iteration; in your code, when an exception is caught, execution leaves the loop entirely. You can skip a single iteration like this:
def yt_return_api_response(yt_params):
    responses = []
    timestamp = []
    youtubeAnalytics = get_service()
    for k, v in yt_params.items():
        try:
            request = execute_api_request(
                youtubeAnalytics.reports().query,
                ids=v['ids'],
                startDate=v['startDate'],
                endDate=v['endDate'],
                metrics=v['metrics'],
                dimensions=v['dimensions'],
                filters=v['filters'],
                maxResults=v['maxResults'],
                sort=v['sort'])
            response = youtube_response(request)
            responses.append(response)
            # get the timestamp
            timestamp_request = dt.datetime.now()
            timestamp_request = timestamp_request.strftime('%Y-%m-%d %H:%M:%S.%f')
            timestamp.append(timestamp_request)
        except Exception as e:
            logging.error('Check the request params, unsupported query', exc_info=True)
    return responses, timestamp
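A minimal variation on that, assuming the same helper functions (get_service, execute_api_request, youtube_response) and the googleapiclient package from the traceback: catching googleapiclient.errors.HttpError instead of a bare Exception skips only the queries the API rejects, while unexpected errors still surface.

from googleapiclient.errors import HttpError

def yt_return_api_response(yt_params):
    responses = []
    timestamp = []
    youtubeAnalytics = get_service()
    for k, v in yt_params.items():
        try:
            request = execute_api_request(
                youtubeAnalytics.reports().query,
                ids=v['ids'],
                startDate=v['startDate'],
                endDate=v['endDate'],
                metrics=v['metrics'],
                dimensions=v['dimensions'],
                filters=v['filters'],
                maxResults=v['maxResults'],
                sort=v['sort'])
            responses.append(youtube_response(request))
            timestamp.append(dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))
        except HttpError:
            # only queries the API rejects (e.g. incompatible metrics/dimensions) are skipped;
            # any other exception still propagates and stays visible
            logging.error('Skipping query %s: unsupported params', k, exc_info=True)
    return responses, timestamp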
I've searched various methods, none working, and I don't understand which part went wrong.
The request works for a single ID; how do I loop through a list of IDs so that any ID that returns an error is skipped?
Current code:
import requests
import pandas as pd

x = 22555003
URL = "https://data.gcis.nat.gov.tw/od/data/api/5F64D864-61CB-4D0D-8AD9-492047CC1EA6?$format=json&$filter=Business_Accounting_NO eq {}".format(x)
response = requests.get(url=URL)
data = response.json()
result = pd.DataFrame(data)
result.head()
Desired output, but it errors:
listID = ['22555003','12345678','27240313']
# 12345678 is an error ID
result = []
for x in listID:
    try:
        JSONContent = requests.get("https://data.gcis.nat.gov.tw/od/data/api/5F64D864-61CB-4D0D-8AD9-492047CC1EA6?$format=json&$filter=Business_Accounting_NO eq {}".format(x)).json()
        result.append([JSONContent['Business_Accounting_NO'],
                       JSONContent['Capital_Stock_Amount']])
    except:
        pass
dataset = pd.DataFrame(result)
dataset.head()
Why is result empty?
Thanks!
import pandas as pd
import requests

listID = ['22555003','12345678','27240313']
# 12345678 is an error ID
result = []
for x in listID:
    try:
        JSONContent = requests.get("https://data.gcis.nat.gov.tw/od/data/api/5F64D864-61CB-4D0D-8AD9-492047CC1EA6?$format=json&$filter=Business_Accounting_NO eq {}".format(x)).json()
        # the response parses to a JSON array, so the record's fields are in the first element
        # print(JSONContent[0]['Business_Accounting_NO'])
        result.append([JSONContent[0]['Business_Accounting_NO'], JSONContent[0]['Capital_Stock_Amount']])
        print(result)
    except Exception as e:
        # print the exception instead of silently passing, so the failing ID is visible
        print(e)

dataset = pd.DataFrame(result)
dataset.head()
print(result)
I have put together the below and wanted to test multithreading.
I am trying to make the for loop run threaded, so several URLs in the list can be processed in parallel.
This script doesn't error, but it doesn't do anything and I am not sure why.
If I remove the multithreading pieces, it works fine.
Can anyone help me?
import multiprocessing.dummy as mp
import requests
import pandas as pd
import datetime

urls = [
    'http://google.co.uk',
    'http://bbc.co.uk/'
]

def do_print(s):
    check_data = pd.DataFrame([])
    now = datetime.datetime.now()
    try:
        response = requests.get(url)
    except:
        response = 'null'
    try:
        response_code = response.status_code
    except:
        response_code = 'null'
    try:
        response_content = response.content
    except:
        response_content = 'null'
    try:
        response_text = response.text
    except:
        response_text = 'null'
    try:
        response_content_type = response.headers['Content-Type']
    except:
        response_content_type = 'null'
    try:
        response_server = response.headers['Server']
    except:
        response_server = 'null'
    try:
        response_last_modified = response.headers['Last-Modified']
    except:
        response_last_modified = 'null'
    try:
        response_content_encoding = response.headers['Content-Encoding']
    except:
        response_content_encoding = 'null'
    try:
        response_content_length = response.headers['Content-Length']
    except:
        response_content_length = 'null'
    try:
        response_url = response.url
    except:
        response_url = 'null'
    if int(response_code) < 400:
        availability = 'OK'
    elif int(response_code) > 399 and int(response_code) < 500:
        availability = 'Client Error'
    elif int(response_code) > 499:
        availability = 'Server Error'
    if int(response_code) < 400:
        availability_score = 1
    elif int(response_code) > 399 and int(response_code) < 500:
        availability_score = 0
    elif int(response_code) > 499:
        availability_score = 0
    d = {'check_time': [now], 'code': [response_code], 'type': [response_content_type], 'url': [response_url], 'server': [response_server], 'modified': [response_last_modified], 'encoding': [response_content_encoding], 'availability': [availability], 'availability_score': [availability_score]}
    df = pd.DataFrame(data=d)
    check_data = check_data.append(df, ignore_index=True, sort=False)

if __name__ == "__main__":
    p = mp.Pool(4)
    p.map(do_print, urls)
    p.close()
    p.join()
When I run the code I get an error because it tries to convert int("null") - all because you have
    except:
        response_code = 'null'
If I use except Exception as ex: print(ex), then I get an error that the variable url doesn't exist. And that is true, because you have def do_print(s): but it should be def do_print(url):
BTW: instead of 'null' you could use the standard None and later check if response_code: before you try to convert it to an integer. Or simply skip the rest of the code when you get an error.
Another problem: the worker should use return df, and you should collect the results with
    results = p.map(...)
and then use results to create the DataFrame check_data.
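Putting those changes together, a minimal sketch (it keeps the same fields where possible, reads headers with .get() so a missing header simply becomes None instead of needing a try/except per field, and builds the final check_data in the parent process):

import multiprocessing.dummy as mp
import datetime
import requests
import pandas as pd

urls = [
    'http://google.co.uk',
    'http://bbc.co.uk/'
]

def do_print(url):                      # parameter renamed so requests.get(url) works
    now = datetime.datetime.now()
    try:
        response = requests.get(url)
    except requests.RequestException:
        return None                     # skip the rest of the work when the request fails
    code = response.status_code
    if code < 400:
        availability, availability_score = 'OK', 1
    elif code < 500:
        availability, availability_score = 'Client Error', 0
    else:
        availability, availability_score = 'Server Error', 0
    d = {'check_time': [now],
         'code': [code],
         'type': [response.headers.get('Content-Type')],
         'url': [response.url],
         'server': [response.headers.get('Server')],
         'modified': [response.headers.get('Last-Modified')],
         'encoding': [response.headers.get('Content-Encoding')],
         'availability': [availability],
         'availability_score': [availability_score]}
    return pd.DataFrame(data=d)         # return the row instead of appending to a local variable

if __name__ == "__main__":
    p = mp.Pool(4)
    results = p.map(do_print, urls)     # one DataFrame (or None) per URL
    p.close()
    p.join()
    check_data = pd.concat([df for df in results if df is not None],
                           ignore_index=True, sort=False)
    print(check_data)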
I'm new to Python and I want this code to run only once and stop, not every 30 seconds,
because I want to run multiple scripts like this with different access tokens every 5 seconds from the command line.
When I tried this code, it never moves on to the second one because of the while True:
import requests
import time

api_url = "https://graph.facebook.com/v2.9/"
access_token = "access token"
graph_url = "site url"
post_data = {'id': graph_url, 'scrape': True, 'access_token': access_token}

# Beware of rate limiting if trying to increase frequency.
refresh_rate = 30  # refresh rate in seconds

while True:
    try:
        resp = requests.post(api_url, data=post_data)
        if resp.status_code == 200:
            contents = resp.json()
            print(contents['title'])
        else:
            error = "Warning: Status Code {}\n{}\n".format(
                resp.status_code, resp.content)
            print(error)
            raise RuntimeWarning(error)
    except Exception as e:
        f = open("open_graph_refresher.log", "a")
        f.write("{} : {}".format(type(e), e))
        f.close()
        print(e)
    time.sleep(refresh_rate)
From what I understand, you're trying to execute the piece of code for multiple access tokens. To make your job simpler, keep all your access tokens in a list and use the following code. It assumes that you know all your access tokens in advance.
import requests
import time

def scrape_facebook(api_url, access_token, graph_url):
    """Scrapes the given access token."""
    post_data = {'id': graph_url, 'scrape': True, 'access_token': access_token}
    try:
        resp = requests.post(api_url, data=post_data)
        if resp.status_code == 200:
            contents = resp.json()
            print(contents['title'])
        else:
            error = "Warning: Status Code {}\n{}\n".format(
                resp.status_code, resp.content)
            print(error)
            raise RuntimeWarning(error)
    except Exception as e:
        f = open(access_token + "_" + "open_graph_refresher.log", "a")
        f.write("{} : {}".format(type(e), e))
        f.close()
        print(e)

access_token = ['a', 'b', 'c']
graph_url = ['sss', 'xxx', 'ppp']
api_url = "https://graph.facebook.com/v2.9/"

for n in range(len(graph_url)):
    scrape_facebook(api_url, access_token[n], graph_url[n])
    time.sleep(5)
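As a small follow-up, assuming the two lists stay paired by position, the final loop can also be written with zip, which avoids indexing:

for token, site in zip(access_token, graph_url):
    scrape_facebook(api_url, token, site)
    time.sleep(5)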
I'm using the multiprocessing.dummy module to do some concurrent processing. I'm making HTTP requests, and there is a possibility that the object will not have any data returned. In this case I need to capture the AttributeError and move on.
I tried capturing it in the object itself and still received the error; the only thing that worked was a try/except around the pool.map call itself. I'm wondering why that is, and whether this is the best way to do error handling for multiprocessing and map functions.
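(For illustration, a minimal self-contained sketch with hypothetical names, separate from the project code below: the usual pattern is to catch the exception inside the function handed to pool.map, so one failing item is logged and skipped while the remaining items are still processed.)

from multiprocessing.dummy import Pool as ThreadPool

def safe_worker(item):
    try:
        return item['value'] * 2                      # the part that may blow up
    except (AttributeError, KeyError, TypeError) as e:
        print('Skipping {}: {}'.format(item, e))      # log and move on
        return None

pool = ThreadPool(4)
results = pool.map(safe_worker, [{'value': 1}, {}, {'value': 3}])
pool.close()
pool.join()
print(results)                                        # [2, None, 6]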
Here is some of my code for reference:
all_commits = []
projects = [Project(value['id']) for value in project_data.values()]

def process_projects(project):
    if project.name in bad_names.keys():
        project.name = bad_names[project.name]
    project.return_results(rest, all_commits)

pool = ThreadPool(8)
pool.map(process_projects, projects)
pool.close()
pool.join()

print 'All data gathered.'
print 'Number of commits: {}'.format(len(all_commits))

fieldnames = get_fieldnames(
    'ods_gerrit.staging_gerrit_commits',
    settings.REDSHIFT_POSTGRES_INFO)
s3_file = ('staging_gerrit_commits_{}.csv.gz'.format(
    date.today())
)

with gzip.open(s3_file, 'wb') as outf:
    writer = DictWriter(
        outf,
        fieldnames=fieldnames,
        extrasaction='ignore',
        delimiter='|'
    )
    cnt = 0
    pool = ThreadPool(8)
    try:
        pool.map(process_commits, all_commits)
    except AttributeError:
        pass
    pool.close()
    pool.join()
Then here is my Commit object code and the function that is being called by the map function:
class Commit(object):
    def __init__(self, rev_id, change_id, full_id):
        self.rev_id = rev_id
        self.change_id = change_id
        self.full_id = full_id

    def clean_data(self, _dict):
        for key, value in _dict.items():
            if isinstance(value, dict):
                self.clean_data(_dict[key])
            else:
                try:
                    _dict[key] = _dict[key].encode(
                        'utf_8',
                        'replace'
                    ).encode('string_escape').replace('|', '[pipe]')
                except AttributeError:
                    continue

    def get_data(self, ger_obj):
        print 'Getting data for a commit for {f_id}'.format(
            f_id=self.full_id
        )
        endpoint = (r'/changes/{c_id}/revisions/{r_id}/commit'.format(
            c_id=self.change_id,
            r_id=self.rev_id
        ))
        try:
            self.data = ger_obj.get(endpoint)
        except HTTPError:
            try:
                endpoint = (r'/changes/{f_id}/revisions/{r_id}/commit'.format(
                    f_id=self.full_id,
                    r_id=self.rev_id
                ))
                self.data = ger_obj.get(endpoint)
            except HTTPError:
                logging.warning('Neither endpoint returned data: {ep}'.format(
                    ep=endpoint
                ))
                raise HTTPError()
        except ReadTimeout:
            logging.warning('Read Timeout occurred for a commit. Endpoint: '
                            '{ep}'.format(ep=endpoint))
            return
        self.data['change_id'] = self.change_id
        self.data['proj_branch_id'] = self.full_id
        self.data['revision_id'] = self.rev_id
        self.data['commitid'] = self.data.get('commit')
        self.data['name'] = self.data.get('committer')['name']
        self.data['email'] = self.data.get('committer')['email']
        self.data['date'] = self.data.get('committer')['date']
        hash = md5()
        hash.update(json.dumps(self.data).encode('utf-8'))
        self.data['etl_checksum_md5'] = hash.hexdigest()
        self.data['etl_process_status'] = settings.ETL_PROCESS_STATUS
        self.data['etl_datetime_local'] = settings.ETL_DATETIME_LOCAL
        self.data['etl_pdi_version'] = settings.ETL_PDI_VERSION
        self.data['etl_pdi_build_version'] = settings.ETL_PDI_BUILD_VERSION
        self.data['etl_pdi_hostname'] = settings.ETL_PDI_HOSTNAME
        self.data['etl_pdi_ipaddress'] = settings.ETL_PDI_IPADDRESS
        self.clean_data(self.data)

    def write_data(self, writer):
        print 'Writing a commit for {f_id}'.format(f_id=self.full_id)
        writer.writerow(self.data)
And the controller function:
def process_commits(commit):
    print 'On commit #{}'.format(cnt)
    unique_id = commit.change_id + commit.rev_id
    if not id_search(unique_ids, unique_id):
        try:
            commit.get_data(rest)
        except HTTPError:
            pass
        try:
            commit.write_data(writer=writer)
        except UnicodeEncodeError:
            logging.warning(
                '{data} caused a Unicode Encode Error.'.format(
                    data=commit.data
                ))
            pass
    global cnt
    cnt += 1
I tried to get the Icecast metadata of an MP3 stream with this script:
import requests

url = 'http://stream.jam.fm/jamfm-nmr/mp3-128/konsole/'
try:
    response = requests.get(url, headers={'Icy-MetaData': 1}, stream=True)
    response.raise_for_status()
except requests.RequestException, e:
    print 'Error:', e
else:
    headers, stream = response.headers, response.raw
    meta_int = headers.get('icy-metaint')
    if meta_int is not None:
        audio_length = int(meta_int)
        while True:
            try:
                audio_data = stream.read(audio_length)
                meta_byte = stream.read(1)
                if meta_byte:
                    meta_length = ord(meta_byte) * 16
                    meta_data = stream.read(meta_length)
                    print meta_data
            except KeyboardInterrupt:
                break
        response.close()
This works, but only for the first packet. I never receive an update of the title information when the track changes. My question is: is this intended behavior and the track info is only sent once, or did I do something wrong? I would like to be able to notice a track change without polling the stream from time to time.
while True:
    try:
        # new request each time, so the headers and the first metadata block are read again
        response = requests.get(url, headers={'Icy-MetaData': 1}, stream=True)
        response.raise_for_status()
        headers, stream = response.headers, response.raw
        meta_int = headers.get('icy-metaint')
        audio_length = int(meta_int)
        audio_data = stream.read(audio_length)
        meta_byte = stream.read(1)
        if meta_byte:
            meta_length = ord(meta_byte) * 16
            meta_data = stream.read(meta_length)
            print(meta_data)
    except KeyboardInterrupt:
        break