Get python code to persist after IndexError - python

I am querying an API from a website. The API is down for maintenance from time to time, and sometimes there is no data available to query. I have written the code so that the program keeps querying the API even after an error, but it doesn't seem to be working.
The following is the code:
import threading
import json
import urllib
from urllib.parse import urlparse
import httplib2 as http #External library
import datetime
import pyodbc as db
import os
import gzip
import csv
import shutil

def task():
    #Authentication parameters
    headers = { 'AccountKey' : 'secret',
                'accept' : 'application/json'} #this is by default
    #API parameters
    uri = 'http://somewebsite.com/' #Resource URL
    path = '/something/TrafficIncidents?'
    #Build query string & specify type of API call
    target = urlparse(uri + path)
    print(target.geturl())
    method = 'GET'
    body = ''
    #Get handle to http
    h = http.Http()
    #Obtain results
    response, content = h.request(target.geturl(), method, body, headers)
    api_call_time = datetime.datetime.now()
    filename = "traffic_incidents_" + str(datetime.datetime.today().strftime('%Y-%m-%d'))
    createHeader = 1
    if os.path.exists(filename + '.csv'):
        csvFile = open(filename + '.csv', 'a')
        createHeader = 0
    else:
        #compress previous day's file
        prev_filename = "traffic_incidents_" + (datetime.datetime.today()-datetime.timedelta(days=1)).strftime('%Y-%m-%d')
        if os.path.exists(prev_filename + '.csv'):
            with open(prev_filename + '.csv' , 'rb') as f_in, gzip.open(prev_filename + '.csv.gz', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.remove(prev_filename + '.csv')
        #create new csv file for writing
        csvFile = open(filename + '.csv', 'w')
    #Parse JSON to print
    jsonObj = json.loads(content)
    print(json.dumps(jsonObj, sort_keys=True, indent=4))
    with open("traffic_incidents.json","w") as outfile:
        #Saving jsonObj["d"]
        json.dump(jsonObj, outfile, sort_keys=True, indent=4, ensure_ascii=False)
    for i in range(len(jsonObj["value"])):
        jsonObj["value"][i]["IncidentTime"] = jsonObj["value"][i]["Message"].split(' ',1)[0]
        jsonObj["value"][i]["Message"] = jsonObj["value"][i]["Message"].split(' ',1)[1]
        jsonObj["value"][i]["ApiCallTime"] = api_call_time
    #Save to csv file
    header = jsonObj["value"][0].keys()
    csvwriter = csv.writer(csvFile, lineterminator='\n')
    if createHeader == 1:
        csvwriter.writerow(header)
    for i in range(len(jsonObj["value"])):
        csvwriter.writerow(jsonObj["value"][i].values())
    csvFile.close()
    t = threading.Timer(120, task)
    t.start()

while True:
    try:
        task()
    except IndexError:
        pass
    else:
        break
I get the following error and the program stops:
"header = jsonObj["value"][0].keys()
IndexError: list index out of range"
I would like the program to keep running even after the IndexError has occurred.
How can I edit the code to achieve that?
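The IndexError is raised by jsonObj["value"][0] when the API returns an empty "value" list, and once task() has completed successfully it reschedules itself on a threading.Timer thread, where the try/except at the bottom of the script no longer applies. A minimal sketch of one possible approach (catch the error inside task() itself and always reschedule; the existing body is assumed to stay as it is):

import threading

def task():
    try:
        # ... existing body: request the API, parse jsonObj, write the CSV ...
        pass
    except IndexError:
        # jsonObj["value"] was empty (no incidents, or the API is down); skip this cycle
        print("No data this cycle, retrying in 120 seconds")
    finally:
        # reschedule unconditionally so one bad response does not stop the chain
        threading.Timer(120, task).start()

task()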

Related

IO Errno 13 Permission denied

I have a script in which I am trying to extract two different flows from one site and put them in two different files. Unfortunately only one output file is generated, and I get this error message:
Cleaning data..._LiveChat_agents
Traceback (most recent call last):
File "C:\Python_Prog\Customer_Care\LiveChat+Chats.py", line 45, in <module>
with open(messages, 'w') as outfile:
IOError: [Errno 13] Permission denied: 'LiveChat_agents14-12-2018.json'
this is my script:
from azure.storage.blob import BlockBlobService
from azure.storage.blob import ContentSettings
import requests
import re
import os
import json
import datetime
import logging

#authentication service-to-service
block_blob_service = BlockBlobService(account_name='datalake', account_key='XXXXXX')

now = datetime.datetime.now()
today = now.strftime("%d-%m-%Y")

logs = 'C:\\Python_Execution\\Log\\'
os.chdir(logs)
if not os.path.exists(today):
    os.makedirs(today)

file_out = "C:\\Python_Execution\\Temp"
os.chdir(file_out)

#authenticate
#url = 'https://api.livechatinc.com/v2/chats'
headers = {'X-API-Version': '2',}
response_agents = requests.get('https://api.livechatinc.com/agents', headers=headers, auth=('jdoe#whatever.com', '000'))
#print(response_agents.text)
response_agents_toclean = response_agents.text
response_agents_clean = response_agents_toclean.encode('utf-8')
logging.info('Writing data to local file')
response_agents_final = json.loads(response_agents_clean)
messages = 'LiveChat_agents' + today + '.json'
print('Cleaning data..._LiveChat_agents')
response_agents_final = json.loads(response_agents_clean)
with open(messages, 'w') as outfile:
    json.dump(response_agents_final, outfile)

#extract Chats
response_chats = requests.get('https://api.livechatinc.com/chats', headers=headers, auth=('jdoe#whatever.com', '000'))
#print(response_chats.text)
response_chats_toclean = response_chats.text
response_chats_clean = response_chats_toclean.encode('utf-8')
logging.info('Writing data to local file')
response_chats_final = json.loads(response_chats_clean)
messages = 'LiveChat_chats' + today + '.json'
print('Cleaning data...LiveChat_chats')
response_chats_final = json.loads(response_chats_clean)
with open(messages, 'w') as outfile:
    json.dump(response_chats_final, outfile)

print('Uploading to blob storage...')
logging.info('Uploading to blob storage')
blob_container = 'landing-livechat'

#Upload the CSV file to Azure cloud
block_blob_service.create_blob_from_path(
    blob_container,
    messages,
    file_out + '\\' + messages,
    content_settings=ContentSettings(content_type='application/JSON')
)

# Check the list of blobs
generator = block_blob_service.list_blobs(blob_container)
for blob in generator:
    print(blob.name)
logging.info('Upload completed successfully')

print('Deleting temp file...')
logging.info('Deleting temp file')
os.chdir(file_out)
os.remove(messages)

logging.info('Completed Successfully!')
Appreciate your help.
Thanks,
M

How to create a loop with FOR in a temporary file?

I am working with an encrypted file, but I can't manage to create a loop with for to read it before it gets closed and removed.
My intention is to read the data in the encrypted file and loop over it to assign each line to a variable.
Whenever I execute my code, Python just goes straight to the end without ever working with the decrypted info; I believe it is because the with block closes the file before the loop starts.
This is what I want to do; it doesn't work, but gives no errors either:
with open(input_file, 'rb') as fp:
    data = fp.read()
fernet = Fernet(key)
encrypted = fernet.decrypt(data)

with tempfile.TemporaryFile() as fp:
    fp.write(encrypted)
    for url in fp:  # Python ignores the tempfile. I believe it is closed in the previous line.
        segment = url.strip()
        url = 'https://docs.python.org/3.3/tutorial/' + segment
        filename = segment + '.html'
        filePath = pjoin('Data/' + filename)
        response = urlopen(url)
        webContent = response.read()
        html_content = urlopen(url).read()
        matches = re.findall(b'string', html_content)
        if len(matches) == 0:
            print(segment + ' unchanged.')
        else:
            with open(filePath, 'wb') as w:
                w.write(webContent)
This is the working code (Sorry, tried to make it shorter but couldn't):
with open(input_file, 'rb') as fp:
    data = fp.read()
fernet = Fernet(key)
encrypted = fernet.decrypt(data)

with open(output_file, 'wb') as fp:
    fp.write(encrypted)

with open(output_file) as fp:
    for url in fp:
        segment = url.strip()
        url = 'https://docs.python.org/3.3/tutorial/' + segment
        filename = segment + '.html'
        filePath = pjoin('Data/' + filename)
        response = urlopen(url)
        webContent = response.read()
        html_content = urlopen(url).read()
        matches = re.findall(b'string', html_content)
        if len(matches) == 0:
            print(segment + ' unchanged.')
        else:
            with open(filePath, 'wb') as w:
                w.write(webContent)
Header for both examples (kept separate to make them shorter):
#python 3.6.6
from urllib.request import urlopen
import urllib.request
from os.path import join as pjoin
import re, os, sys, tempfile, six, ctypes, time, fileinput
from cryptography.fernet import Fernet
print("[*] Checking list.dat for consistency . . .")
key = b'wTmVBRLytAmlfkctCuEf59K0LDCXa3sGas3kPg3r4fs=' #Decrypt list.dat
input_file = 'List.dat'
output_file = 'List.txt'
List.txt content:
errors
classes
stdlib
Any hints?
The problem is that once you have written to the file, the "file pointer" is at the end of the file. There's nothing to read.
You can use the seek method to reposition the file pointer at the beginning. Alternatively, closing and re-opening the file (as in your working code) will position the pointer at the beginning of the file.
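For instance, a minimal sketch of the seek approach (the surrounding code is assumed to be the same as in the question):

with tempfile.TemporaryFile() as fp:
    fp.write(encrypted)
    fp.seek(0)  # move the file pointer back to the beginning before reading
    for line in fp:
        # TemporaryFile() is opened in binary mode by default, so decode each line
        segment = line.strip().decode()
        url = 'https://docs.python.org/3.3/tutorial/' + segment
        # ... rest of the loop body as in the question ...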
@LarryLustig pretty much answered why your code wasn't working, but IMO if you eliminate the temp file altogether (which shouldn't be necessary) you don't even need to worry about the cursor. See the commented changes below on your desired code.
# We'll use os.linesep to get the line terminator string for your os.
import os

...

with open(input_file, 'rb') as fp:
    data = fp.read()
fernet = Fernet(key)
# decode your decrypted bytes into strings. Change 'utf-8' into whichever file encoding you're using if necessary.
decrypted = fernet.decrypt(data).decode('utf-8')

# Don't write to a temp file.
# Iterate directly on each line of the extracted data.
for url in decrypted.split(os.linesep):
    segment = url.strip()
    url = 'https://docs.python.org/3.3/tutorial/' + segment
    filename = segment + '.html'
    filePath = pjoin('Data/' + filename)
    response = urlopen(url)
    webContent = response.read()
    html_content = urlopen(url).read()
    matches = re.findall(b'string', html_content)
    if len(matches) == 0:
        print(segment + ' unchanged.')
    else:
        with open(filePath, 'wb') as w:
            w.write(webContent)
Alternatively, if you know for sure what is the line terminator used in the file (e.g. \r\n, or \n) then you can eliminate using os.linesep altogether.
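For example, a variant of that loop line (a sketch, not part of the original answer) that sidesteps the question entirely, since str.splitlines() handles both \n and \r\n:

# splitlines() splits on any standard line ending, so os.linesep is no longer needed
for url in decrypted.splitlines():
    segment = url.strip()
    # ... rest of the loop as above ...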

What's the fastest way to get movie info from OMDb with Python?

I have about 200 thousand IMDb IDs in a file, and I want to get JSON information for them from the OMDb API.
I wrote this code and it works correctly, but it's very slow (3 seconds for each ID; it would take 166 hours):
import urllib.request
import csv
import datetime
from collections import defaultdict

i = 0
columns = defaultdict(list)

with open('a.csv', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for row in reader:
        for (k, v) in row.items():
            columns[k].append(v)

with open('a.csv', 'r', encoding='utf-8') as csvinput:
    with open('b.csv', 'w', encoding='utf-8', newline='') as csvoutput:
        writer = csv.writer(csvoutput)
        for row in csv.reader(csvinput):
            if row[0] == "item_id":
                writer.writerow(row + ["movie_info"])
            else:
                url = urllib.request.urlopen(
                    "http://www.omdbapi.com/?i=tt" + str(columns['item_id'][i]) + "&apikey=??????").read()
                url = url.decode('utf-8')
                writer.writerow((row + [url]))
                i = i + 1
What's the fastest way to get movie info from OMDb with Python?
Edit: I wrote the code below, and after getting 1022 URL responses I get the following error:
import grequests

urls = open("a.csv").readlines()
api_key = '??????'

def exception_handler(request, exception):
    print("Request failed")

# read the file and put each line into a list
for i in range(len(urls)):
    urls[i] = "http://www.omdbapi.com/?i=tt" + str(urls[i]).rstrip('\n') + "&apikey=" + api_key

requests = (grequests.get(u) for u in urls)
responses = grequests.map(requests, exception_handler=exception_handler)

with open('b.json', 'wb') as outfile:
    for response in responses:
        outfile.write(response.content)
The error is:
Traceback (most recent call last):
File "C:/python_apps/omdb_async.py", line 18, in <module>
outfile.write(response.content)
AttributeError: 'NoneType' object has no attribute 'content'
How can I solve this error?
This code is IO bound and would benefit greatly from using Python's async/await capabilities. You can loop over your collection of URLs, creating an asynchronously executing request for each, much like the example in this SO question.
Once you're making these requests asynchronously, you may need to throttle your request rate to something within the OMDB API limit.
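A minimal sketch of that idea using asyncio with the aiohttp library (the library choice, the CONCURRENCY value, and the file names are assumptions for illustration, not something from the answer):

import asyncio
import json

import aiohttp

API_KEY = '??????'   # placeholder, as in the question
CONCURRENCY = 10     # assumed cap; tune it to whatever your OMDb plan allows

async def fetch(session, sem, imdb_id):
    url = 'http://www.omdbapi.com/?i=tt' + imdb_id + '&apikey=' + API_KEY
    async with sem:                        # throttle the number of in-flight requests
        async with session.get(url) as resp:
            return await resp.json()

async def main(ids):
    sem = asyncio.Semaphore(CONCURRENCY)
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, sem, i) for i in ids]
        # return_exceptions=True keeps one failed request from killing the whole batch
        return await asyncio.gather(*tasks, return_exceptions=True)

if __name__ == '__main__':
    with open('a.csv') as f:
        ids = [line.strip() for line in f if line.strip()]
    results = asyncio.run(main(ids))
    with open('b.json', 'w') as out:
        json.dump([r for r in results if not isinstance(r, Exception)], out)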

Create byte array for zip file

I'm trying to import a zip file into Confluence with the RPC importSpace object, but it keeps giving errors. Atlassian has the following documentation that you can use for this:
public boolean importSpace(String token, byte[] importData)
I have created a small Python script that loops through a file where the zip filenames are saved:
#!/usr/bin/python

import xmlrpclib
import time
import urllib

confluence2site = "https://confluence"
server = xmlrpclib.ServerProxy(confluence2site + '/rpc/xmlrpc')
username = ''
password = ''
token = server.confluence2.login(username, password)
loginString = "?os_username=" + username + "&os_password=" + password
filelist = ""
start = True

with open('exportedspaces.txt') as f:
    for file in f:
        try:
            print file
            f = open(os.curdir + "\\zips\\" + file, 'rb')
            fileHandle = f.read()
            f.close()
            server.confluence2.importSpace(token, xmlrpclib.Binary(fileHandle))
        except:
            print file + " failed to restore"
            failureList.append(file)
Where does it go wrong?

Check response using urllib2

I am trying to access pages by incrementing a page counter using the opencorporates API. The problem is that sometimes the response contains no useful data. For example, in the URL below for jurisdiction_code = ae_az I get a webpage showing just this:
{"api_version":"0.2","results":{"companies":[],"page":1,"per_page":26,"total_pages":0,"total_count":0}}
which is technically empty. How can I check for such data and skip over it to move on to the next jurisdiction?
This is my code
import urllib2
import json, os

f = open('codes', 'r')
for line in f.readlines():
    id = line.strip('\n')
    url = 'http://api.opencorporates.com/v0.2/companies/search?q=&jurisdiction_code={0}&per_page=26&current_status=Active&page={1}?api_token=ab123cd45'
    i = 0
    directory = id
    os.makedirs(directory)
    while True:
        i += 1
        req = urllib2.Request(url.format(id, i))
        print url.format(id, i)
        try:
            response = urllib2.urlopen(url.format(id, i))
        except urllib2.HTTPError, e:
            break
        content = response.read()
        fo = str(i) + '.json'
        OUTFILE = os.path.join(directory, fo)
        with open(OUTFILE, 'w') as f:
            f.write(content)
Interpret the response you get back (you already know it's json) and check if the data you want is there.
...
content = response.read()
data = json.loads(content)
if not data.get('results', {}).get('companies'):
    break
...
Here's your code written with Requests and using the answer here. It is nowhere near as robust or clean as it should be, but demonstrates the path you might want to take. The rate limit is a guess, and doesn't seem to work. Remember to put your actual API key in.
import json
import os
from time import sleep

import requests

url = 'http://api.opencorporates.com/v0.2/companies/search'
token = 'ab123cd45'
rate = 20  # seconds to wait after rate limited

with open('codes') as f:
    codes = [l.strip('\n') for l in f]

def get_page(code, page, **kwargs):
    params = {
        # 'api_token': token,
        'jurisdiction_code': code,
        'page': page,
    }
    params.update(kwargs)
    while True:
        r = requests.get(url, params=params)
        try:
            data = r.json()
        except ValueError:
            return None
        if 'error' in data:
            print data['error']['message']
            sleep(rate)
            continue
        return data['results']

def dump_page(code, page, data):
    with open(os.path.join(code, str(page) + '.json'), 'w') as f:
        json.dump(data, f)

for code in codes:
    try:
        os.makedirs(code)
    except os.error:
        pass
    data = get_page(code, 1)
    if data is None:
        continue
    dump_page(code, 1, data['companies'])
    for page in xrange(1, int(data.get('total_pages', 1))):
        data = get_page(code, page)
        if data is None:
            break
        dump_page(code, page, data['companies'])
I think that actually this example is not "technically empty." It contains data and is therefore technically not empty. The data just does not include any fields that are useful to you. :-)
If you want your code to skip over responses that have uninteresting data, then just check whether the JSON has the necessary fields before writing any data:
content = response.read()
try:
    json_content = json.loads(content)
    if json_content['results']['total_count'] > 0:
        fo = str(i) + '.json'
        OUTFILE = os.path.join(directory, fo)
        with open(OUTFILE, 'w') as f:
            f.write(content)
except KeyError:
    break
except ValueError:
    break
etc. You might want to report the ValueError or the KeyError, but that's up to you.
