pandas .loc stops working when embedded in loop - python

import requests
import json
import pandas as pd

CSV_output_df = pd.read_csv('output/DEMO_CSV.csv', index_col=None)
payload = {}
headers = {
    'Authorization': 'Basic ***********************************************************'
}
for index, row in CSV_output_df.iterrows():
    Package_label = CSV_output_df.loc[index, "Package Id"]
    licenseNumber = CSV_output_df.loc[index, "licenseNumber"]
    Package_label = str(Package_label)
    licenseNumber = str(licenseNumber)
    url = ("https://api-mi.metrc.com/packages/v1/" + Package_label + "?licenseNumber=" + licenseNumber)
    response = requests.request("GET", url, headers=headers, data=payload)
    json_data = (response.text.encode('utf8'))
    json_data = str(json_data)
    json_data = (json_data.strip('b'))
    json_data = (json_data.strip("'"))
    json_data = (json_data.strip('{'))
    json_data = (json_data.strip('}'))
    json_data = (json_data.replace('"Item":{', ''))
    json_data = (json_data.split(','))
    json_data_df = pd.DataFrame(json_data)
    Id = json_data_df.loc[0, 0]
    Id = Id.replace('"Id":', '')
    CSV_output_df.loc[index, "api_id"] = Id
for index, row in CSV_output_df.iterrows():
    api_id = CSV_output_df.loc[index, "api_id"]
    licenseNumber = CSV_output_df.loc[index, "licenseNumber"]
    api_id = str(api_id)
    licenseNumber = str(licenseNumber)
    url0 = ("https://api-mi.metrc.com/labtests/v1/results?packageId=" + api_id + "&licenseNumber=" + licenseNumber)
    response0 = requests.request("GET", url0, headers=headers, data=payload)
    json_data0 = (response0.text.encode('utf8'))
    json_data0 = str(json_data0)
    json_data0 = (json_data0.strip('b'))
    json_data0 = (json_data0.strip("'"))
    json_data0 = (json_data0.strip('{'))
    json_data0 = (json_data0.strip('}'))
    json_data0 = (json_data0.strip('['))
    json_data0 = (json_data0.strip(']'))
    json_data0 = (json_data0.split(','))
    json_data_df0 = pd.DataFrame(json_data0)
    data_point = (json_data_df0.loc[1187, 0])
Python newbie here and first-time poster. The issue is that the command below is not working inside my for loop, but works as a standalone command.
data_point = (json_data_df0.loc[1187, 0])
The traceback tells me:
ValueError: 1187 is not in range
but there are 1326 rows in json_data_df0, and inside the loop every index other than [0, 0] fails.

I think you are supposed to use .iloc if you want to access rows/columns by integer position; .loc is for accessing rows/columns by label.
For reference: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html
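A minimal sketch of the difference, using a toy frame shaped like json_data_df0 (one column of split JSON strings, default integer index; the values are made up):
import pandas as pd

# Built from a plain list, so both the row index and the column index are
# default RangeIndex values (0, 1, 2, ...).
df = pd.DataFrame(['"Id":123', '"Label":"abc"', '"Quantity":5'])

print(df.loc[0, 0])    # works: 0 happens to be both a label and a position
print(df.iloc[1, 0])   # positional access: second row, first column
# df.loc[1187, 0] fails whenever no row carries the label 1187, e.g. because
# a given API response produced fewer rows than the one tested standalone.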

Related

Using python to parse the getEmailActivityUserDetail report

I'm using a Python script to pull data from https://graph.microsoft.com. The output is duplicated ten times for each parsed user. What step is missing so each requested user is captured only once?
import requests
import urllib
import json
import csv
import os

client_id = urllib.parse.quote_plus("#######################")
client_secret = urllib.parse.quote_plus("######################")
tenant = urllib.parse.quote_plus("#########################")
auth_uri = "https://login.microsoftonline.com/" + tenant + "/oauth2/v2.0/token"
auth_body = "grant_type=client_credentials&client_id=" + client_id + "&client_secret=" + client_secret + "&scope=https%3A%2F%2Fgraph.microsoft.com%2F.default"
authorization = requests.post(auth_uri, data=auth_body, headers={"Content-Type": "application/x-www-form-urlencoded"})
token = json.loads(authorization.content)['access_token']
graph_uri = "https://graph.microsoft.com/v1.0/reports/getEmailActivityUserDetail(period=%27D30%27)"
response = requests.get(graph_uri, data=auth_body, headers={'Content-Type': "application/json", 'Authorization': 'Bearer ' + token})
print(response.text)
temp_usr_list = [
    'User.One#domain.com',
    'User.Two#domain.com'
]
report_user_list = []
for line in response.iter_lines():
    line_fields = line.decode("utf-8").split(',')
    for entry in line_fields:
        if len(entry) < 1:
            continue
        if line_fields[1] in temp_usr_list:
            d = dict(
                user_principle_name = line_fields[1],
                send_count = line_fields[6],
                recv_count = line_fields[7],
                read_count = line_fields[8],
                assigned_products = line_fields[9]
            )
            report_user_list.append(d)
print(report_user_list)
OUTPUT:
{'user_principle_name': 'User.One#domain.com', 'send_count': '0', 'recv_count': '0', 'read_count': '0', 'assigned_products': 'MICROSOFT'},...
{'user_principle_name': 'User.Two#domain.com', 'send_count': '0', 'recv_count': '0', 'read_count': '0', 'assigned_products': 'MICROSOFT'},...
Try running it once in this format and let me know if you get the same output.
import requests
import urllib
import json
import csv
import os

# Parms
client_id = urllib.parse.quote_plus('#######################')
client_secret = urllib.parse.quote_plus('######################')
tenant = urllib.parse.quote_plus('#########################')
auth_uri = 'https://login.microsoftonline.com/' + tenant \
    + '/oauth2/v2.0/token'
auth_body = 'grant_type=client_credentials&client_id=' + client_id \
    + '&client_secret=' + client_secret \
    + '&scope=https%3A%2F%2Fgraph.microsoft.com%2F.default'
authorization = requests.post(auth_uri, data=auth_body,
                              headers={'Content-Type': 'application/x-www-form-urlencoded'})
token = json.loads(authorization.content)['access_token']
graph_uri = \
    'https://graph.microsoft.com/v1.0/reports/getEmailActivityUserDetail(period=%27D30%27)'
response = requests.get(graph_uri, data=auth_body,
                        headers={'Content-Type': 'application/json',
                                 'Authorization': 'Bearer ' + token})
print(response.text)
temp_usr_list = ['User.One#domain.com', 'User.Two#domain.com']
report_user_list = []
for line in response.iter_lines():
    line_fields = line.decode('utf-8').split(',')
    for entry in line_fields:
        if len(entry) < 1:
            continue
        if line_fields[1] in temp_usr_list:
            d = dict(user_principle_name=line_fields[1],
                     send_count=line_fields[6],
                     recv_count=line_fields[7],
                     read_count=line_fields[8],
                     assigned_products=line_fields[9])
            report_user_list.append(d)
print(report_user_list)
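If the duplicates persist, one thing worth checking (an observation about the posted code, not a confirmed diagnosis) is that report_user_list.append(d) sits inside the for entry in line_fields: loop, so a matching line is appended once per field rather than once per line, which would explain the tenfold duplication. A sketch that appends once per CSV line:
report_user_list = []
for line in response.iter_lines():
    line_fields = line.decode('utf-8').split(',')
    # Skip blank or short lines instead of looping over every field.
    if len(line_fields) < 10:
        continue
    if line_fields[1] in temp_usr_list:
        report_user_list.append(dict(user_principle_name=line_fields[1],
                                     send_count=line_fields[6],
                                     recv_count=line_fields[7],
                                     read_count=line_fields[8],
                                     assigned_products=line_fields[9]))
print(report_user_list)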

How to parse json response from two different api calls in a nested for loop?

I want to parse JSON responses from two different API calls that use the same dictionary keys but different values, inside a nested for loop.
I was able to get results from the first API call, but it fails on the second call, even though both responses have the same 'issues' dict and only the key values differ.
def classtest(self):
    apione = "http://jira.com/rest/1"
    headers = {
        'content-type': "application/json",
    }
    requestglobal = requests.request("GET", apione, headers=headers)
    responseglobal = requestglobal.text
    responseglobal = json.loads(responseglobal)
    for i in responseglobal['issues']:
        issue1 = i[key]
        print(issue1)
        apitwo = "https://jira.com/rest/2" + str(issue1)
        requesttwo = requests.request("GET", apitwo, headers=headers)
        responsetwo = requesttwo.text
        responsetwo = json.loads(responsetwo)
        for i in responsetwo['issues']:
            issue2 = i[key] + str(issue1)
            print(issue2)
            apithree = "https://jira.com/rest/3" + str(issue1) + str(issue3)
            requestthree = requests.request("GET", apithree, headers=headers)
            responsethree = requestthree.text
            responsethree = json.loads(responsethree)
            for i in responsethree['issues']:
                issue3 = i[key] + str(issue2)
                print(issue3)
                print("something from third loop")
            print("something from second loop")
        print("something from first for loop")
I want the issues from all three calls, since each call depends on the previous one.
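No answer was posted here, but a minimal sketch of one way to structure three dependent calls (the endpoint URLs and the 'key' field are assumptions carried over from the question):
import requests

HEADERS = {'content-type': "application/json"}

def get_issues(url):
    # Fetch a JSON response and return its 'issues' list (assumed shape).
    response = requests.get(url, headers=HEADERS)
    response.raise_for_status()
    return response.json().get('issues', [])

for first in get_issues("http://jira.com/rest/1"):
    issue1 = first['key']  # assumes each issue dict has a 'key' field
    for second in get_issues("https://jira.com/rest/2" + issue1):
        issue2 = second['key']
        for third in get_issues("https://jira.com/rest/3" + issue1 + issue2):
            print(issue1, issue2, third['key'])
Putting the request in a helper also avoids reusing the same loop variable i at every level, which makes any read of i after an inner loop see the inner loop's last value.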

GET request with chunks in Python

I'm getting data from an API with the requests library, like this:
import requests
url = "Some String"
headers = {
    'Authorization': "Some Token"}
response = requests.request("GET", url, headers=headers)
But the file I'm trying to get is very large, so I receive a timeout exception. How can I fetch it in chunks with requests?
Thanks!
One workaround is to split the request into smaller date windows so that each response stays small:
import requests
import datetime
import pandas as pd

url = "some URL"
headers = {
    'Authorization': "Some Token"}
start_date = datetime.datetime(2018, 6, 1)
end_date = datetime.datetime.now()
temp_end_date = start_date + datetime.timedelta(days=7)
output = dict()
while temp_end_date <= end_date:
    temp_url = url % (start_date.timestamp()*1000, temp_end_date.timestamp()*1000)
    response = requests.get(temp_url, headers=headers)
    temp_data = response.json()
    for key, value in temp_data.items():
        output_arr = output.get(key, [])
        output_arr.extend(value)
        output[key] = output_arr
    start_date = temp_end_date + datetime.timedelta(seconds=1)
    temp_end_date += datetime.timedelta(days=7)

data = output
df = pd.DataFrame(data)
df.head()
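For reference, requests can also stream a single large response in chunks rather than windowing by date; a minimal sketch (the URL, token, filename, and chunk size are placeholders):
import requests

url = "some URL"
headers = {'Authorization': "Some Token"}

# stream=True defers downloading the body; iter_content then yields it in
# chunks, so the whole file never has to sit in memory at once.
with requests.get(url, headers=headers, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open('download.bin', 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)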

BadZipFile("File is not a zip file")

So I am pulling data from an API using the code below:
import requests
import zipfile

# User Parameters (apitokens and j come from enclosing loops, see below)
apiToken = apitokens
surveyId = j
fileFormat = 'json'

# Static Parameters
requestCheckProgress = 0
baseUrl = 'https://sdfs.asdfs.com/API/v3/responseexports/'
headers = {
    'content-type': "application/json",
    'x-api-token': apiToken,
}

# Creating Data Export
downloadRequestUrl = baseUrl
downloadRequestPayload = '{"format":"' + fileFormat + '","surveyId":"' + surveyId + '"}'
downloadRequestResponse = requests.request("POST", downloadRequestUrl, data=downloadRequestPayload, headers=headers)
progressId = downloadRequestResponse.json()['result']['id']
print(downloadRequestResponse.text)

# Checking on Data Export
while requestCheckProgress < 100:
    requestCheckUrl = baseUrl + progressId
    requestCheckResponse = requests.request("GET", requestCheckUrl, headers=headers)
    requestCheckProgress = requestCheckResponse.json()['result']['percentComplete']
    print("Download is " + str(requestCheckProgress) + " complete")

# Downloading file
requestDownloadUrl = baseUrl + progressId + '/file'
requestDownload = requests.request("GET", requestDownloadUrl, headers=headers, stream=True)
with open('RequestFile.zip', "wb") as f:
    for chunk in requestDownload.iter_content(chunk_size=1024):
        f.write(chunk)
zipfile.ZipFile('RequestFile.zip').extractall("sdafsadf")
# This else pairs with an enclosing loop or if statement that is not shown here.
else:
    print("error")
so basically before this code I am using some nested for loops to provide the apitokens and the surveyId for each survey. There are over 100 surveys being pulled. Sometimes, seemingly at random, I get thrown this:
zipfile.BadZipFile: File is not a zip file
and the download meter reads something like 1350 percent. I can't find what is wrong in the code. I am using Python 3.6 and have tried 3.5 too.
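No answer is posted here, but one defensive measure (an assumption, not a confirmed fix) is to verify that the download really is a zip before extracting, since an error body saved to RequestFile.zip would raise exactly this exception:
import zipfile

# zipfile.is_zipfile checks the file's magic bytes without extracting anything.
if zipfile.is_zipfile('RequestFile.zip'):
    zipfile.ZipFile('RequestFile.zip').extractall("sdafsadf")
else:
    # Inspect the body; it may be a JSON error response rather than a zip.
    with open('RequestFile.zip', 'rb') as f:
        print(f.read(500))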

Twitter Search API using urllib2

I am a beginner at API calls using Python (or even at API calls in general). I am trying a basic call with the Twitter API.
My code for generating the oauth_signature is as follows:
def getSignature(query):
    key_dict['q'] = urllib.quote(query, '')
    finKey = ""
    for key in sorted(key_dict.keys()):
        finKey += key + "=" + key_dict[key] + "&"
    finKey = finKey[:-1]
    finKey = HTTP_METHOD + "&" + urllib.quote(BASE_URL, '') + "&" + urllib.quote(finKey, '')
    key = urllib.quote(CONSUMER_SECRET_KEY, '') + "&" + urllib.quote(ACCESS_TOKEN_SECRET, '')
    hashed = hmac.new(key, finKey, sha1)
    finKey = binascii.b2a_base64(hashed.digest())
    key_dict['oauth_signature'] = urllib.quote(finKey, '')
where key_dict stores all the keys:
key_dict = dict()
key_dict['oauth_consumer_key'] = urllib.quote(CONSUMER_KEY, '')
key_dict['oauth_nonce'] = urllib.quote("9ab59691142584g739134971f75aa986", '')
key_dict['oauth_signature_method'] = urllib.quote("HMAC-SHA1", '')
key_dict['oauth_timestamp'] = urllib.quote(str(int(time.time())), '')
key_dict['oauth_token'] = urllib.quote(ACCESS_TOKEN, '')
key_dict['oauth_version'] = urllib.quote(OAUTH_VERSION, '')
BASE_URL = "https://api.twitter.com/1.1/search/tweets.json?" + urllib.quote("q=delhi+elections", '')
I generate the base header string using the following:
def getHeaderString():
    ret = "OAuth "
    key_list = ['oauth_consumer_key', 'oauth_nonce', 'oauth_signature', 'oauth_signature_method', 'oauth_timestamp', 'oauth_token', 'oauth_version']
    for key in key_list:
        ret = ret + key + "=\"" + key_dict[key] + "\", "
    ret = ret[:-2]
    return ret
But when I make the call, I get:
urllib2.HTTPError: HTTP Error 401: Unauthorized
OR
urllib2.URLError: <urlopen error [Errno 60] Operation timed out>
My final request is made using the following:
getSignature("delhi+elections")
headers = { 'Authorization' : getHeaderString()}
req = urllib2.Request(BASE_URL, headers= headers)
response = urllib2.urlopen(req)
Where am I going wrong?
A few points that should have been mentioned somewhere:
The method binascii.b2a_base64(hashed.digest()) appends a newline at the end of the string, which caused the oauth_signature to fail authentication.
The delhi+elections was actually supposed to be delhi elections. This mismatch again caused the SHA1 hash comparison to fail.
Fixing both of these solved the problem.
The final code:
key_dict = dict()
key_dict['oauth_consumer_key'] = urllib.quote(CONSUMER_KEY, '')
key_dict['oauth_nonce'] = urllib.quote("9aa39691142584s7df134971375aa986", '')
key_dict['oauth_signature_method'] = urllib.quote("HMAC-SHA1", '')
key_dict['oauth_timestamp'] = urllib.quote(str(int(time.time())), '')
key_dict['oauth_token'] = urllib.quote(ACCESS_TOKEN, '')
key_dict['oauth_version'] = urllib.quote(OAUTH_VERSION, '')
BASE_URL = "https://api.twitter.com/1.1/search/tweets.json"
def getSignature(query):
    key_dict['q'] = urllib.quote(query, '')
    finKey = ""
    for key in sorted(key_dict.keys()):
        finKey += key + "=" + key_dict[key] + "&"
    finKey = finKey[:-1]
    finKey = HTTP_METHOD + "&" + urllib.quote(BASE_URL, '') + "&" + urllib.quote(finKey, '')
    key = urllib.quote(CONSUMER_SECRET_KEY, '') + "&" + urllib.quote(ACCESS_TOKEN_SECRET, '')
    hashed = hmac.new(key, finKey, sha1)
    # [:-1] drops the trailing newline that b2a_base64 appends
    finKey = binascii.b2a_base64(hashed.digest())[:-1]
    key_dict['oauth_signature'] = urllib.quote(finKey, '')

def getHeaderString():
    ret = "OAuth "
    key_list = ['oauth_consumer_key', 'oauth_nonce', 'oauth_signature', 'oauth_signature_method', 'oauth_timestamp', 'oauth_token', 'oauth_version']
    for key in key_list:
        ret = ret + key + "=\"" + key_dict[key] + "\", "
    ret = ret[:-2]
    return ret
url = BASE_URL
getSignature("delhi elections")
headers = { 'Authorization' : getHeaderString()}
values = {'q':'delhi elections'}
data = urllib.urlencode(values)
req = urllib2.Request(url+"?"+data, headers= headers)
response = urllib2.urlopen(req)
the_page = response.read()
print the_page
Instead of coding your own client, have you tried using tweepy? For a reference implementation using this library, you can check the twitCheck client.
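A minimal sketch of the same search with tweepy (credentials are assumed to be defined as in the question; the method is api.search_tweets in tweepy 4.x and api.search in older versions):
import tweepy

# tweepy builds the OAuth header and signature for you, avoiding the
# manual HMAC-SHA1 signing above.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET_KEY)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)

for tweet in api.search_tweets(q="delhi elections", count=10):
    print(tweet.text)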
