Issue in writing json to excel file in python - python

I am trying to get JSON output from an API request, which I then want to load into an Excel file.
The problem is that if I dump the response with json.dumps(), the result is not parsable. But if I take the response as text and run it through a JSON formatter, it parses fine.
I wrote the code below to write to CSV, but I actually want an Excel file.
Here is what the sample response.text variable looks like in my actual code:
{
"value": [
{
"correlationId": "xxxxxxxxxx",
"eventName": {
"value": "EndRequest",
"localizedValue": "EndRequest"
},
"id": "/subscriptions/xxxxxxxxxx/resourcegroups/xxxxxxxxx/providers/Microsoft.Compute/virtualMachines/xxxxxx/extensions/enablevmaccess/events/xxxxxxxxxx/ticks/xxxxxxxx",
"level": "Informational",
"resourceGroupName": "xxxxxx",
"resourceProviderName": {
"value": "Microsoft.Compute",
"localizedValue": "Microsoft.Compute"
},
"operationName": {
"value": "Microsoft.Compute/virtualMachines/extensions/write",
"localizedValue": "Microsoft.Compute/virtualMachines/extensions/write"
},
"status": {
"value": "Succeeded",
"localizedValue": "Succeeded"
},
"eventTimestamp": "2020-08-06T12:47:02.0657952Z",
"submissionTimestamp": "2020-08-06T12:49:03.137537Z"
},
{
"correlationId": "xxxxxxxxxx",
"eventName": {
"value": "EndRequest",
"localizedValue": "EndRequest"
},
"id": "/subscriptions/xxxxxxxxxx/resourcegroups/xxxxxxxxx/providers/Microsoft.Compute/virtualMachines/xxxxxx/extensions/enablevmaccess/events/xxxxxxxxxx/ticks/xxxxxxxx",
"level": "Informational",
"resourceGroupName": "xxxxxx",
"resourceProviderName": {
"value": "Microsoft.Compute",
"localizedValue": "Microsoft.Compute"
},
"operationName": {
"value": "Microsoft.Compute/virtualMachines/extensions/write",
"localizedValue": "Microsoft.Compute/virtualMachines/extensions/write"
},
"status": {
"value": "Succeeded",
"localizedValue": "Succeeded"
},
"eventTimestamp": "2020-08-06T12:47:02.0657952Z",
"submissionTimestamp": "2020-08-06T12:49:03.137537Z"
},
]
}
Here the code I am trying:
# Build the time window for the Azure Activity Log query: "now" and N days
# ago, formatted as the ISO-8601 UTC timestamps the REST API filter expects.
d_date = datetime.datetime.now()
today = d_date.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
print(today)
N = 10
date_N_days_ago = datetime.datetime.now() - timedelta(days=N)
start_date = date_N_days_ago.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
print(start_date)

vm_list = compute_client.virtual_machines.list_all()
for vm_general in vm_list:
    # A VM resource id looks like
    # /subscriptions/<sub>/resourceGroups/<rg>/providers/...;
    # segment 4 of the '/'-split is the resource-group name.
    general_view = vm_general.id.split("/")
    resource_group = general_view[4]
    print(resource_group)
    BASE_URL = f"https://management.azure.com/subscriptions/{subscription_id}/providers/microsoft.insights/eventtypes/management/values?api-version=2015-04-01&$filter=eventTimestamp ge {start_date} and eventTimestamp le {today} and resourceGroupName eq {resource_group}&$select=eventName,id,resourceGroupName,resourceProviderName,operationName,status,eventTimestamp,correlationId,submissionTimestamp,level"
    headers = {
        "Authorization": 'Bearer ' + credential.token["access_token"]
    }
    response = requests.get(BASE_URL, headers=headers)
    # BUG FIX: requests.get returns a Response object, so decode the body
    # with .json() to get a dict. The original assigned response.text (a
    # str), and str has no .keys() — the exact AttributeError reported.
    # NOTE(review): the question says .json() raised "'str' object has no
    # attribute 'json'", which suggests `response` was rebound to a string
    # somewhere else in the real code — confirm against the full script.
    df_json = response.json()
    print(df_json)
    # Raw string so the backslashes in the Windows path are taken literally,
    # and a `with` block so the file is closed even if a write fails.
    with open(r'c:\csv\logs_test.csv', 'w') as f:
        for key in df_json.keys():
            f.write("%s,%s\n" % (key, df_json[key]))
    break
I am getting error like:
AttributeError: 'str' object has no attribute 'keys'
Expected result:
Actually I need to to write to xls (excel) format having columns as "correlationId,eventName,id,resourceGroupName,resourceProviderName,operationName,status,eventTimestamp,submissionTimestamp

You can use eval to convert the text to a dictionary (note: eval executes arbitrary code, so only use it on trusted input — json.loads is the safer choice) and then use pandas to write it to an Excel file.
import json

import pandas as pd

# Parse the API response body into a dict, then let pandas turn the
# "value" list into rows and write them to an Excel file.
#
# SECURITY/BUG FIX: the original used eval(response.text), which executes
# arbitrary code if the response is attacker-controlled; json.loads parses
# the same text safely. The original also did `import pandas` but then
# referred to `pd`, which would raise NameError.
response_dict = json.loads(response.text)

df = pd.DataFrame(response_dict['value'])
df['tag'] = "Managed by IT"

file_name = 'data.xls'
df.to_excel(file_name, index = False)

The easiest approach is to convert to a pandas DataFrame and then write an xls file.
You will have to install xlwt - pip install xlwt.
import pandas as pd

# Sample Azure activity-log payload: each element of "value" is one event,
# and several fields are nested {value, localizedValue} objects.
data = {
    "value": [
        {
            "correlationId": "xxxxxxxxxx",
            "eventName": {
                "value": "EndRequest",
                "localizedValue": "EndRequest"
            },
            "id": "/subscriptions/xxxxxxxxxx/resourcegroups/xxxxxxxxx/providers/Microsoft.Compute/virtualMachines/xxxxxx/extensions/enablevmaccess/events/xxxxxxxxxx/ticks/xxxxxxxx",
            "level": "Informational",
            "resourceGroupName": "xxxxxx",
            "resourceProviderName": {
                "value": "Microsoft.Compute",
                "localizedValue": "Microsoft.Compute"
            },
            "operationName": {
                "value": "Microsoft.Compute/virtualMachines/extensions/write",
                "localizedValue": "Microsoft.Compute/virtualMachines/extensions/write"
            },
            "status": {
                "value": "Succeeded",
                "localizedValue": "Succeeded"
            },
            "eventTimestamp": "2020-08-06T12:47:02.0657952Z",
            "submissionTimestamp": "2020-08-06T12:49:03.137537Z"
        },
        {
            "correlationId": "xxxxxxxxxx",
            "eventName": {
                "value": "EndRequest",
                "localizedValue": "EndRequest"
            },
            "id": "/subscriptions/xxxxxxxxxx/resourcegroups/xxxxxxxxx/providers/Microsoft.Compute/virtualMachines/xxxxxx/extensions/enablevmaccess/events/xxxxxxxxxx/ticks/xxxxxxxx",
            "level": "Informational",
            "resourceGroupName": "xxxxxx",
            "resourceProviderName": {
                "value": "Microsoft.Compute",
                "localizedValue": "Microsoft.Compute"
            },
            "operationName": {
                "value": "Microsoft.Compute/virtualMachines/extensions/write",
                "localizedValue": "Microsoft.Compute/virtualMachines/extensions/write"
            },
            "status": {
                "value": "Succeeded",
                "localizedValue": "Succeeded"
            },
            "eventTimestamp": "2020-08-06T12:47:02.0657952Z",
            "submissionTimestamp": "2020-08-06T12:49:03.137537Z"
        }
    ]
}

# json_normalize flattens the nested objects into dotted column names,
# e.g. eventName -> "eventName.value" / "eventName.localizedValue".
flat = pd.json_normalize(data["value"])

# Keep only the requested columns, in the requested order, and write them.
wanted_columns = [
    "correlationId",
    "eventName.value",
    "id",
    "resourceGroupName",
    "resourceProviderName.value",
    "operationName.value",
    "status.value",
    "eventTimestamp",
    "submissionTimestamp",
]
flat[wanted_columns].to_excel("data.xls", index=False)
Instead of json, use demjson (install it with pip install demjson), because the stdlib json module only parses strictly valid JSON.
# demjson.decode is more tolerant than the stdlib json module: it accepts
# "almost-JSON" text (such as the trailing comma in the sample response)
# that json.loads rejects.
# NOTE(review): demjson appears to be an older third-party package — confirm
# it installs on your Python version before adopting this approach.
import demjson
data = demjson.decode(response.text)
# remaining code goes on

Related

API Call using request module in python

I am not very familiar with API calls or the requests module. I am trying to get the about information (details) for each DAO. I correctly get the names of the DAOs but I get KeyError when I try to do the details. Any help would be greatly appreciated.
import pandas as pd
import requests

# Query Algolia's multi-index search endpoint for the DAO list, then build a
# small DataFrame of (name, details) per hit.
payload = {"requests": [{"indexName": "governance_production", "params": "highlightPreTag=%3Cais-highlight-0000000000%3E&highlightPostTag=%3C%2Fais-highlight-0000000000%3E&hitsPerPage=855&attributesToRetrieve=%5B%22id%22%5D&maxValuesPerFacet=100&query=&page=0&facets=%5B%22types%22%2C%22tags%22%5D&tagFilters="}]}
url = 'https://3b439zgym3-2.algolianet.com/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20JavaScript%20(3.35.1)%3B%20Browser%20(lite)&x-algolia-application-id=3B439ZGYM3&x-algolia-api-key=14a0c8d17665d52e61167cc1b2ae9ff1'
headers = {"content-type": "application/x-www-form-urlencoded"}

req = requests.post(url, headers=headers, json=payload).json()

data = []
for item in req['results'][0]['hits']:
    highlight = item['_highlightResult']
    data.append({
        "name": highlight['name']['value'],
        # BUG FIX: not every hit carries a 'details' key (see the sample
        # documents below), which is exactly why the original raised
        # KeyError. Fall back to an empty string when it is absent.
        "details": highlight.get('details', {}).get('value', ''),
    })
print(data)
df = pd.DataFrame(data)
print(df)
Because there is no key named details exists in the resulted JSON, that's why it returns an error.
Here is a sample from the request you made above -
Either it includes tags key along with name and types
{
"_highlightResult": {
"assetSlug": {
"matchLevel": "none",
"matchedWords": [],
"value": "tribe"
},
"name": {
"matchLevel": "none",
"matchedWords": [],
"value": "Fei"
},
"tags": [
{
"matchLevel": "none",
"matchedWords": [],
"value": "DeFi"
}
],
"types": [
{
"matchLevel": "none",
"matchedWords": [],
"value": "Protocol"
}
]
},
"id": "f9779bc3-4eb4-4830-982b-fc981762dbd8",
"objectID": "f9779bc3-4eb4-4830-982b-fc981762dbd8"
}
or not including tags key
{
"_highlightResult": {
"assetSlug": {
"matchLevel": "none",
"matchedWords": [],
"value": "aave"
},
"name": {
"matchLevel": "none",
"matchedWords": [],
"value": "Aave Grants DAO"
},
"types": [
{
"matchLevel": "none",
"matchedWords": [],
"value": "Grants"
}
]
},
"id": "b3a88880-b343-4eba-955e-dd0c4970291a",
"objectID": "b3a88880-b343-4eba-955e-dd0c4970291a"
}
Here is the full body of JSON data -
JSON data

How to convert a JSON file from GET request into pandas dataframe?

I'm trying to convert json obtained from a python GET request (requests library) into a pandas dataframe.
I've tried some other solutions on the subject, including json_normalize; however, it does not appear to be working. The dataframe appears as a single column of dictionaries.
# Fetch the endpoint and decode the JSON body straight into Python
# containers (dicts/lists) — requests does the json.loads call for you.
response = requests.get(myUrl, headers=head)
data = response.json()
#what now? -- `data` is now a dict; the nested lists still need flattening.
gives me the following json:
"data": [
{
"timestamp": "2019-04-10T11:40:13.437Z",
"score": 87,
"sensors": [
{
"comp": "temp",
"value": 20.010000228881836
},
{
"comp": "humid",
"value": 34.4900016784668
},
{
"comp": "co2",
"value": 418
},
{
"comp": "voc",
"value": 166
},
{
"comp": "pm25",
"value": 4
},
{
"comp": "lux",
"value": 961.4000244140625
},
{
"comp": "spl_a",
"value": 45.70000076293945
}
],
"indices": [
{
"comp": "temp",
"value": -1
},
{
"comp": "humid",
"value": -2
},
{
"comp": "co2",
"value": 0
},
{
"comp": "voc",
"value": 0
},
{
"comp": "pm25",
"value": 0
}
]
}
How do i convert this into a dataframe? The end result is supposed to look have the following headers:
you can import json in order use json package.
json package has loads() method, you can use this method convert json object to dict object, then by giving key to this dict object to get value to put it into dataframe.

flask-sqlalchemy dynamically construct query

I have an input json like the following:
{
"page": 2,
"limit": 10,
"order": [
{
"field": "id",
"type": "asc"
},
{
"field": "email",
"type": "desc"
},
...
{
"field": "fieldN",
"type": "desc"
}
],
"filter": [
{
"field": "company_id",
"type": "=",
"value": 1
},
...
{
"field": "counter",
"type": ">",
"value": 5
}
]
}
How do I dynamically construct sqlalchemy query based on my input json if I don't know fields count?
Something like this:
User.query.filter(filter.field, filter.type, filter.value).filter(filter.field1, filter.type1, filter.value1)...filter(filter.fieldN, filter.typeN, filter.valueN).order_by("id", "ask").order_by("email", "desc").order_by("x1", "y1")....order_by("fieldN"...."desc").all()
Convert the json into a dictionary and retrieve the value.
If your json is in a file (say, data.json), the json library will satisfy your needs:
import json

# Load the JSON document from the file into a dict. A `with` block closes
# the file even if parsing raises, replacing the manual open()/close() pair.
with open("data.json") as f:
    data = json.load(f)

# BUG FIX: Query.filter() takes SQL expressions (e.g. User.company_id == 1);
# keyword arguments belong to filter_by(). The original filter(company_id=1)
# raises a TypeError in SQLAlchemy.
User.query.filter_by(company_id=1).order_by(data["id"], data["ask"]).order_by(data["email"], data["desc"]).all()
If your json is a string (say, json_data):
import json

# Parse a JSON document held in a string into a dict.
data = json.loads(json_data)
# BUG FIX: keyword-style filters go through filter_by(); filter() expects
# SQL expressions and rejects keyword arguments with a TypeError.
User.query.filter_by(company_id=1).order_by(data["id"], data["ask"]).order_by(data["email"], data["desc"]).all()
If your json is a request from the python requests library i.e. res = requests.get(...), then res.json() will return a dictionary:
# requests' Response.json() decodes the body into a dict directly.
data = res.json()
# BUG FIX: use filter_by() for keyword-style filters; filter() expects SQL
# expressions and raises TypeError when given keyword arguments.
User.query.filter_by(company_id=1).order_by(data["id"], data["ask"]).order_by(data["email"], data["desc"]).all()

Json to cvs using python

I'm trying to convert JSON to CSV, but the data includes a **"header"** section. With my current knowledge I can't convert it into CSV, because I don't know how to handle the "headers":
`
{
"__metadata": {
"uri": "http://ip:port/vvv/v1/folders?page=1&pagesize=50"
},
"first": {
"__deferred": {
"uri": "http://ip:port/vvv/v1/folders?page=1&pagesize=50"
}
},
"last": {
"__deferred": {
"uri": "http://ip:port/vvv/v1/folders?page=1&pagesize=50"
}
},
"entries": [
`
And the rest of code looks like this:
`
{
"__metadata": {
"uri": "http://ip:port/vvv/v1/folders/13483"
},
"cuid": "AfbTJW3iTE1MkiLULzA6P58",
"name": "Foldername1",
"description": "",
"id": "13483",
"type": "Folder",
"ownerid": "12",
"updated": "Wed Mar 01 09:14:23 CET 2017"
},
{
"__metadata": {
"uri": "http://ip:port/vvv/v1/folders/523"
},
"cuid": "AS1oZEJAynpNjZIaZK2rc7g",
"name": "foldername2",
"description": "",
"id": "523",
"type": "Folder",
"ownerid": "10",
"updated": "Wed Jan 18 00:11:06 CET 2017"
},
{
"__metadata": {
"uri": "http://ip:port/vvv/v1/folders/5356"
},
"cuid": "AeN4lEu0h_tAtnPEjFYxwi8",
"name": "foldername",
"description": "",
"id": "5356",
"type": "Folder",
"ownerid": "12",
"updated": "Fri Feb 10 17:28:53 CET 2017"
}
]
}
`
How can I convert above code into csv? How I can deal with "header"?
Python's json and csv libraries should handle this for you. Just load the json data in and access the entries tag directly. From there you can enumerate all the data and write it to a csv file.
This example shows how to also write all of the data in dataprovider before writing the expression list:
import json
import csv

# Sample document: a "dataprovider" header object plus a nested
# dictionary.expression list that becomes the tabular part of the CSV.
data = """{
"dataprovider": {
"id": "DP0",
"name": "Query 1",
"dataSourceId": "5430",
"dataSourcePrefix": "DS0",
"dataSourceType": "unv",
"updated": "2010-12-03T13:07:43.000Z",
"duration": 1,
"isPartial": "false",
"rowCount": 1016,
"flowCount": 1,
"dictionary": {
"expression": [{
"#dataType": "String",
"#qualification": "Dimension",
"id": "DP0.DOa5",
"name": "Lines",
"description": "Product line. Each line contains a set of categories.",
"dataSourceObjectId": "DS0.DOa5",
"formulaLanguageId": "[Lines]"
},
{
"#dataType": "Numeric",
"#qualification": "Measure",
"#highPrecision": "false",
"id": "DP0.DO93",
"name": "Sales revenue",
"description": "Sales revenue $ - $ revenue of SKU sold",
"dataSourceObjectId": "DS0.DO93",
"formulaLanguageId": "[Sales revenue]",
"aggregationFunction": "Sum"
}]
},
"query": "SELECT ... FROM ... WHERE"
}
}
"""
my_json = json.loads(data)
entries = my_json['dataprovider']['dictionary']['expression']

# PY3 FIX: dict.keys() returns a view object with no .remove() method under
# Python 3 — materialize it into a list first.
header_1 = list(my_json['dataprovider'].keys())
header_1.remove("dictionary")
data_1 = [(k, str(my_json['dataprovider'][k])) for k in header_1]

header_2 = sorted(entries[0].keys())

# PY3 FIX: csv.writer needs a text-mode file opened with newline=''; the
# original 'wb' binary mode is a Python-2 idiom that fails under Python 3.
with open('output.csv', 'w', newline='') as f_output:
    csv_output = csv.writer(f_output)
    # Write initial header information
    csv_output.writerows(data_1)
    # Write an empty row
    csv_output.writerow([])
    # Write list information
    csv_output.writerow(header_2)
    for entry in entries:
        # Join multi-line values onto one line so each entry stays one row.
        csv_output.writerow([' '.join(str(entry.get(col, '')).splitlines()) for col in header_2])
The CSV file would then look something like:
updated,2010-12-03T13:07:43.000Z
name,Query 1
dataSourceType,unv
rowCount,1016
isPartial,false
dataSourceId,5430
query,SELECT ... FROM ... WHERE
duration,1
flowCount,1
dataSourcePrefix,DS0
id,DP0
#dataType,#qualification,dataSourceObjectId,description,formulaLanguageId,id,name
String,Dimension,DS0.DOa5,Product line. Each line contains a set of categories.,[Lines],DP0.DOa5,Lines
Numeric,Measure,DS0.DO93,Sales revenue $ - $ revenue of SKU sold,[Sales revenue],DP0.DO93,Sales revenue
If you are getting different JSON, you need to manually decide which part to extract, for example:
entries = my_json['documents']['document']

Iterating through JSON in Python using an OFFSET

I am trying to use the HubSpot CRM API to get "All Deals".
The API endpoint is: https://api.hubapi.com/deals/v1/deal/all?hapikey=demo
The JSON returned looks like this...
{
"deals": [
{
"portalId": 62515,
"dealId": 18039629,
"isDeleted": false,
"associations": {
"associatedVids": [],
"associatedCompanyIds": [],
"associatedDealIds": []
},
"properties": {
"dealname": {
"value": "Company",
"timestamp": 1457040864519,
"source": "API",
"sourceId": null
},
"amount": {
"value": "10",
"timestamp": 1457040864519,
"source": "API",
"sourceId": null
},
"closedate": {
"value": "",
"timestamp": 1457040864519,
"source": "API",
"sourceId": null
},
"hubspot_owner_id": {
"value": "11626092",
"timestamp": 1457046177648,
"source": "SALESFORCE",
"sourceId": null
},
"hs_lastmodifieddate": {
"value": "1457046177662",
"timestamp": 1457046177662,
"source": "CALCULATED",
"sourceId": null
},
"hubspot_owner_assigneddate": {
"value": "1457046177648",
"timestamp": 1457046177648,
"source": "SALESFORCE",
"sourceId": null
},
"num_associated_contacts": {
"value": "0",
"timestamp": 0,
"source": "CALCULATED",
"sourceId": null
},
"hs_createdate": {
"value": "1457040864535",
"timestamp": 1457040864535,
"source": null,
"sourceId": null
},
"createdate": {
"value": "1457040864535",
"timestamp": 1457040864535,
"source": null,
"sourceId": null
},
"hs_salesforceopportunityid": {
"value": "00628000007nRyuAAE",
"timestamp": 1457046177648,
"source": "SALESFORCE",
"sourceId": null
}
},
"imports": []
},
{
"portalId": 62515,
"dealId": 18040854,
"isDeleted": false,
"associations": {
"associatedVids": [],
"associatedCompanyIds": [],
"associatedDealIds": []
},
"properties": {
"dealname": {
"value": "5678",
"timestamp": 1457042290572,
"source": "API",
"sourceId": null
},
"amount": {
"value": "750000.0",
"timestamp": 1457042290572,
"source": "API",
"sourceId": null
},
"closedate": {
"value": "",
"timestamp": 1457042290572,
"source": "API",
"sourceId": null
},
"hs_lastmodifieddate": {
"value": "1457042290592",
"timestamp": 1457042290592,
"source": "CALCULATED",
"sourceId": null
},
"num_associated_contacts": {
"value": "0",
"timestamp": 0,
"source": "CALCULATED",
"sourceId": null
},
"hs_createdate": {
"value": "1457042290592",
"timestamp": 1457042290592,
"source": null,
"sourceId": null
},
"createdate": {
"value": "1457042290592",
"timestamp": 1457042290592,
"source": null,
"sourceId": null
}
},
"imports": []
}
],
"hasMore": true,
"offset": 1467187
}
And I understand that if hasMore==true, then you are supposed to grab the offset and include it in another API call something like this: https://api.hubapi.com/deals/v1/deal/all?hapikey=demo&offset=1467187
And then keep doing that until hasMore==false.
I am using the following code to extract the first chunk of JSON from the API:
import requests

# Endpoint for "get all deals"; "demo" is HubSpot's public sandbox API key.
url = "https://api.hubapi.com/deals/v1/deal/all"
querystring = {"hapikey":"demo"}
headers = {
    'cache-control': "no-cache"
}
# Fetch only the first page; the body carries hasMore/offset for paging.
response = requests.request("GET", url, headers=headers, params=querystring)
print(response.text)
So... my question is that now I am getting my JSON, how do I:
1) Read one chunk of JSON
2) If hasMore==true then go do #1 again
3) ElseIf hasMore==false then combine ALL the JSON from ALL iterations of #1 above into one big JSON
4) Return the value from #3
Any help please?
Working solution
import json
import requests

# Page through HubSpot's "all deals" endpoint, accumulating every deal, and
# finally dump the combined list as one JSON array.
url = "https://api.hubapi.com/deals/v1/deal/all"
headers = {
    'cache-control': "no-cache"
}

all_deals = []
querystring = {"hapikey": "demo"}
# HubSpot uses offset-based pagination: each page reports hasMore plus the
# offset to request next. The single loop below replaces the original's
# duplicated "first request + while" copies of the same fetch/append code.
while True:
    response = requests.request("GET", url, headers=headers, params=querystring).json()
    # extend() appends the whole page at once instead of a per-deal loop.
    all_deals.extend(response['deals'])
    if not response['hasMore']:
        break
    querystring = {
        "hapikey": "demo",
        "offset": response['offset']
    }

print(json.dumps(all_deals))

Categories