Parsing values from JSON using Python - python

I want to get the value of consumptionSavings from the following JSON, which is stored in a .txt file.
{
"_id": "58edf905746de21c401a3dce",
"sites": [{
"ecms": [{
"consumptionSavings": 148,
"equipmentCost": 3455,
{
"energySource": "Electricity",
"consumptionReduction": {
"amount": 345435,
"unit": "MWh"
},
"projectDurationMonths": 36
}
}
}
]
]
}
I wrote the following code to extract the value of consumptionSavings:
# Load a JSON document from data.txt and write its consumptionSavings value
# into an Excel sheet through xlwings.
import xlwings as xw
import pandas as pd
import json
data = json.load(open('data.txt'))
# Create a Pandas dataframe from the data.
# NOTE(review): in the sample JSON shown above, "sites" and "ecms" are both
# arrays (lists after json.load), so indexing them with a string key is what
# raises the TypeError quoted below. Each list needs an integer index first,
# e.g. data["sites"][0]["ecms"][0]["consumptionSavings"].
df = pd.DataFrame({'data':[data["sites"]["ecms"]["consumptionSavings"]]})
wb = xw.Book('Values.xlsx')
ws = wb.sheets['Sheet1']
# Assign the dataframe to the range anchored at C3, passing index=False as a
# conversion option.
ws.range('C3').options(index=False).value = df
# NOTE(review): wb is rebound to Result.xlsx here, so the save() below is
# called on that book rather than on the Values.xlsx book that was just
# written to - confirm this is intended.
wb = xw.Book('Result.xlsx')
wb.save()
xw.apps[0].quit()
It returns the following error:
TypeError: list indices must be integers or slices, not str
I am a bit confused about how that could be. Thank you.

Related

What changes do I need to make so that this Python code works on Databricks?

Hello, this is the Python code I developed on my local machine, and I am now trying to make it work on Databricks. I am new to Databricks, so I don't know how to do it.
I have a sample of a huge JSON file and I am splitting it into two parts: one file contains the headers and the second file contains all the details.
Here is my local-machine Python code.
import json
import itertools

# Number of leading top-level keys that form the "header" part of the document;
# every key after these goes into the "detail" part.
HEADER_KEY_COUNT = 8

# Load the whole JSON document into a dict.
with open('new_test.json', 'r') as fp:
    data = json.load(fp)

# json.load builds a dict in file order, so slicing items() by position splits
# the document exactly where the header keys end.
d1 = dict(itertools.islice(data.items(), HEADER_KEY_COUNT))
print(d1)
# A stop of None means "to the end", so the length does not need computing.
d2 = dict(itertools.islice(data.items(), HEADER_KEY_COUNT, None))
print(d2)

# Write each part to its own JSON file.
with open("new_test_header.json", "w") as header_file:
    json.dump(d1, header_file)
with open("new_test_detail.json", "w") as detail_file:
    json.dump(d2, detail_file)
Here is the JSON file.
{
"reporting_entity_name": "launcher",
"reporting_entity_type": "launcher",
"plan_name": "launched",
"plan_id_type": "hios",
"plan_id": "1111111111",
"plan_market_type": "individual",
"last_updated_on": "2020-08-27",
"version": "1.0.0",
"in_network": [
{
"negotiation_arrangement": "ffs",
"name": "Boosters",
"billing_code_type": "CPT",
"billing_code_type_version": "2020",
"billing_code": "27447",
"description": "Boosters On Demand",
"negotiated_rates": [
{
"provider_groups": [
{
"npi": [
0
],
"tin": {
"type": "ein",
"value": "11-1111111"
}
}
],
"negotiated_prices": [
{
"negotiated_type": "negotiated",
"negotiated_rate": 123.45,
"expiration_date": "2022-01-01",
"billing_class": "organizational"
}
]
}
]
}
]
}
Here is what I am trying to write in Databricks:
# Attempt to port the local header/detail split to Databricks: read the JSON
# file into a Spark DataFrame, then slice it the way the local script slices
# a dict.
import json
import itertools
from pyspark.sql.functions import explode, col
# The "multiline" option is set to "true" for this read.
df_json = spark.read.option("multiline","true").json("/mnt/BigData_JSONFiles/SampleDatafilefrombigfile.json")
display(df_json)
# NOTE(review): df_json is a Spark DataFrame, not a dict; it presumably has no
# .items() method, so these two lines are expected to fail - verify against
# the PySpark DataFrame API.
d1 = dict(itertools.islice(df_json.items(), 4))
d2 = dict(itertools.islice(df_json.items(), 4, len(df_json.items())))
# I am unable to write the WRITE function.
Any help or guidance would be much appreciated.
Here is an example snippet:
from pyspark.sql.functions import explode, col

# Top-level fields that hold the bulky detail records; every other top-level
# field is treated as header metadata.
DETAIL_COLUMNS = ["in_network"]

# Read the JSON file from Databricks storage. The document is one JSON object
# spread over many lines, so the reader needs multiline mode.
df_json = (
    spark.read
    .option("multiline", "true")
    .json("/mnt/BigData_JSONFiles/new_test.json")
)

# Split by column name rather than by position.
# NOTE(review): Spark's inferred JSON schema is not guaranteed to keep the
# file's key order (fields appear to come back sorted by name), so taking
# "the first 8 columns" would not reliably select the header keys - confirm
# with df_json.printSchema().
header_columns = [name for name in df_json.columns if name not in DETAIL_COLUMNS]

# Write the header part of the data to JSON in Databricks storage.
# NOTE(review): Spark's writer is expected to create a directory of part files
# at this path rather than a single file - confirm if a single file is needed.
df1 = df_json.select(*header_columns)
df1.write.format("json").save("/mnt/BigData_JSONFiles/new_test_header.json")

# Write the detail part of the data to JSON in Databricks storage.
df2 = df_json.select(*DETAIL_COLUMNS)
df2.write.format("json").save("/mnt/BigData_JSONFiles/new_test_detail.json")

Filter Json with Ids contained in csv sheet using python

I have a CSV file with some "id" values. I imported a JSON file and I need to keep only the JSON entries whose ids appear in the worksheet.
Does anyone know how to do that? I have no idea, as I am very new to Python. I am using Jupyter Notebook.
How can I filter the data using the ids held in the variable var_filter?
import json
import pandas as pd
from IPython.display import display

# read csv with ids
var_filter = pd.read_csv('file.csv')
# Show the ids that were read. The original displayed `act_filter`, a name
# that is never defined in this snippet and would raise NameError.
display(var_filter)

# Load json
with open('file.json') as f:
    data = json.load(f)
print(data)
The json structure is:
[
{
"id": "179328741654819",
"t_values": [
{
"t_id": "963852456741",
"value": "499.66",
"date_timestamp": "2020-09-22T15:18:17",
"type": "in"
},
{
"t_id": "852951753456",
"value": "1386.78",
"date_timestamp": "2020-10-31T14:46:44",
"type": "in"
}
]
},
{
"id": "823971648264792",
"t_values": [
{
"t_id": "753958561456",
"value": "672.06",
"date_timestamp": "2020-03-16T22:41:16",
"type": "in"
},
{
"t_id": "321147951753",
"value": "773.88",
"date_timestamp": "2020-05-08T18:29:31",
"type": "out"
},
{
"t_id": "258951753852",
"value": "733.13",
"date_timestamp": null,
"type": "in"
}
]
}
]
You can iterate over the elements in the data variable and check whether each element's id value is in the dataframe's id column. A simple method is shown below; see this article for other methods.
Note that I convert the value of the JSON's id to an int, as that is the value type pandas uses for the column.
code
import json
from pprint import pprint
import pandas as pd

var_filter = pd.read_csv("id.csv")
# Build the lookup set once so each membership test is O(1) instead of
# re-scanning the whole id column for every JSON element.
wanted_ids = set(var_filter["id"].tolist())

# Load json
with open("data.json") as f:
    data = json.load(f)

# The JSON stores ids as strings while pandas parsed the CSV column as
# integers, so convert before comparing.
result = [elem for elem in data if int(elem["id"]) in wanted_ids]
pprint(result)
id.csv
id
823971648264792
output
[{'id': '823971648264792',
't_values': [{'date_timestamp': '2020-03-16T22:41:16',
't_id': '753958561456',
'type': 'in',
'value': '672.06'},
{'date_timestamp': '2020-05-08T18:29:31',
't_id': '321147951753',
'type': 'out',
'value': '773.88'},
{'date_timestamp': None,
't_id': '258951753852',
'type': 'in',
'value': '733.13'}]}]

How to read first array object from JSON using Python?

I have a JSON file that I need to convert to CSV. The file contains a JSON object whose attributes all sit inside an array, but the code I am trying converts each array element into a single cell value, whereas I actually want every attribute of those elements as its own column.
JSON file content
{
"leads": [
{
"id": "31Y2V29CH0X82",
"product_type": "prelist"
},
{
"id": "2N649TAJBA50Z",
"product_type": "prelist"
}
],
"has_next_page": true,
"next_cursor": "2022-07-27T20:02:13.856000-07:00"
}
Python code
import pandas as pd

# Source JSON document and destination CSV file.
json_path = r'C:\Users\Ron\Desktop\Test\Product_List.json'
csv_path = r'C:\Users\Ron\Desktop\Test\New_Products.csv'

# Let pandas parse the JSON as-is, then dump the resulting frame to CSV
# with index=None.
df = pd.read_json(json_path)
df.to_csv(csv_path, index=None)
The output I am getting is as follows:
And this is the output I want:
I want the attributes as CSV content with headers.
I think you'll have to do this row by row.
import pandas as pd

# Sample payload: a list of lead records plus page-level metadata.
data = {
    "leads": [
        {"id": "31Y2V29CH0X82", "product_type": "prelist"},
        {"id": "2N649TAJBA50Z", "product_type": "prelist"},
    ],
    "has_next_page": True,
    "next_cursor": "2022-07-27T20:02:13.856000-07:00",
}

# Every top-level field except the lead list is repeated on each output row.
headers = {key: value for key, value in data.items() if key != "leads"}

# Build a fresh dict per row instead of calling row.update(), which would
# modify the lead dicts inside `data` in place.
rows = [{**lead, **headers} for lead in data["leads"]]

df = pd.DataFrame(rows)
print(df)
Output:
id product_type has_next_page next_cursor
0 31Y2V29CH0X82 prelist True 2022-07-27T20:02:13.856000-07:00
1 2N649TAJBA50Z prelist True 2022-07-27T20:02:13.856000-07:00

flattening JSON file using json_normalise and choosing specific elements to convert to an excel sheet (Sample Attached)

{
"currency": {
"Wpn": {
"units": "KB_per_sec",
"type": "scalar",
"value": 528922.0,
"direction": "up"
}
},
"catalyst": {
"Wpn": {
"units": "ns",
"type": "scalar",
"value": 70144.0,
"direction": "down"
}
},
"common": {
"Wpn": {
"units": "ns",
"type": "scalar",
"value": 90624.0,
"direction": "down"
}
}
}
I basically have to convert nested JSON into Excel. My approach was to flatten the JSON file using json_normalize, but as I am new to all this, I always seem to end up with a KeyError.
Here's my code so far, assuming that the file is named json.json:
# Attempt to pull each entry's Wpn value out of json.json and write the
# result to an Excel file.
# NOTE(review): json and pd are both used below, but this snippet imports
# only requests and json_normalize.
import requests
from pandas import json_normalize
with open('json.json', 'r') as f:
data = json.load(f)
# NOTE(review): the sample document above is a JSON object, so iterating
# `data` yields its keys (strings such as "currency"), not the nested
# {"Wpn": {...}} mappings; the wanted number sits at data[key]['Wpn']['value'].
df = pd.DataFrame(sum([i[['Wpn'], ['value']] for i in data], []))
df.to_excel('Ai.xlsx')
I'm trying to get an Excel sheet containing currency and common along with their respective values.
I know there are a lot of similar questions, but trust me, I have tried most of them and still didn't get the desired output. Please help me with this.
Try:
import json
import pandas as pd

# Parse the nested document: top-level name -> {"Wpn": {..., "value": ...}}.
with open('json.json', 'r') as f:
    data = json.load(f)

# Flatten to one record per top-level name, keeping only the Wpn value.
records = []
for name, entry in data.items():
    records.append({'key': name, 'wpn_value': entry['Wpn']['value']})
data = records
print(data)
# At this point data is a list of records such as
# [{'key': 'currency', 'wpn_value': 528922.0}, {'key': 'catalyst', 'wpn_value': 70144.0}, ...]

# Index the frame by name (optional) and write it to the Excel file.
df = pd.DataFrame(data)
df = df.set_index('key')
df.to_excel('Ai.xlsx')
The result looks like
key
wpn_value
currency
528922
catalyst
70144
common
90624

Add missing fields with null values as per position mentioned in the config file in Python while parsing the JSON file data

I Have a config file
Position,ColumnName
1,TXS_ID
4,TXX_NAME
8,AGE
In the config above, only positions 1, 4 and 8 are defined, so only 3 columns are available. Positions 2 and 3 (between 1 and 4) are not defined, and I want to fill them with null values.
Based on this config file, I am trying to parse data from a JSON file using Python, and I need to lay out the columns according to the positions given above. When the script runs, if "TXS_ID" is available it should pick that value from the JSON file, and because fields 2 and 3 are not defined I want to keep them as null.
Sample output file
TSX_ID,,,TXX_NAME,,,,AGE
10000,,,AAAAAAAAA,,,,40
The data should be extracted from the JSON file according to the config file I specify, and any position missing from the config (as in the example above) should be filled with nulls. Please let me know if there is a way to achieve this.
Below is the sample Json File.
{
"entities": [
{
"id": "XXXXXXXXXXXXXXX",
"data": {
"attributes": {
"TSX_ID": {
"values": [
{
"value": 10000
}
]
},
"TXX_NAME": {
"values": [
{
"value": "AAAAAAAAA"
}
]
},
"AGE": {
"values": [
{
"value": "40"
}
]
}
}
}
}
]
}
Assuming that the config file line 1,TXS_ID has a typo and is actually 1,TSX_ID, this program works with your sample data (see explanations in comments):
import json

import pandas

# read the "config file" into a Series of the "ColumnName"s, indexed by Position.
# read_csv's squeeze=True was deprecated in pandas 1.4 and removed in 2.0;
# selecting the column by name yields the same Series on every version.
config = pandas.read_csv('config', index_col='Position')['ColumnName']
# get the maximum Position (max() stays correct even if the rows are unsorted)
maxdex = config.index.max()
# fill the Positions missing in the "config file" with empty "ColumnName"s:
config = config.reindex(range(1, maxdex + 1), fill_value='')

# load the JSON sample, closing the file handle once it has been parsed
with open('sample.json') as sample_file:
    sample = json.load(sample_file)

# create an empty DataFrame with the desired columns:
output = pandas.DataFrame(columns=config.values)
# now insert the nested JSON data values into the given columns:
for a in config.values:
    if a:  # only if not an empty column name, of course
        output[a] = [av['value'] for e in sample['entities']
                     for av in e['data']['attributes'][a]['values']]
output.to_csv('output.csv', index=False)

Categories