Python Cubes OLAP Framework - How to sum a json column?

Python Cubes OLAP Framework - How to sum a json column? - python

I started using Python Cubes Olap recently.
I'm trying to sum/average a JSON column in Postgres. How can I do this?
my db structure:
events
id
object_type
sn_name
spectra
id
snx_wavelengths (json column)
event_id
my json:
{
"dimensions": [
{
"name": "event",
"levels": [
{
"name": "object_type",
"label": "Object Type",
"attributes": [
"object_type"
]
},
{
"name": "sn_name",
"label": "name",
"attributes": [
"sn_name"
]
}
]
},
{
"name": "spectra",
"levels": [
{
"name": "catalog_name",
"label": "Catalog Name",
"attributes": [
"catalog_name"
]
},
{
"name": "capture_date",
"label": "Capture Date",
"attributes": [
"capture_date"
]
}
]
},
{
"name": "date"
}
],
"cubes": [
{
"id": "uid",
"name": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5",
"dimensions": [
"event",
"spectra",
"date"
],
"aggregates": [
{
"name": "event_snx_wavelengths_sum",
"function": "sum",
"measure": "event.snx_wavelengths"
},
{
"name": "record_count",
"function": "count"
}
],
"joins": [
{
"master": "14G31Yx98ZG8aEhFHjOWNNBmFOETg5APjZo5AiHaqog5YxLMK5.id",
"detail": "spectra.event_id"
}
],
"mappings": {
"event.sn_name": "sn_name",
"event.object_type": "object_type",
"spectra.catalog_name": "spectra.catalog_name",
"spectra.capture_date": "spectra.capture_date",
"event.snx_wavelengths": "spectra.snx_wavelengths",
"date": "spectra.capture_date"
}
}
]
}
I'm getting the following error:
Unknown attribute ''event.snx_wavelengths''
Can anyone help?
I already tried using MongoDB to do the sum, but I didn't have any success.

Related

Is there a way to add curly brackets around a list of dictionaries already existing within a JSON file?

I currently have two JSONS that I want to merge into one singular JSON, additionally I want to add in a slight change.
Firstly, these are the two JSONS in question.
An intents JSON:
[
{
"ID": "G1",
"intent": "password_reset",
"examples": [
{
"text": "I forgot my password"
},
{
"text": "I can't log in"
},
{
"text": "I can't access the site"
},
{
"text": "My log in is failing"
},
{
"text": "I need to reset my password"
}
]
},
{
"ID": "G2",
"intent": "account_closure",
"examples": [
{
"text": "I want to close my account"
},
{
"text": "I want to terminate my account"
}
]
},
{
"ID": "G3",
"intent": "account_creation",
"examples": [
{
"text": "I want to open an account"
},
{
"text": "Create account"
}
]
},
{
"ID": "G4",
"intent": "complaint",
"examples": [
{
"text": "A member of staff was being rude"
},
{
"text": "I have a complaint"
}
]
}
]
and an entities JSON:
[
{
"ID": "K1",
"entity": "account_type",
"values": [
{
"type": "synonyms",
"value": "business",
"synonyms": [
"corporate"
]
},
{
"type": "synonyms",
"value": "personal",
"synonyms": [
"vanguard",
"student"
]
}
]
},
{
"ID": "K2",
"entity": "beverage",
"values": [
{
"type": "synonyms",
"value": "hot",
"synonyms": [
"heated",
"warm"
]
},
{
"type": "synonyms",
"value": "cold",
"synonyms": [
"ice",
"freezing"
]
}
]
}
]
The expected outcome is to create a JSON file that mimics this structure:
{
"intents": [
{
"intent": "password_reset",
"examples": [
{
"text": "I forgot my password"
},
{
"text": "I want to reset my password"
}
],
"description": "Reset a user password"
}
],
"entities": [
{
"entity": "account_type",
"values": [
{
"type": "synonyms",
"value": "business",
"synonyms": [
"company",
"corporate",
"enterprise"
]
},
{
"type": "synonyms",
"value": "personal",
"synonyms": []
}
],
"fuzzy_match": true
}
],
"metadata": {
"api_version": {
"major_version": "v2",
"minor_version": "2018-11-08"
}
},
"dialog_nodes": [
{
"type": "standard",
"title": "anything_else",
"output": {
"generic": [
{
"values": [
{
"text": "I didn't understand. You can try rephrasing."
},
{
"text": "Can you reword your statement? I'm not understanding."
},
{
"text": "I didn't get your meaning."
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"conditions": "anything_else",
"dialog_node": "Anything else",
"previous_sibling": "node_4_1655399659061",
"disambiguation_opt_out": true
},
{
"type": "event_handler",
"output": {
"generic": [
{
"title": "What type of account do you hold with us?",
"options": [
{
"label": "Personal",
"value": {
"input": {
"text": "personal"
}
}
},
{
"label": "Business",
"value": {
"input": {
"text": "business"
}
}
}
],
"response_type": "option"
}
]
},
"parent": "slot_9_1655398217028",
"event_name": "focus",
"dialog_node": "handler_6_1655398217052",
"previous_sibling": "handler_7_1655398217052"
},
{
"type": "event_handler",
"output": {},
"parent": "slot_9_1655398217028",
"context": {
"account_type": "#account_type"
},
"conditions": "#account_type",
"event_name": "input",
"dialog_node": "handler_7_1655398217052"
},
{
"type": "standard",
"title": "business_account",
"output": {
"generic": [
{
"values": [
{
"text": "We have notified your corporate security team, they will be in touch to reset your password."
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"parent": "node_3_1655397279884",
"next_step": {
"behavior": "jump_to",
"selector": "body",
"dialog_node": "node_4_1655399659061"
},
"conditions": "#account_type:business",
"dialog_node": "node_1_1655399028379",
"previous_sibling": "node_3_1655399027429"
},
{
"type": "standard",
"title": "intent_collection",
"output": {
"generic": [
{
"values": [
{
"text": "Thank you for confirming that you want to reset your password."
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"next_step": {
"behavior": "jump_to",
"selector": "body",
"dialog_node": "node_3_1655397279884"
},
"conditions": "#password_reset",
"dialog_node": "node_3_1655396920143",
"previous_sibling": "Welcome"
},
{
"type": "frame",
"title": "account_type_confirmation",
"output": {
"generic": [
{
"values": [
{
"text": "Thank you"
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"parent": "node_3_1655396920143",
"context": {},
"next_step": {
"behavior": "skip_user_input"
},
"conditions": "#password_reset",
"dialog_node": "node_3_1655397279884"
},
{
"type": "standard",
"title": "personal_account",
"output": {
"generic": [
{
"values": [
{
"text": "We have sent you an email with a password reset link."
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"parent": "node_3_1655397279884",
"next_step": {
"behavior": "jump_to",
"selector": "body",
"dialog_node": "node_4_1655399659061"
},
"conditions": "#account_type:personal",
"dialog_node": "node_3_1655399027429"
},
{
"type": "standard",
"title": "reset_confirmation",
"output": {
"generic": [
{
"values": [
{
"text": "Do you need assistance with anything else today?"
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"digress_in": "does_not_return",
"dialog_node": "node_4_1655399659061",
"previous_sibling": "node_3_1655396920143"
},
{
"type": "slot",
"output": {},
"parent": "node_3_1655397279884",
"variable": "$account_type",
"dialog_node": "slot_9_1655398217028",
"previous_sibling": "node_1_1655399028379"
},
{
"type": "standard",
"title": "welcome",
"output": {
"generic": [
{
"values": [
{
"text": "Hello. How can I help you?"
}
],
"response_type": "text",
"selection_policy": "sequential"
}
]
},
"conditions": "welcome",
"dialog_node": "Welcome"
}
],
"counterexamples": [],
"system_settings": {
"off_topic": {
"enabled": true
},
"disambiguation": {
"prompt": "Did you mean:",
"enabled": true,
"randomize": true,
"max_suggestions": 5,
"suggestion_text_policy": "title",
"none_of_the_above_prompt": "None of the above"
},
"human_agent_assist": {
"prompt": "Did you mean:"
},
"intent_classification": {
"training_backend_version": "v2"
},
"spelling_auto_correct": true
},
"learning_opt_out": false,
"name": "Reset Password",
"language": "en",
"description": "Basic Password Reset Request"
}
So what I am missing in my original files, is essentially:
"intents":
and for the entities file:
"entities"
at the start of each list of dictionaries.
Additionally, I would need to wrap the whole thing in curly braces to comply with json formatting.
As seen, the final goal is not just appending these two to one another but the file technically continues with some other JSON code that I have yet to write and deal with.
My question now is as follows; by what method can I either add in these words and the braces to the individual files, then combine them into a singular JSON or alternatively by what method can I read in these files and combine them with the changes all in one go?
The new output file closing on a curly brace after the entities list of dicts is an acceptable outcome for me at the time, so that I can continue to make changes and hopefully further learn from this how to do these changes in future when I get there.
TIA

JSON is only a string format. You load it into a native language structure (in Python, lists and dicts), do what you need, then dump it back. So you don't "add strings" or "add brackets"; you modify the structure.
import json

# Load both source files. Each holds a top-level JSON array, which json.load
# returns as a Python list. The `with` blocks close the handles promptly.
with open('intents.txt') as intents_file:
    intents = json.load(intents_file)
with open('entities.txt') as entities_file:
    entities = json.load(entities_file)

# Wrap the two lists in a dict so they serialize as one JSON object with
# "intents" and "entities" keys.
content = {
    "intents": intents,
    "entities": entities
}

# Write to a separate output file. The original reused `file`, which at this
# point held 'entities.txt', so the dump overwrote the entities input.
with open('combined.json', 'w') as output_file:
    json.dump(content, output_file)

If you're reading all the JSON in as a string, you can just prepend '{"intents":' to the start and append a closing "}".
# Placeholder for the raw JSON text (a serialized list) read from the file.
myJson = "your json string"
# Surround the text with an object that exposes it under the "intents" key.
# Doubled braces are literal braces in str.format.
myWrappedJson = '{{"intents":{}}}'.format(myJson)

How to browse and get only json position 0 in python [duplicate]

This question already has answers here:
How to extract data from dictionary in the list
(3 answers)
Closed 11 months ago.
I have the following json output.
"detections": [
{
"source": "detection",
"uuid": "50594028",
"detectionTime": "2022-03-27T06:50:56Z",
"ingestionTime": "2022-03-27T07:04:50Z",
"filters": [
{
"id": "F2058",
"unique_id": "3638f7c0",
"level": "critical",
"name": "Possible Right-To-Left Override Attack",
"description": "Possible Right-To-Left Override Detected in the Filename",
"tactics": [
"TA0005"
],
"techniques": [
"T1036.002"
],
"highlightedObjects": [
{
"field": "fileName",
"type": "filename",
"value": [
"1465940311.,S=473394(NONAMEFL(Z00057-PI‮fdp.exe))"
]
},
{
"field": "filePathName",
"type": "fullpath",
"value": "/exports/10_19/mail/12/91/20193/new/1465940311.,S=473394(NONAMEFL(Z00057-PI‮fdp.exe))"
},
{
"field": "malName",
"type": "detection_name",
"value": "HEUR_RLOTRICK.A"
},
{
"field": "actResult",
"type": "text",
"value": [
"Passed"
]
},
{
"field": "scanType",
"type": "text",
"value": "REALTIME"
}
]
},
{
"id": "F2140",
"unique_id": "5a313874",
"level": "medium",
"name": "Malicious Software",
"description": "A malicious software was detected on an endpoint.",
"tactics": [],
"techniques": [],
"highlightedObjects": [
{
"field": "fileName",
"type": "filename",
"value": [
"1465940311.,S=473394(NONAMEFL(Z00057-PI‮fdp.exe))"
]
},
{
"field": "filePathName",
"type": "fullpath",
"value": "/exports/10_19/mail/12/91/rs001291-excluido-20193/new/1465940311.,S=473394(NONAMEFL(Z00057-PI‮fdp.exe))"
},
{
"field": "malName",
"type": "detection_name",
"value": "HEUR_RLOTRICK.A"
},
{
"field": "actResult",
"type": "text",
"value": [
"Passed"
]
},
{
"field": "scanType",
"type": "text",
"value": "REALTIME"
},
{
"field": "endpointIp",
"type": "ip",
"value": [
"xxx.xxx.xxx"
]
}
]
}
],
"entityType": "endpoint",
"entityName": "xxx(xxx.xxx.xxx)",
"endpoint": {
"name": "xxx",
"guid": "d1dd7e61",
"ips": [
"2xx.xxx.xxx"
]
}
}
Inside the 'filters' list there are two entries, one with level critical and one with level medium, and both have a 'name' field.
I want to print only the first name, but when I print the 'name', it returns both names:
How do I print only the first one?
If I put print in for filters, it returns both names:
If I put print in for detections, it only returns the second 'name' and that's not what I want:

If you only want to print the name of the first filter, why iterate over it, just index it and print the value under "name":
# For every detection, index the first filter directly instead of looping
# over all filters, and print only that filter's name.
for detection in r['detections']:
    first_filter = detection['filters'][0]
    print(first_filter['name'])

How to convert Json to Python object?

How do I convert a complex JSON document to a Python object? I'm having difficulty converting the JSON below to a Python object, and I have to validate this data against the DB later.
Json:
{
"namespace":"Data.Datapoint",
"type":"record",
"name":"Blood Donar",
"fields":[
{
"name":"id",
"type":"int"
},
{
"name":"donor_number",
"type":"string"
},
{
"name":"birth_date",
"type":{
"type":"int",
"logicalType":"date"
},
"doc":"Birth Date"
},
{
"name":"height",
"type":[
"int",
"null"
],
"doc":"Height"
},
{
"name":"applicant_ts",
"type":[
{
"type":"long",
"logicalType":"timestamp-millis"
},
"null"
],
"doc":"Creation Timestamp"
},
{
"name":"arm_preference_ind",
"type":[
"string",
"null"
],
"doc":"Arm Preference; Selection from list"
},
{
"name":"abo_ind",
"type":[
"string",
"null"
],
"doc":"Blood Type/ABO"
},
{
"name":"vein_grading_ind",
"type":[
"string",
"null"
],
"doc":"Vein Grade"
}
]
}

import json
# Raw JSON text to parse, kept in a triple-quoted string so the embedded
# double quotes need no escaping.
data = '''
{ "namespace": "Data.Datapoint", "type": "record", "name": "Blood Donar", "fields": [ { "name": "id", "type": "int" }, { "name": "donor_number", "type": "string" }, { "name": "birth_date", "type": { "type": "int", "logicalType": "date" }, "doc": "Birth Date" }, { "name": "height", "type": [ "int", "null" ], "doc": "Height" }, { "name": "applicant_ts", "type": [ { "type": "long", "logicalType": "timestamp-millis" }, "null" ], "doc": "Creation Timestamp" }, { "name": "arm_preference_ind", "type": [ "string", "null" ], "doc": "Arm Preference; Selection from list" }, { "name": "abo_ind", "type": [ "string", "null" ], "doc": "Blood Type/ABO" }, { "name": "vein_grading_ind", "type": [ "string", "null" ], "doc": "Vein Grade" } ] }
'''
# json.loads parses the text; the top-level JSON object becomes a Python dict
# and the nested "fields" array becomes a list of dicts.
json_data = json.loads(data)
json_data is your python dict obj.
if you want json data from web you can try this
import json
import requests
# Fetch the sample todo list. The timeout keeps the call from blocking
# indefinitely if the server never answers.
response = requests.get("https://jsonplaceholder.typicode.com/todos", timeout=10)
# Raise on 4xx/5xx responses instead of trying to parse an error page as JSON.
response.raise_for_status()
# Response.json() decodes the body directly, so no manual json.loads is needed.
todos = response.json()

Converting nested JSON structures to Pandas DataFrames

I've been struggling with a nested JSON structure and how to convert it to the correct form:
{
"id": "0c576f35-d704-4fa8-8cbb-311c6be36358",
"employee_id": null,
"creator_id": "16ca2db9-206c-4e18-891d-a00a5252dbd3",
"closed_by_id": null,
"request_number": 23,
"priority": "2",
"form_id": "urlaub-weitere-abwesenheiten",
"status": "opened",
"name": "Urlaub & weitere Abwesenheiten",
"read_by_employee": false,
"custom_status": {
"id": 15793,
"name": "In Bearbeitung HR"
},
"due_date": null,
"created_at": "2021-03-29T15:18:37.572040+02:00",
"updated_at": "2021-03-29T15:22:15.590156+02:00",
"closed_at": null,
"archived_at": null,
"attachment_count": 1,
"category": {
"id": "payroll-time-management",
"name": "Payroll, Time & Attendance"
},
"public_comment_count": 0,
"form_data": [
{
"field_id": "subcategory",
"values": [
"Time & Attendance - Manage monthly/year-end consolidation and report"
]
},
{
"field_id": "separator-2",
"values": [
null
]
},
{
"field_id": "art-der-massnahme",
"values": [
"Fortbildung"
]
},
{
"field_id": "bezeichnung-der-schulung-kurses",
"values": [
"dfgzhujiko"
]
},
{
"field_id": "startdatum",
"values": [
"2021-03-26"
]
},
{
"field_id": "enddatum",
"values": [
"2021-03-27"
]
},
{
"field_id": "freistellung",
"values": [
"nein"
]
},
{
"field_id": "mit-bildungsurlaub",
"values": [
""
]
},
{
"field_id": "kommentarfeld_fortbildung",
"values": [
""
]
},
{
"field_id": "separator",
"values": [
null
]
},
{
"field_id": "instructions",
"values": [
null
]
},
{
"field_id": "entscheidung-hr-bp",
"values": [
"Zustimmen"
]
},
{
"field_id": "kommentarfeld-hr-bp",
"values": [
"wsdfghjkmhnbgvfcdxsybvnm,"
]
},
{
"field_id": "individuelle-abstimmung",
"values": [
""
]
}
],
"form_files": [
{
"id": 30129,
"filename": "empty_background.png",
"field_id": "anhang"
}
],
"visible_by_employee": false,
"organization_ids": [],
"need_edit_by_employee": false,
"attachments": []
}
using a simple solution with pandas, dataframe
Request = pd.DataFrame.from_dict(pd.json_normalize(data), orient='columns')
it's displaying almost in its correct form:
How can I split the dictionaries in the form_data and form_files columns? I've done a lot of research, but I'm still having a lot of trouble solving this problem: how to split form_data into columns, not rows, keeping the ID as metadata.

You can do something like this.
pass the dataframe and the column to the function as arguments
def explode_node(child_df, column_value):
    """Expand a column of nested records into one row per record.

    Args:
        child_df: DataFrame holding the nested column. It is not modified.
        column_value: Label of the column to expand. Each cell should be a
            list of dicts (or a single dict), or the string repr of one.

    Returns:
        A new DataFrame with one row per nested record: the record's keys
        become columns next to the remaining columns of ``child_df``, the
        index is reset to 0..n-1 and all column names are lower-cased.
        Rows whose nested cell is missing are dropped; if none remain, an
        empty frame without the nested column is returned.
    """
    import ast  # local import: only needed for string-encoded cells

    # Explicit copy so the assignment and pop below never touch (or warn
    # about) the caller's frame.
    child_df = child_df.dropna(subset=[column_value]).copy()

    if child_df.empty:
        # Nothing to expand: .iloc[0] and pd.concat({}) would both raise here.
        result = child_df.drop(columns=[column_value]).reset_index(drop=True)
        result.columns = [str(name).lower() for name in result.columns]
        return result

    # Cells may arrive as the string repr of a list/dict (e.g. after a CSV
    # round trip). The first cell decides for the whole column, as before.
    if isinstance(child_df[column_value].iloc[0], str):
        child_df[column_value] = child_df[column_value].apply(ast.literal_eval)

    nested = child_df.pop(column_value)
    expanded_child_df = (
        # Keying the concat by the original row label lets the join below
        # line each expanded record up with its parent row.
        pd.concat({i: pd.json_normalize(x) for i, x in nested.items()})
        .reset_index(level=1, drop=True)
        .join(child_df, how='right', lsuffix='_left', rsuffix='_right')
        .reset_index(drop=True)
    )
    # str() keeps non-string column labels from breaking the lower-casing.
    expanded_child_df.columns = [str(name).lower() for name in expanded_child_df.columns]
    return expanded_child_df

Convert CSV to Nested JSON complex structure using Pandas

Converted into a nested JSON file using Pandas
This is the sample csv for one row
name type aitm alitm aaitm adsc1
specs glass 70072187 ESA65Z45 ESA 65Z45 CUT TIP FG 1808-40
I'm trying to achieve the below structure of Nested JSON for every row

import pandas as pd
import json
# Sample input: one row per item; every column other than name/type becomes
# an attribute of the entity.
df = pd.DataFrame(
    [['specs', 'glass', '70072187', 'ESA65Z45', 'ESA 65Z45', 'CUT TIP FG 1808-40'],
     ['specs', 'glass', '666', 'ESA6665', 'ESB 666', 'CUT TIP FG 66-40']],
    columns=['name', 'type', 'aitm', 'alitm', 'aaitm', 'adsc1'])

data = {'entities': []}
for key, grp in df.groupby('name'):
    for idx, row in grp.iterrows():
        # One entity per CSV row, carrying the group name and the row's type.
        temp_dict_alpha = {'name': key, 'type': row['type'], 'data': {'attributes': {}}}
        # Everything except the identifying columns is an attribute.
        attr_row = row[~row.index.isin(['name', 'type'])]
        # Series.iteritems() was removed in pandas 2.0; items() is the
        # equivalent that works on old and new versions alike.
        for idx2, row2 in attr_row.items():
            dict_temp = {}
            dict_temp[idx2] = {'values': []}
            dict_temp[idx2]['values'].append({'value': row2, 'source': 'internal', 'locale': 'en_US'})
            temp_dict_alpha['data']['attributes'].update(dict_temp)
        data['entities'].append(temp_dict_alpha)
print(json.dumps(data, indent=4))
Output:
print(json.dumps(data, indent= 4))
{
"entities": [
{
"name": "specs",
"type": "glass",
"data": {
"attributes": {
"aitm": {
"values": [
{
"value": "70072187",
"source": "internal",
"locale": "en_US"
}
]
},
"alitm": {
"values": [
{
"value": "ESA65Z45",
"source": "internal",
"locale": "en_US"
}
]
},
"aaitm": {
"values": [
{
"value": "ESA 65Z45",
"source": "internal",
"locale": "en_US"
}
]
},
"adsc1": {
"values": [
{
"value": "CUT TIP FG 1808-40",
"source": "internal",
"locale": "en_US"
}
]
}
}
}
},
{
"name": "specs",
"type": "glass",
"data": {
"attributes": {
"aitm": {
"values": [
{
"value": "666",
"source": "internal",
"locale": "en_US"
}
]
},
"alitm": {
"values": [
{
"value": "ESA6665",
"source": "internal",
"locale": "en_US"
}
]
},
"aaitm": {
"values": [
{
"value": "ESB 666",
"source": "internal",
"locale": "en_US"
}
]
},
"adsc1": {
"values": [
{
"value": "CUT TIP FG 66-40",
"source": "internal",
"locale": "en_US"
}
]
}
}
}
}
]
}

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python Cubes OLAP Framework - How to sum a json column? - python

Related

Is there a way to add curly brackets around a list of dictionaries already existing within a JSON file?

How to browse and get only json position 0 in python [duplicate]

How to convert Json to Python object?

Converting nested JSON structures to Pandas DataFrames

Convert CSV to Nested JSON complex structure using Pandas

Categories

Resources