How do I prepend a Json object with ""Users" : " in Python? - python

I need to prepend a Json object with ""Users" : " but I can't figure out how to handle the ":". The closest I've gotten is getting the colon within the quotes and then it spits out an extra comma. Any ideas? So the issue is the colon sits in the quotes and it adds in a comma, which the api endpoint won't accept.
Here is what it should look like:
**["users" :** [{
"email": "hallbeth#placeholder.email",
"dataFields": {
"favoriteTomatoe": "Green Zebra",
"daysSinceLastOrder": "137",
"city": "Lake Michaelberg",
"firstName": "Richard",
"zip": "58570",
"lastName": "Tyler",
"age": "50",
"state": "UT",
"totalTomatoOrders": "23",
"streetAddress": "925 Holland Burgs Suite 652",
"phoneNumber": "+67(4)7940410189",
"gender": "male",
"customMessageOne": "Esse magnam voluptatibus id ex ipsam assumenda excepturi tenetur."
}
}]
And here is what the output looks like
**["users :",** [{
"email": "hallbeth#placeholder.email",
"dataFields": {
"favoriteTomatoe": "Green Zebra",
"daysSinceLastOrder": "137",
"city": "Lake Michaelberg",
"firstName": "Richard",
"zip": "58570",
"lastName": "Tyler",
"age": "50",
"state": "UT",
"totalTomatoOrders": "23",
"streetAddress": "925 Holland Burgs Suite 652",
"phoneNumber": "+67(4)7940410189",
"gender": "male",
"customMessageOne": "Esse magnam voluptatibus id ex ipsam assumenda excepturi tenetur."
}
}]
Here is my code
import requests
import json
import csv
import pdb
limit = 2
curVal = 0
user_list = []
user_list_2 = [
("users" + ' ' + ':')]
with open('john.csv', 'r') as csv_file:
csv_file = csv.reader(csv_file)
next(csv_file)
for line in csv_file :
user_list.append(
[{
"email" : line[2],
"dataFields" : {
"firstName": line[0],
"lastName" : line[1],
"favoriteTomatoe" : line[3],
"totalTomatoOrders" : line[4],
"daysSinceLastOrder" : line[5],
"zip" : line[6],
"phoneNumber" : line[7],
"age" : line[8],
"streetAddress" : line[9],
"city" : line[10],
"state" : line[11],
"customMessageOne" : line[12],
"gender" : line[13]
}
}])
if curVal == limit:
body = json.dumps(user_list_2 + user_list)
print(body)
headers = {
"Content-Type": "application/json",
"Accept": "application/json"}
res = requests.request("POST",
"https://api.iterable.com/api/users/bulkUpdate?apiKey="key",
headers=headers, data=body)
curVal = 0
user_list = []
print(res.url + "\n\n" + str(res.status_code) + res.text)
else:
curVal = curVal + 1

It seems there are multiple misunderstandings in your code.
First, each user is added to the user_list as a single-item-list containing one user-dictionary. You could simply skip the single-item-list level and simply append the dictionary:
user_list.append({
"email" : line[2],
"dataFields" : {
"firstName": line[0],
"lastName" : line[1],
"favoriteTomatoe" : line[3],
"totalTomatoOrders" : line[4],
"daysSinceLastOrder" : line[5],
"zip" : line[6],
"phoneNumber" : line[7],
"age" : line[8],
"streetAddress" : line[9],
"city" : line[10],
"state" : line[11],
"customMessageOne" : line[12],
"gender" : line[13]
}
})
Then you can consider a sort of mapping between Python and JSON types:
Python list = JSON array
Python dict = JSON object
So, the user_list can be interpreted as a JSON array and if you want it to be assigned as the Users property of a JSON object, you just have to assign the user_list as the value of a Python dict's Users key. Then passing the Python dict
to the json.dumps function should return the wanted JSON data:
# JSON keys are case-sensitive. The payload the question shows as the accepted
# format uses lowercase "users", so that exact spelling is used for the key.
body = json.dumps({'users': user_list})

Related

create dataframe in pandas using multilevel dict dynamic

I am fetching data from an API and trying to write the response to CSV, but the catch is that the response is a multilevel dict (JSON): when I convert it to CSV, most of the columns end up holding dicts or lists of dicts.
I am trying using this
def expand(data):
d = pd.Series(data)
t = d.index
for i in t:
if type(d[i]) in (list,dict):
expend_s = pd.Series(d[i])
t.append(expend_s.index)
d = d.append(expend_s)
d = d.drop([i])
return d
df['person'].apply(expand)
but this solution is not working. if we see person col there is multiple dict or list of dict like
"birthDate": "0000-00-00",
"genderCode": {
"codeValue": "M",
"shortName": "Male",
"longName": "Male"
},
"maritalStatusCode": {
"codeValue": "M",
"shortName": "Married"
},
"disabledIndicator": False,
"preferredName": {},
"ethnicityCode": {
"codeValue": "4",
"shortName": "4",
"longName": "Not Hispanic or Latino"
},
"raceCode": {
"identificationMethodCode": {},
"codeValue": "1",
"shortName": "White",
"longName": "White"
},
"militaryClassificationCodes": [],
"governmentIDs": [
{
"itemID": "9200037107708_4385",
"idValue": "XXX-XX-XXXX",
"nameCode": {
"codeValue": "SSN",
"longName": "Social Security Number"
},
"countryCode": "US"
}
],
"legalName": {
"givenName": "Jack",
"middleName": "C",
"familyName1": "Abele",
"formattedName": "Abele, Jack C"
},
"legalAddress": {
"nameCode": {
"codeValue": "Personal Address 1",
"shortName": "Personal Address 1",
"longName": "Personal Address 1"
},
"lineOne": "1932 Keswick Lane",
"cityName": "Concord",
"countrySubdivisionLevel1": {
"subdivisionType": "StateTerritory",
"codeValue": "CA",
"shortName": "California"
},
"countryCode": "US",
"postalCode": "94518"
},
"communication": {
"mobiles": [
{
"itemID": "9200037107708_4389",
"nameCode": {
"codeValue": "Personal Cell",
"shortName": "Personal Cell"
},
"countryDialing": "1",
"areaDialing": "925",
"dialNumber": "6860589",
"access": "1",
"formattedNumber": "(925) 686-0589"
}
]
}
}
your suggestion and advice would be so helpful
I think we can flatten the nested dicts by reading the data with pd.json_normalize, and handle the lists of dicts with the function below. First we find the columns that contain lists.
def df_list_and_dict_col(explode_df: pd.DataFrame, primary_key: str,
col_name: str, folder: str) -> pd.DataFrame:
""" convert list of dict or list of into clean dataframe
Keyword arguments:
-----------------
dict: explode_df -- dataframe where we have to expand column
dict: col_name -- main_file name where most of data is present
Return: pd.DataFrame
return clean or expand dataframe
"""
explode_df[col_name] = explode_df[col_name].replace('', '[]', regex=True)
explode_df[col_name] = explode_df[col_name].fillna('[]')
explode_df[col_name] = explode_df[col_name].astype(
'string') # to make sure that entire column is string
explode_df[col_name] = explode_df[col_name].apply(ast.literal_eval)
explode_df = explode_df.explode(col_name)
explode_df = explode_df.reset_index(drop=True)
normalized_df = pd.json_normalize(explode_df[col_name])
explode_df = explode_df.join(
other=normalized_df,
lsuffix="_left",
rsuffix="_right"
)
explode_df = explode_df.drop(columns=col_name)
type_df = explode_df.applymap(type)
col_list = []
for col in type_df.columns:
if (type_df[col]==type([])).any():
col_list.append(col)
# print(col_list,explode_df.columns)
if len(col_list) != 0:
for col in col_list:
df_list_and_dict_col(explode_df[[primary_key,col]], primary_key,
col, folder)
explode_df.drop(columns=col, inplace =True)
print(f'{col}.csv is done')
explode_df.to_csv(f'{folder}/{col_name}.csv')
First we collect the columns that contain lists and pass them to the function one by one; the function then checks whether the expanded column itself contains any lists, processes those in the same way, and saves each result to CSV:
type_df = df.applymap(type)
col_list =[]
for col in type_df.columns:
if (type_df[col]==type([])).any():
col_list.append(col)
for col in col_list:
# print(col, df[['associateOID',col]])
df_list_and_dict_col(df[['primary_key',col]].copy(), 'primary_key', col,folder='worker')
df.drop(columns=col, inplace=True)
now you have multiple csv in normalise format

Create a nested data dictionary in Python

I have the data as below
{
"employeealias": "101613177",
"firstname": "Lion",
"lastname": "King",
"date": "2022-04-21",
"type": "Thoughtful Intake",
"subject": "Email: From You Success Coach"
}
{
"employeealias": "101613177",
"firstname": "Lion",
"lastname": "King",
"date": "2022-04-21",
"type": null,
"subject": "Call- CDL options & career assessment"
}
I need to create a dictionary like the below:
You have to create new dictionary with list and use for-loop to check if exists employeealias, firstname, lastname to add other information to sublist. If item doesn't exist then you have to create new item with employeealias, firstname, lastname and other information.
# Sample input: one flat record per interaction. JSON ``null`` is written as
# ``None`` in Python; the string "null" would be a different (non-null) value.
data = [
    {
        "employeealias": "101613177",
        "firstname": "Lion",
        "lastname": "King",
        "date": "2022-04-21",
        "type": "Thoughtful Intake",
        "subject": "Email: From You Success Coach",
    },
    {
        "employeealias": "101613177",
        "firstname": "Lion",
        "lastname": "King",
        "date": "2022-04-21",
        "type": None,
        "subject": "Call- CDL options & career assessment",
    },
]

result = {"interactions": []}

# Employees already added to the result, keyed by the fields that identify
# them, so each row finds its entry directly instead of rescanning the list.
employees_by_identity = {}

for row in data:
    identity = (row["employeealias"], row["firstname"], row["lastname"])
    employee = employees_by_identity.get(identity)
    if employee is None:
        # First row for this employee: create the entry with an empty
        # activity list and register it in both the index and the result.
        employee = {
            "employeealias": row["employeealias"],
            "firstname": row["firstname"],
            "lastname": row["lastname"],
            "activity": [],
        }
        employees_by_identity[identity] = employee
        result["interactions"].append(employee)
    # Every row, new employee or not, contributes exactly one activity.
    employee["activity"].append({
        "date": row["date"],
        "subject": row["subject"],
        "type": row["type"],
    })

print(result)
EDIT:
You read the lines as plain text, but you have to convert each line of text to a dictionary using the json module:
import json

# Assumes the file holds one complete JSON document per line; each line is
# parsed on its own with json.loads() and collected into ``data``.
data = []
with open("/Users/Downloads/amazon_activity_feed_0005_part_00.json",
          encoding="utf-8") as a_file:
    for line in a_file:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline) is not valid JSON and
            # would make json.loads() raise, so skip it.
            continue
        data.append(json.loads(line))

print(data)
You can create a nested dictionary inside Python like this:
# In a dict literal every entry is written ``key: value`` and string keys must
# be quoted. A nested dictionary is simply the value of one of the keys; the
# key name "school" is chosen here for illustration.
student = {
    "name": "Suman",
    "Age": 20,
    "gender": "male",
    "school": {"class": 11, "roll no": 12},
}

How do I parse nested json objects?

I am trying to load a JSON file to parse the contents nested in the root object. Currently I have the JSON file open and loaded as such:
with open(outputFile.name) as f:
data = json.load(f)
For the sake of the question here is an example of what the contents of the JSON file are like:
{
"rootObject" :
{
"person" :
{
"address" : "some place ave. 123",
"age" : 47,
"name" : "Joe"
},
"kids" :
[
{
"age" : 20,
"name" : "Joey",
"studySubject":"math"
},
{
"age" : 16,
"name" : "Josephine",
"studySubject":"chemistry"
}
],
"parents" :
{
"father" : "Joseph",
"mother" : "Joette"
}
How do I access the nested objects in "rootObject", such as "person", "kids" and its contents, and "parents"?
Below code using recursive function can extract values using specific key in a nested dictionary or 'lists of dictionaries':
data = {
"rootObject" :
{
"person" :
{
"address" : "some place ave. 123",
"age" : 47,
"name" : "Joe"
},
"kids" :
[
{
"age" : 20,
"name" : "Joey",
"studySubject":"math"
},
{
"age" : 16,
"name" : "Josephine",
"studySubject":"chemistry"
}
],
"parents" :
{
"father" : "Joseph",
"mother" : "Joette"
}
}}
def get_vals(nested, key):
    """Collect every value stored under ``key`` anywhere inside ``nested``.

    ``nested`` may be any combination of dicts and lists. Lists are searched
    element by element; dicts are searched through their list/dict values
    first and then checked for ``key`` themselves, so matches found deeper in
    the structure appear before a match in the enclosing dict.

    Returns a list of the matching values (empty when there is no match or
    when ``nested`` is neither a list nor a dict).
    """
    result = []
    if isinstance(nested, list):
        for item in nested:
            result.extend(get_vals(item, key))
    elif isinstance(nested, dict):
        for val in nested.values():
            # Only containers can hold further matches.
            if isinstance(val, (list, dict)):
                result.extend(get_vals(val, key))
        if key in nested:
            result.append(nested[key])
    return result
get_vals(data, 'person')
Output
[{'address': 'some place ave. 123', 'age': 47, 'name': 'Joe'}]
The code for loading the JSON object should look like this:
from json import loads, load
with open("file.json") as file:
    # load() reads the open file and parses it in one step, returning the
    # corresponding Python object (a dict for a JSON object).
    # Do not wrap the result in loads(): loads() parses JSON text held in a
    # str/bytes value and raises TypeError when given the already-parsed dict.
    var = load(file)

i want to convert sample JSON data into nested JSON using specific key-value in python

I have below sample data in JSON format :
project_cost_details is my database result set after querying.
{
"1": {
"amount": 0,
"breakdown": [
{
"amount": 169857,
"id": 4,
"name": "SampleData",
"parent_id": "1"
}
],
"id": 1,
"name": "ABC PR"
}
}
Here is full json : https://jsoneditoronline.org/?id=2ce7ab19af6f420397b07b939674f49c
Expected output :https://jsoneditoronline.org/?id=56a47e6f8e424fe8ac58c5e0732168d7
I have this sample JSON which i created using loops in code. But i am stuck at how to convert this to expected JSON format. I am getting sequential changes, need to convert to tree like or nested JSON format.
Trying in Python :
project_cost = {}
for cost in project_cost_details:
if cost.get('Parent_Cost_Type_ID'):
project_id = str(cost.get('Project_ID'))
parent_cost_type_id = str(cost.get('Parent_Cost_Type_ID'))
if project_id not in project_cost:
project_cost[project_id] = {}
if "breakdown" not in project_cost[project_id]:
project_cost[project_id]["breakdown"] = []
if 'amount' not in project_cost[project_id]:
project_cost[project_id]['amount'] = 0
project_cost[project_id]['name'] = cost.get('Title')
project_cost[project_id]['id'] = cost.get('Project_ID')
if parent_cost_type_id == cost.get('Cost_Type_ID'):
project_cost[project_id]['amount'] += int(cost.get('Amount'))
#if parent_cost_type_id is None:
project_cost[project_id]["breakdown"].append(
{
'amount': int(cost.get('Amount')),
'name': cost.get('Name'),
'parent_id': parent_cost_type_id,
'id' : cost.get('Cost_Type_ID')
}
)
from this i am getting sample JSON. It will be good if get in this code only desired format.
Also tried this solution mention here : https://adiyatmubarak.wordpress.com/2015/10/05/group-list-of-dictionary-data-by-particular-key-in-python/
I got approach to convert sample JSON to expected JSON :
# Flat input: every record names its parent; the string "null" (or any name
# that matches no record) marks a top-level node.
data = [
    {"name": "ABC", "parent": "DEF"},
    {"name": "DEF", "parent": "null"},
    {"name": "new_name", "parent": "ABC"},
    {"name": "new_name2", "parent": "ABC"},
    {"name": "Foo", "parent": "DEF"},
    {"name": "Bar", "parent": "null"},
    {"name": "Chandani", "parent": "new_name", "relation": "rel", "depth": 3},
    {"name": "Chandani333", "parent": "new_name", "relation": "rel", "depth": 3},
]

# Index every record by name so a parent can be looked up directly.
result = {x.get("name"): x for x in data}

tree = []
for node in data:
    parent = result.get(node.get("parent"))
    if parent is None:
        # No record carries the parent's name: this node is a root.
        tree.append(node)
    else:
        # Attach the node to its parent. The records in ``data`` are mutated
        # in place: they gain a "children" list the first time one is needed.
        parent.setdefault("children", []).append(node)
Reference help : http://jsfiddle.net/9FqKS/ this is a JavaScript solution i converted to Python
It seems that you want to get a list of values from a dictionary.
# Keep only the per-project entries, dropping the dictionary keys.
result = list(project_cost_details.values())

JSON move child keys up one level

I have a Python script that parses a JSON file like below:
[
{
"_index": "bulletins",
"_type": "bulletin",
"_id": "OPENWRT-SA-000001",
"_score": null,
"_source": {
"lastseen": "2016-09-26T15:45:23",
"references": [
"http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-3193",
],
"affectedPackage": [
{
"OS": "OpenWrt",
"OSVersion": "15.05",
"packageVersion": "9.9.8-P3-1",
"packageFilename": "UNKNOWN",
"arch": "all",
"packageName": "bind",
"operator": "lt"
}
],
"edition": 1,
"description": "value in here,
"reporter": "OpenWrt Project",
"published": "2016-01-24T13:33:41",
"title": "bind: Security update (4 CVEs)",
"type": "openwrt",
"bulletinFamily": "unix",
"cvelist": [
"CVE-2015-8704",
],
"modified": "2016-01-24T13:33:41",
"id": "OPENWRT-SA-000001",
"href": "https://lists.openwrt.org/pipermail/openwrt-security-announce/2016-January/000001.html",
"cvss": {
"score": 7.1,
"vector": "AV:NETWORK/AC:MEDIUM/Au:NONE/C:NONE/I:NONE/A:COMPLETE/"
}
},
"sort": [
34872
]
},
I have removed some of the values to keep the post shorter but leaving some in to try to keep the structure.
I want to take all sub keys from the _source key and move them up to the same level as _source and then delete the _source key.
My code to parse the JSON is:
import json
import logging
import logging.handlers
import os
import pymongo
from pymongo import MongoClient
def import_json(mongo_server,mongo_port, vuln_folder):
try:
logging.info('Connecting to MongoDB')
client = MongoClient(mongo_server, mongo_port)
db = client['vuln_sets']
coll = db['vulnerabilities']
logging.info('Connected to MongoDB')
basepath = os.path.dirname(__file__)
filepath = os.path.abspath(os.path.join(basepath, ".."))
archive_filepath = filepath + vuln_folder
filedir = os.chdir(archive_filepath)
file_count = 0
for item in os.listdir(filedir):
if item.endswith('.json'):
file_name = os.path.abspath(item)
with open(item, 'r') as currentfile:
vuln_counter = 0
duplicate_count = 0
logging.info('Currently processing ' + item)
file_count +=1
json_data = currentfile.read()
vuln_content = json.loads(json_data)
for vuln in vuln_content:
try:
del vuln['_type']
coll.insert(vuln, continue_on_error=True)
vuln_counter +=1
except pymongo.errors.DuplicateKeyError:
duplicate_count +=1
logging.info('Added ' + str(vuln_counter) + ' vulnerabilities for ' + item)
logging.info('Found ' + str(duplicate_count) + ' duplicate records!')
os.remove(file_name)
logging.info('Processed ' + str(file_count) + ' files')
except Exception as e:
logging.exception(e)
Which you can see already deletes one key that is not needed but that key has no needed data where as I need the sub keys from _source. I am not sure on the best way to achieve this, whether it would be programmatically correct to just re-create the JSON file with the new info but I need to keep the order of the keys and structure apart from moving the sub keys up one level.
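You can use the dictionary update() function to achieve what you're trying to do, but it's important to note that before Python 3.7 dictionaries did not guarantee an "order of the keys" - see: Key Order in Python Dictionaries. From Python 3.7 onward a dict preserves insertion order, so the keys copied by update() are appended after the existing ones.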
Here's an example of one way to do this, starting with a dictionary definition.
d = {
"_index": "bulletins",
"_type": "bulletin",
"_id": "OPENWRT-SA-000001",
"_score": None,
"_source": {
"lastseen": "2016-09-26T15:45:23",
"references": [
"http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-3193",
],
"affectedPackage": [
{
"OS": "OpenWrt",
"OSVersion": "15.05",
"packageVersion": "9.9.8-P3-1",
"packageFilename": "UNKNOWN",
"arch": "all",
"packageName": "bind",
"operator": "lt"
}
],
"edition": 1,
"description": "value in here",
"reporter": "OpenWrt Project",
"published": "2016-01-24T13:33:41",
"title": "bind: Security update (4 CVEs)",
"type": "openwrt",
"bulletinFamily": "unix",
"cvelist": [
"CVE-2015-8704",
],
"modified": "2016-01-24T13:33:41",
"id": "OPENWRT-SA-000001",
"href": "https://lists.openwrt.org/pipermail/openwrt-security-announce/2016-January/000001.html",
"cvss": {
"score": 7.1,
"vector": "AV:NETWORK/AC:MEDIUM/Au:NONE/C:NONE/I:NONE/A:COMPLETE/"
}
}
}
# Create a new dictionary with every top-level entry except the key "_source".
new_d = {key: d[key] for key in d if key != '_source'}
# Add the keys/values from "_source" to the new dictionary.
new_d.update(d['_source']) # This will overwrite any existing keys with the same name
The output of new_d:
{'_id': 'OPENWRT-SA-000001',
'_index': 'bulletins',
'_score': None,
'_type': 'bulletin',
'affectedPackage': [{'OS': 'OpenWrt',
'OSVersion': '15.05',
'arch': 'all',
'operator': 'lt',
'packageFilename': 'UNKNOWN',
'packageName': 'bind',
'packageVersion': '9.9.8-P3-1'}],
'bulletinFamily': 'unix',
'cvelist': ['CVE-2015-8704'],
'cvss': {
'score': 7.1,
'vector': 'AV:NETWORK/AC:MEDIUM/Au:NONE/C:NONE/I:NONE/A:COMPLETE/'},
'description': 'value in here',
'edition': 1,
'href': 'https://lists.openwrt.org/pipermail/openwrt-security-announce/2016-January/000001.html',
'id': 'OPENWRT-SA-000001',
'lastseen': '2016-09-26T15:45:23',
'modified': '2016-01-24T13:33:41',
'published': '2016-01-24T13:33:41',
'references': ['http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-
3193'],
'reporter': 'OpenWrt Project',
'title': 'bind: Security update (4 CVEs)',
'type': 'openwrt'}
I managed to get it working by using the following code:
for vuln in vuln_content:
    try:
        del vuln['_type']
        # Copy every top-level field except "_source", then lift the fields
        # held inside "_source" up to the top level of the new document.
        new_vuln = {key: vuln[key] for key in vuln if key != '_source'}
        new_vuln.update(vuln['_source'])
        # NOTE(review): Collection.insert() was deprecated in PyMongo 3 and
        # removed in PyMongo 4 (insert_one() is the replacement) - confirm
        # which driver version this runs against.
        coll.insert(new_vuln, continue_on_error=True)
        vuln_counter +=1
    except pymongo.errors.DuplicateKeyError:
        duplicate_count +=1

Categories