How to access certain values inside a string in Python - python

I need to extract only a particular element value inside the string.
Below is the code which I used to get the AdsInsight data using Facebook AdsInsight API.
class LibFacebook:
    """Wrapper around a single Facebook ad account (snippet from the question)."""

    def __init__(self, app_id, app_secret, access_token, ad_account_id):
        """Initialise the Ads API, bind the ad account and print its insights.

        NOTE(review): the snippet's indentation was lost; the insights query is
        placed inside ``__init__`` because it reads ``self.account`` - confirm
        against the asker's real code.
        """
        FacebookAdsApi.init(app_id, app_secret, access_token)
        self.account = AdAccount(ad_account_id)

        # Get ads insights: campaign id and actions, requested at campaign level.
        insights = self.account.get_insights(
            fields=[
                AdsInsights.Field.campaign_id,
                AdsInsights.Field.actions,
            ],
            params={
                'level': AdsInsights.Level.campaign,
            },
        )
        print(insights)
Output
<AdsInsights> {
"campaign_id": "23843294609751234",
"actions": [
{
"action_type": "post_reaction",
"value": "1"
},
{
"action_type": "landing_page_view",
"value": "78"
},
{
"action_type": "link_click",
"value": "163"
}
]
Question: Along with the campaign_id value (23843294609751234), I need only the value of landing_page_view, i.e. 78 (and not the other action items), and I want to put both in a DataFrame. How do I access them?
Further Information: AdsInsights.Field.actions is of type string.
type(AdsInsights.Field.actions)
str

Hope this works.
Let's assume your data is a list of AdsInsights objects:
obj = [{
"campaign_id": "23843294609751234",
"actions" : [
{
"action_type": "post_reaction",
"value": "1"
},
{
"action_type": "landing_page_view",
"value": "78"
},
{
"action_type": "link_click",
"value": "163"
}
]
},
{
"campaign_id": "112233",
"actions" : [
{
"action_type": "post_reaction",
"value": "1"
},
{
"action_type": "landing_page_view",
"value": "100"
},
{
"action_type": "link_click",
"value": "163"
}
]
}]
you can get result like this
# Build one record per entry of `obj`: its campaign_id plus the value of the
# landing_page_view action (the "value" key is absent when no such action exists).
result_arr = []
for i in obj:
    datadict = {"campaign_id": i.get("campaign_id")}
    # `or []` keeps the loop safe when an entry has no "actions" at all.
    for action in i.get("actions") or []:
        if action.get("action_type") == "landing_page_view":
            datadict["value"] = action.get("value")
    result_arr.append(datadict)
result_arr would be
[{'campaign_id': '23843294609751234', 'value': '78'},
{'campaign_id': '112233', 'value': '100'}]
next convert list of dictionaries to a dataframe
df=pd.DataFrame(result_arr)

Related

replace nested document array mongodb with python

i have this document in mongodb
{
"_id": {
"$oid": "62644af0368cb0a46d7c2a95"
},
"insertionData": "23/04/2022 19:50:50",
"ipfsMetadata": {
"Name": "data.json",
"Hash": "Qmb3FWgyJHzJA7WCBX1phgkV93GiEQ9UDWUYffDqUCbe7E",
"Size": "431"
},
"metadata": {
"sessionDate": "20220415 17:42:55",
"dataSender": "user345",
"data": {
"height": "180",
"weight": "80"
},
"addtionalInformation": [
{
"name": "poolsize",
"value": "30m"
},
{
"name": "swimStyle",
"value": "mariposa"
},
{
"name": "modality",
"value": "swim"
},
{
"name": "gender-title",
"value": "schoolA"
}
]
},
"fileId": {
"$numberLong": "4"
}
}
I want to update one element of the nested array, for instance the element whose name is gender-title. It currently has the value schoolA, and I want to change it to the value sent in the request body (adultos). I pass the fileId as a parameter in the request URL, and in the body I pass this
post request : localhost/sessionUpdate/4
and body:
{
"name": "gender-title",
"value": "adultos"
}
flask
@app.route('/sessionUpdate/<string:a>', methods=['PUT'])
def sessionUpdate(a):
    """Set the ``value`` of one ``metadata.addtionalInformation`` entry.

    ``a`` is the fileId taken from the URL; the JSON request body supplies the
    ``name`` of the entry to change and its new ``value``.
    """
    datas = request.json
    r = str(datas['name'])
    r2 = str(datas['value'])
    print(r, r2)
    # NOTE(review): this filter is the asker's original and is kept as posted.
    # It looks for array elements equal to the string `r`, while the elements
    # are sub-documents, and `a` arrives as a str (fileId is presumably stored
    # as a number) - so nothing matches. See the answer below for the filter
    # on "metadata.addtionalInformation.name".
    r3 = collection.update_one(
        {'fileId': a, 'metadata.addtionalInformation': r},
        {'$set': {'metadata.addtionalInformation.$.value': r2}},
    )
    return str(r3), 200
I'm getting a 200 response, but the document is not updated with the new value.
As you are using positional operator $ to work with your array, make sure your select query is targeting array element. You can see in below query that it is targeting metadata.addtionalInformation array with the condition that name: "gender-title"
// Filter: select the document by fileId AND by the array element whose
// name is "gender-title", so the positional `$` below has an element to refer to.
db.collection.update({
"fileId": 4,
"metadata.addtionalInformation.name": "gender-title"
},
{
// Update: `$` stands for the array element matched by the filter above.
"$set": {
"metadata.addtionalInformation.$.value": "junior"
}
})
Here is the Mongo playground for your reference.

Creating custom JSON from existing JSON using Python

(Python beginner alert) I am trying to create a custom JSON from an existing JSON. The scenario is - I have a source which can send many set of fields but I want to cherry pick some of them and create a subset of that while maintaining the original JSON structure. Original Sample
{
"Response": {
"rCode": "11111",
"rDesc": "SUCCESS",
"pData": {
"code": "123-abc-456-xyz",
"sData": [
{
"receiptTime": "2014-03-02T00:00:00.000",
"sessionDate": "2014-02-28",
"dID": {
"d": {
"serialNo": "3432423423",
"dType": "11111",
"dTypeDesc": "123123sd"
},
"mode": "xyz"
},
"usage": {
"duration": "661",
"mOn": [
"2014-02-28_20:25:00",
"2014-02-28_22:58:00"
],
"mOff": [
"2014-02-28_21:36:00",
"2014-03-01_03:39:00"
]
},
"set": {
"abx": "1",
"ayx": "1",
"pal": "1"
},
"rEvents": {
"john": "doe",
"lorem": "ipsum"
}
},
{
"receiptTime": "2014-04-02T00:00:00.000",
"sessionDate": "2014-04-28",
"dID": {
"d": {
"serialNo": "123123",
"dType": "11111",
"dTypeDesc": "123123sd"
},
"mode": "xyz"
},
"usage": {
"duration": "123",
"mOn": [
"2014-04-28_20:25:00",
"2014-04-28_22:58:00"
],
"mOff": [
"2014-04-28_21:36:00",
"2014-04-01_03:39:00"
]
},
"set": {
"abx": "4",
"ayx": "3",
"pal": "1"
},
"rEvents": {
"john": "doe",
"lorem": "ipsum"
}
}
]
}
}
}
Here the sData array tag has got few tags out of which I want to keep only 24 and get rid of the rest. I know I could use element.pop() but I cannot go and delete a new incoming field every time the source publishes it. Below is the expected output -
Expected Output
{
"Response": {
"rCode": "11111",
"rDesc": "SUCCESS",
"pData": {
"code": "123-abc-456-xyz",
"sData": [
{
"receiptTime": "2014-03-02T00:00:00.000",
"sessionDate": "2014-02-28",
"usage": {
"duration": "661",
"mOn": [
"2014-02-28_20:25:00",
"2014-02-28_22:58:00"
],
"mOff": [
"2014-02-28_21:36:00",
"2014-03-01_03:39:00"
]
},
"set": {
"abx": "1",
"ayx": "1",
"pal": "1"
}
},
{
"receiptTime": "2014-04-02T00:00:00.000",
"sessionDate": "2014-04-28",
"usage": {
"duration": "123",
"mOn": [
"2014-04-28_20:25:00",
"2014-04-28_22:58:00"
],
"mOff": [
"2014-04-28_21:36:00",
"2014-04-01_03:39:00"
]
},
"set": {
"abx": "4",
"ayx": "3",
"pal": "1"
}
}
]
}
}
}
I myself took reference from How can I create a new JSON object form another using Python? but its not working as expected. Looking forward for inputs/solutions from all of you gurus. Thanks in advance.
Kind of like this:
# Load the full payload; the context manager closes the file once it is parsed.
with open("fullset.json") as f:
    data = json.load(f)
def subset(d, keys=('receiptTime', 'sessionDate', 'usage', 'set')):
    """Return a new dict holding only the whitelisted ``keys`` of ``d``.

    Because fields are picked by name, any new field the source starts
    publishing is dropped automatically.

    Args:
        d: one record (a mapping) from the ``sData`` array.
        keys: names of the fields to keep, in output order.

    Returns:
        A new dict with exactly ``keys``; ``d`` itself is not modified.

    Raises:
        KeyError: if ``d`` lacks one of ``keys``. This is deliberate - a
            misspelled field name fails loudly instead of being skipped.
    """
    return {name: d[name] for name in keys}
# Replace every sData record with its whitelisted subset, keeping the rest of
# the document structure untouched.
data['Response']['pData']['sData'] = [subset(d) for d in data['Response']['pData']['sData']]
# `with` makes sure the output file is flushed and closed after writing.
with open('newdata.json', 'w') as out:
    json.dump(data, out)

MongoDB Aggregation Attribute Pattern Pipeline/Query

I have Attribute Patterned (https://www.mongodb.com/blog/post/building-with-patterns-the-attribute-pattern) field that looks like this:
"cmr_diag": [{
"name": "shd?",
"value": {
"$numberDouble": "1"
}
}, {
"name": "ischemic_hd",
"value": {
"$numberDouble": "1"
}
}, {
"name": "non-ischemic_dcmp",
"value": {
"$numberDouble": "1"
}
}, {
"name": "myocarditis",
"value": {
"$numberDouble": "0"
}
}, {
"name": "hcm",
"value": {
"$numberDouble": "0"
}
}, {
"name": "amyloidosis",
"value": {
"$numberDouble": "0"
}
}, {
"name": "toxic_cmp",
"value": {
"$numberDouble": "1"
}
.
.
.
I'd like to create an aggregation pipeline that finds all patients with ONLY ischemic_hd, while all other possible illnesses are 0. I am not sure how to create this query however?
You can use $elemMatch to identify patients with a specific attribute.
If you want to exclude everything else, use $reduce to sum up the values of all of the attributes into a diagcount field, and match where diagcount equals 1.
db.collection.aggregate([
  // Stage 1: keep only patients whose ischemic_hd attribute is 1.
  {$match: {
    cmr_diag: {
      $elemMatch: {
        name: "ischemic_hd",
        // {"$numberDouble": "1"} in the export is the Extended JSON rendering
        // of the double 1, so the stored value is compared as a plain number.
        value: 1
      }
    }
  }},
  // Stage 2: add up all attribute values; with 0/1 flags this counts the
  // positive diagnoses.
  {$addFields: {
    diagcount: {
      $reduce: {
        input: "$cmr_diag",
        initialValue: 0,
        in: {$sum: ["$$value", "$$this.value"]}
      }
    }
  }},
  // Stage 3: exactly one positive diagnosis - stage 1 guarantees it is ischemic_hd.
  {$match: {diagcount: 1}}
])

How to get this json specific word from this array

I have this json and I would like to get only the Name from every array. How do I write it in python,
Currently, I have this li = [item.get(data_new[0]'id') for item in data_new]
where data_new is my json data.
[
{
"id": "1687fbfa-8936-4b77-a7bc-123f9f276c49",
"attributes": [
{
"name": "status",
"value": "rejected",
"scope": "identity"
},
{
"name": "created_ts",
"value": "2020-06-25T16:22:07.578Z",
"scope": "system"
},
{
"name": "updated_ts",
"value": "2020-07-08T12:43:09.361Z",
"scope": "system"
},
{
"name": "artifact_name",
"value": "release-v10",
"scope": "inventory"
},
{
"name": "device_type",
"value": "proddemo-device",
"scope": "inventory"
},
],
"updated_ts": "2020-07-08T12:43:09.361Z"
},
{
"id": "0bf2a1fe-6004-473f-88b7-aab061972115",
"attributes": [
{
"name": "status",
"value": "rejected",
"scope": "identity"
},
{
"name": "created_ts",
"value": "2020-07-01T16:23:00.631Z",
"scope": "system"
},
{
"name": "updated_ts",
"value": "2020-07-08T17:41:16.45Z",
"scope": "system"
},
{
"name": "artifact_name",
"value": "Module_logs_v7",
"scope": "inventory"
},
{
"name": "cpu_model",
"value": "ARMv8 Processor",
"scope": "inventory"
},
{
"name": "device_type",
"value": "device",
"scope": "inventory"
},
{
"name": "hostname",
"value": "device004",
"scope": "inventory"
},
{
"name": "ipv4_br-d6eae8b3a339",
"value": "172.0.0.1/18",
"scope": "inventory"
}
],
"updated_ts": "2020-07-08T12:43:09.361Z"
}
]
This is the output snippet from my API and from this output I want to retrieve the value of the device whose name is hostname, as you can see that is the second last entry from this code where "name": "hostname"
So, I want to retrieve the value for that particular json only where the name will be "hostname", how can I do that.
Please guide me through.
# Sample data: a list of devices, each carrying a list of attribute dicts.
a = [
    {
        'id': '291ae0e5956c69c2267489213df4459d19ed48a806603def19d417d004a4b67e',
        'attributes': [
            {'name': 'ip_addr',
             'value': '1.2.3.4',
             'descriptionName': 'IP address'},
            {'name': 'ports', 'value': ['8080', '8081'], 'description': 'Open ports'},
        ],
        'updated_ts': '2016-10-03T16:58:51.639Z',
    },
    {
        'id': '76f40e5956c699e327489213df4459d1923e1a806603def19d417d004a4a3ef',
        'attributes': [
            {'name': 'mac',
             'value': '00:01:02:03:04:05',
             'descriptionName': 'MAC address'},
        ],
        'updated_ts': '2016-10-04T18:24:21.432Z',
    },
]

# Collect the "descriptionName" of every attribute that has one.
# The loop names i / j match the one-liners shown after this snippet.
descriptionName = []
for i in a:
    for j in i["attributes"]:
        # A membership test replaces scanning every key of the attribute.
        if "descriptionName" in j:
            descriptionName.append(j["descriptionName"])
One liner:
# The `for` clauses run left to right, so the outer loop (for i in a) must come first.
[j["descriptionName"] for i in a for j in i["attributes"] if "descriptionName" in j]
Output:
['IP address', 'MAC address']
Update 1:
To get all names
One liner code -
[j["name"] for j in i["attributes"] for i in a if "name" in j.keys()]
Output:
['status',
'status',
'created_ts',
'created_ts',
'updated_ts',
'updated_ts',
'artifact_name',
'artifact_name',
'cpu_model',
'cpu_model',
'device_type',
'device_type',
'hostname',
'hostname',
'ipv4_br-d6eae8b3a339',
'ipv4_br-d6eae8b3a339']
To get value for which name is "hostname"
[j["value"] for j in i["attributes"] for i in a if "name" in j.keys() and j["name"] == "hostname"]
Output:
['device004', 'device004']

Turn List of Dictionaries or Tuples into DataFrame. Actions - Column, Value - rows

I am doing an API call to Facebook and one of the fields is "actions" which creates a dictionary that I would like to break up in to separate DataFrame columns. I have seen a few similar questions using pd.Series() to map them into separate columns or json.normalize(), but those don't exactly do what I'm looking for.
Here's the export before I put it into a dataframe column:
[<AdsInsights> {
"actions": [
{
"action_type": "landing_page_view",
"value": "292"
},
{
"action_type": "comment",
"value": "13"
},
{
"action_type": "onsite_conversion.post_save",
"value": "6"
},
{
"action_type": "link_click",
"value": "874"
},
{
"action_type": "post",
"value": "1"
},
{
"action_type": "post_reaction",
"value": "393"
},
{
"action_type": "post_engagement",
"value": "96"
},
{
"action_type": "page_engagement",
"value": "96"
},
{
"action_type": "omni_activate_app",
"value": "5"
},
{
"action_type": "omni_app_install",
"value": "2"
},
{
"action_type": "omni_add_to_cart",
"value": "75"
},
{
"action_type": "add_to_wishlist",
"value": "14"
},
{
"action_type": "omni_purchase",
"value": "4"
},
{
"action_type": "omni_search",
"value": "12"
},
{
"action_type": "omni_view_content",
"value": "15"
}
]
I then put it into a DF, but the column becomes actions and it fits all of this data into a single row and then repeats it for each item. I am having trouble breaking the action_type into the column header and the value into the row. I get multiple rows of these sets of data when I create the DF.
when I make the dataframe it looks like this:
df[ad] df[actions]
0 ad1 [{'action_type': 'landing_page_view', 'value':...
1 ad2 [{'action_type': 'landing_page_view', 'value':...
2 ad3 [{'action_type': 'landing_page_view', 'value':...
I am hoping to get:
df[ad] df[landing_page_view] df[comment] ...etc
0 ad1 292 13
1 ad2 100 8
2 ad3 80 9
I was trying to pick out the specific ones I wanted to make columns, but this does not work:
df = all of the raw data from the API call
df = pd.DataFrame(df)
actions = df['actions']


def setcolumn(dict, key):
    """Asker's attempt to create a column when ``key`` is present (kept as posted)."""
    # NOTE(review): `dict` here receives the pandas Series `actions`, which has
    # no `has_key` method - this is the error reported in the question.
    if dict.has_key(key):
        df['key'] = 'value'
    else:
        print ("Not present")


setcolumn(actions, 'landing_page_view')
but this says that series object has no attribute has_key.
any direction is appreciated!
I edited your API result slightly because I'm not sure what the tag at the beginning is for.
The solution is a simple pandas method called from_records(), which converts a list to a dataframe. Here are some reference links:
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.from_records.html
https://pbpython.com/pandas-list-dict.html
How to create pandas DataFrame with index from the list of tuples
import pandas as pd
# Facebook API result held as a plain dictionary; the leading
# '<AdsInsights> ' tag of the raw export has been removed.
list_facebook_api = {
    "actions": [
        {"action_type": "landing_page_view", "value": "292"},
        {"action_type": "comment", "value": "13"},
        {"action_type": "onsite_conversion.post_save", "value": "6"},
        {"action_type": "link_click", "value": "874"},
        {"action_type": "post", "value": "1"},
        {"action_type": "post_reaction", "value": "393"},
        {"action_type": "post_engagement", "value": "96"},
        {"action_type": "page_engagement", "value": "96"},
        {"action_type": "omni_activate_app", "value": "5"},
        {"action_type": "omni_app_install", "value": "2"},
        {"action_type": "omni_add_to_cart", "value": "75"},
        {"action_type": "add_to_wishlist", "value": "14"},
        {"action_type": "omni_purchase", "value": "4"},
        {"action_type": "omni_search", "value": "12"},
        {"action_type": "omni_view_content", "value": "15"},
    ]
}

# Turn the list stored under "actions" into a DataFrame: each dict becomes
# one row and its keys ("action_type", "value") become the columns.
# References:
#   https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.from_records.html
#   https://pbpython.com/pandas-list-dict.html
#   https://stackoverflow.com/questions/44563707/how-to-create-pandas-dataframe-with-index-from-the-list-of-tuples
df = pd.DataFrame.from_records(list_facebook_api["actions"])
print(df.shape)
# print(df)
df

Categories