I have a method where I build a table for multiple items for Google's DLP inspect API which can take either a ContentItem, or a table of values
Here is how the request is constructed:
def redact_text(text_list):
    """Inspect a batch of strings with the Cloud DLP ``inspect_content`` call.

    Despite its name, this function only inspects; it does not redact.

    Args:
        text_list: Iterable of strings. Each one becomes a single-cell row
            of the table produced by ``build_item_table``.

    Returns:
        Whatever ``dlp.inspect_content`` returns for the request.
    """
    dlp = google.cloud.dlp.DlpServiceClient()
    project = 'my-project'
    parent = dlp.project_path(project)
    items = build_item_table(text_list)
    inspect_config = {
        'min_likelihood': "LIKELIHOOD_UNSPECIFIED",
        # Ask the API to echo the matched text back in each finding.
        'include_quote': True,
        'info_types': [{'name': 'EMAIL_ADDRESS'}, {'name': 'PHONE_NUMBER'}],
    }
    response = dlp.inspect_content(parent, inspect_config, items)
    return response
def build_item_table(text_list):
    """Wrap each string of *text_list* in a single-column table item.

    Args:
        text_list: Iterable of strings to place in the table, one per row.

    Returns:
        A dict shaped like ``{"table": {"headers": [...], "rows": [...]}}``
        with a single header named ``"something"`` and one row per string.
    """
    # NOTE(review): "stringValue" is kept on purpose -- it is what triggers
    # the ValueError reported right after this snippet. The Python client
    # expects the proto field name "string_value" instead.
    rows = [{"values": [{"stringValue": item}]} for item in text_list]
    return {"table": {"headers": [{"name": "something"}], "rows": rows}}
When I run this I get back the error ValueError: Protocol message Value has no "stringValue" field, even though this example and the docs say otherwise.
Is there something off in how I build the request?
Edit: Here's the output from build_item_table
{
'table':
{
'headers':
[
{'name': 'value'}
],
'rows':
[
{
'values':
[
{
'stringValue': 'My name is Jenny and my number is (555) 867-5309, you can also email me at anemail#gmail.com, another email you can reach me at is email#email.com. '
}
]
},
{
'values':
[
{
'stringValue': 'Jimbob Doe (555) 111-1233, that one place down the road some_email#yahoo.com'
}
]
}
]
}
}
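Try string_value instead of stringValue: the Python client uses the protobuf field names (snake_case), not the JSON/type names, so each row should be built as {"values": [{"string_value": item}]}.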
Related
I have two dictionaries, as below. Both dictionaries have a list of dictionaries as the value associated with their properties key; each dictionary within these lists has an id key. I wish to merge my two dictionaries into one such that the properties list in the resulting dictionary only has one dictionary for each id.
{
"name":"harry",
"properties":[
{
"id":"N3",
"status":"OPEN",
"type":"energetic"
},
{
"id":"N5",
"status":"OPEN",
"type":"hot"
}
]
}
and the other dictionary:
{
"name":"harry",
"properties":[
{
"id":"N3",
"type":"energetic",
"language": "english"
},
{
"id":"N6",
"status":"OPEN",
"type":"cool"
}
]
}
The output I am trying to achieve is:
{
"name":"harry",
"properties":[
{
"id":"N3",
"status":"OPEN",
"type":"energetic",
"language": "english"
},
{
"id":"N5",
"status":"OPEN",
"type":"hot"
},
{
"id":"N6",
"status":"OPEN",
"type":"cool"
}
]
}
As id: N3 is common in both the lists, those 2 dicts should be merged with all the fields. So far I have tried using itertools and
# Attempted merge: for every key of d1, collect the value each dict holds
# under that key into a tuple (values are paired up, not merged).
ds = [d1, d2]
d = {}
for k in d1:
    # "src" instead of "d": the generator variable used to reuse the name of
    # the result dict, which worked only because generators have their own scope.
    d[k] = tuple(src[k] for src in ds)
Could someone please help in figuring this out?
Here is one approach:
a = {
    "name": "harry",
    "properties": [
        {
            "id": "N3",
            "status": "OPEN",
            "type": "energetic"
        },
        {
            "id": "N5",
            "status": "OPEN",
            "type": "hot"
        }
    ]
}
b = {
    "name": "harry",
    "properties": [
        {
            "id": "N3",
            "type": "energetic",
            "language": "english"
        },
        {
            "id": "N6",
            "status": "OPEN",
            "type": "cool"
        }
    ]
}

# Map every property id to its position in the owning "properties" list.
a_ids = {item['id']: index for index, item in enumerate(a['properties'])}  # {'N3': 0, 'N5': 1}
b_ids = {item['id']: index for index, item in enumerate(b['properties'])}  # {'N3': 0, 'N6': 1}

# Fold the properties of ``a`` into ``b`` (``b`` is modified in place).
for prop_id, a_index in a_ids.items():
    a_prop = a['properties'][a_index]
    if prop_id in b_ids:
        # Same id on both sides: merge the fields; values from ``a`` win.
        b['properties'][b_ids[prop_id]].update(a_prop)
    else:
        # Id only present in ``a``: append its dict unchanged.
        b['properties'].append(a_prop)

print(b)
Output:
{'name': 'harry', 'properties': [{'id': 'N3', 'type': 'energetic', 'language': 'english', 'status': 'OPEN'}, {'id': 'N6', 'status': 'OPEN', 'type': 'cool'}, {'id': 'N5', 'status': 'OPEN', 'type': 'hot'}]}
It might help to treat the two objects as elements each in their own lists. Maybe you have other objects with different name values, such as might come out of a JSON-formatted REST request.
Then you could do a left outer join on both name and id keys:
#!/usr/bin/env python
a = [
    {
        "name": "harry",
        "properties": [
            {
                "id": "N3",
                "status": "OPEN",
                "type": "energetic"
            },
            {
                "id": "N5",
                "status": "OPEN",
                "type": "hot"
            }
        ]
    }
]
b = [
    {
        "name": "harry",
        "properties": [
            {
                "id": "N3",
                "type": "energetic",
                "language": "english"
            },
            {
                "id": "N6",
                "status": "OPEN",
                "type": "cool"
            }
        ]
    }
]

# Index list ``a``: the names seen, the property ids seen per name, and the
# object registered under each name (a later duplicate name replaces it).
a_names = set()
a_prop_ids_by_name = {}
a_by_name = {}
for ao in a:
    an = ao['name']
    a_names.add(an)
    a_prop_ids_by_name.setdefault(an, set()).update(
        ap['id'] for ap in ao['properties']
    )
    a_by_name[an] = ao

# Walk list ``b`` and join it against the index built above.
res = []
for bo in b:
    bn = bo['name']
    if bn not in a_names:
        # No object in ``a`` carries this name: keep the ``b`` object as-is.
        res.append(bo)
        continue
    ao = a_by_name[bn]
    known_ids = a_prop_ids_by_name[bn]
    # Only properties whose id is new to ``a`` are appended; a property whose
    # id already exists in ``a`` is left untouched (fields are not merged).
    ao['properties'].extend(
        bpo for bpo in bo['properties'] if bpo['id'] not in known_ids
    )
    res.append(ao)
print(res)
The idea above is to process list a for names and ids. The names and ids-by-name are instances of a Python set. So members are always unique.
Once you have these sets, you can do the left outer join on the contents of list b.
Either there's an object in b that doesn't exist in a (i.e. no object in a shares its name), in which case you add that object to the result as-is. But if there is an object in b that does exist in a (i.e. one that shares a common name), then you iterate over that object's properties and look for ids not already in the a ids-by-name set. You add the missing properties to the matching object from a, and then add that processed object to the result.
Output:
[{'name': 'harry', 'properties': [{'id': 'N3', 'status': 'OPEN', 'type': 'energetic'}, {'id': 'N5', 'status': 'OPEN', 'type': 'hot'}, {'id': 'N6', 'status': 'OPEN', 'type': 'cool'}]}]
This doesn't do any error checking on input. This relies on name values being unique per object. So if you have duplicate keys in objects in both lists, you may get garbage (incorrect or unexpected output).
I am trying to get the value of DomainName from the below dictionary.
print(domain_name)
# output
{
'DomainNames': [
{
'DomainName': 'some-value'
},
]
}
I have tried:
# 'DomainNames' maps to a list, so select an element by index first and then
# read that element's 'DomainName' key.
print(domain_name['DomainNames'][0]['DomainName'])
but it doesn't give that value. I even tried:
# For the dict shown above this raises TypeError: domain_name['DomainNames']
# is a list and cannot be indexed with the string 'DomainName'.
print(domain_name['DomainNames']['DomainName'])
Here is my code:
def add_es_tags():
    """List the Elasticsearch domains of us-east-1 and hand them on.

    The response of ``list_domain_names`` is passed unchanged to
    ``get_es_domain_ARN`` together with the region name.
    """
    # NOTE(review): the loop body is only ``pass``, so the regions are
    # iterated without being used. The original nesting of the statements
    # below was lost; they are assumed to run once, after the loop -- confirm.
    for region in get_regions_depending_on_account():
        pass
    es_client = boto3.client('es', region_name="us-east-1")
    response = es_client.list_domain_names()
    get_es_domain_ARN("us-east-1", response)
def get_es_domain_ARN(region, domain_names):
    """Print the name of every domain found in ``list_domain_names`` output.

    Args:
        region: Region name used to create the ``es`` client.
        domain_names: Either one response dict shaped like
            ``{'DomainNames': [{'DomainName': ...}, ...]}`` or an iterable
            of such dicts.
    """
    # NOTE(review): es_client and arns are not used by the visible code;
    # presumably they are meant for a later ARN lookup -- confirm.
    es_client = boto3.client('es', region_name=region)
    arns = []
    print(len(domain_names))
    # Iterating a dict yields its keys (plain strings), so the old loop ended
    # up evaluating 'DomainNames'['DomainNames'] and raised TypeError. Wrap a
    # single response in a list so both call shapes take the same path.
    if isinstance(domain_names, dict):
        responses = [domain_names]
    else:
        responses = domain_names
    for response in responses:
        for domain in response['DomainNames']:
            print(domain['DomainName'])
Like this:
# Minimal reproduction: the same structure as in the question, built inline.
domain_name = {'DomainNames': [{'DomainName': 'some-value'}]}
print(domain_name)
# Take the first list element, then read its 'DomainName' entry.
first_domain = domain_name['DomainNames'][0]
print(first_domain['DomainName'])
Yes, the answer is: it works exactly as you suggested!
Edit: Never mind, I'll update this when you've formulated a full question that actually matches what you're doing.
I have an Eve app publishing a simple read-only (GET) interface. It is interfacing a MongoDB collection called centroids, which has documents like:
[
{
"name":"kachina chasmata",
"location":{
"type":"Point",
"coordinates":[-116.65,-32.6]
},
"body":"ariel"
},
{
"name":"hokusai",
"location":{
"type":"Point",
"coordinates":[16.65,57.84]
},
"body":"mercury"
},
{
"name":"cañas",
"location":{
"type":"Point",
"coordinates":[89.86,-31.188]
},
"body":"mars"
},
{
"name":"anseris cavus",
"location":{
"type":"Point",
"coordinates":[95.5,-29.708]
},
"body":"mars"
}
]
Currently, (Eve) settings declare a DOMAIN as follows:
# Item endpoint: one crater centroid addressed by body and name.
crater = {
    'hateoas': False,
    'item_title': 'crater centroid',
    # Raw string: "\w" is meant for the regex converter and is not a valid
    # Python string escape, so a plain literal relies on a deprecated fallback.
    'url': r'centroid/<regex("[\w]+"):body>/<regex("[\w ]+"):name>',
    'datasource': {
        # Only these fields are projected.
        'projection': {'name': 1, 'body': 1, 'location.coordinates': 1}
    }
}
DOMAIN = {
    'centroids': crater,
}
Which will successfully answer to requests of the form http://hostname/centroid/<body>/<name>. Inside MongoDB this represents a query like: db.centroids.find({body:<body>, name:<name>}).
What I would like to do also is to offer an endpoint for all the documents of a given body. I.e., a request to http://hostname/centroids/<body> would answer the list of all documents with body==<body>: db.centroids.find({body:<body>}).
How do I do that?
I gave a shot by including a list of rules to the DOMAIN key centroids (the name of the database collection) like below,
# Same ``crater`` settings as before (elided).
crater = {
    ...
}
# Second endpoint, intended to list every crater of a given body.
body = {
    'item_title': 'body craters',
    # Raw string so the "\w" regex class is not read as a string escape.
    'url': r'centroids/<regex("[\w]+"):body>'
}
# NOTE: putting a list under the resource key is the attempt that fails with
# the AttributeError quoted right after this snippet.
DOMAIN = {
    'centroids': [crater, body],
}
but didn't work...
AttributeError: 'list' object has no attribute 'setdefault'
Got it!
I was assuming the keys in the DOMAIN structure were directly tied to the collection Eve was querying. That is true for the default settings, but it can be adjusted inside the resource's datasource.
I figured that out while handling a situation analogous to the one in the question: I wanted to have an endpoint hostname/bodies listing all the (unique) values for body in the centroids collection. To do that, I needed to set an aggregation on it.
The following settings give me exactly that ;)
# Item endpoint: one centroid addressed by body and name.
centroids = {
    'item_title': 'centroid',
    # Raw string: "\w" belongs to the regex converter, not to Python.
    'url': r'centroid/<regex("[\w]+"):body>/<regex("[\w ]+"):name>',
    'datasource': {
        # Explicit source, so the DOMAIN key need not match the collection.
        'source': 'centroids',
        'projection': {'name': 1, 'body': 1, 'location.coordinates': 1}
    }
}
# Aggregation endpoint over the same collection: groups documents by "body".
bodies = {
    'datasource': {
        'source': 'centroids',
        'aggregation': {
            'pipeline': [
                {"$group": {"_id": "$body"}},
            ]
        },
    }
}
DOMAIN = {
    'centroids': centroids,
    'bodies': bodies
}
The endpoint http://127.0.0.1:5000/centroid/mercury/hokusai, for example, gives me the name, body, and coordinates of mercury/hokusai.
And the endpoint http://127.0.0.1:5000/bodies gives me the list of unique values for body in centroids.
Beautiful. Thumbs up to Eve!
I am trying to get a search volume metric from the Google Ads API. I am running into trouble when using the "SearchVolumeSearchParameter" argument. This argument requires an "operation" field, and the documentation does not do a great job of explaining what these operations can be. Preferably, I would like the script to return a list of keywords and their respective search volumes for the previous month.
adwords_client = adwords.AdWordsClient.LoadFromStorage()
targeting_idea_service = adwords_client.GetService(
    'TargetingIdeaService', version='v201809')

# Paging window for the request.
offset = 0
PAGE_SIZE = 500

# The whole selector is declared in one literal instead of key by key.
selector = {
    'ideaType': 'KEYWORD',
    'requestType': 'STATS',
    'requestedAttributeTypes': [
        'KEYWORD_TEXT',
        'SEARCH_VOLUME',
        # 'TARGETED_MONTHLY_SEARCHES',
    ],
    'paging': {
        'startIndex': str(offset),
        'numberResults': str(PAGE_SIZE),
    },
    'searchParameters': [{
        'xsi_type': 'SearchVolumeSearchParameter',
        'operation': [],
    }],
}
page = targeting_idea_service.get(selector)
You use maximum, minimum like so:
# Restrict ideas to those related to the given queries.
related_queries = {
    'xsi_type': 'RelatedToQuerySearchParameter',
    'queries': search_keywords,
}
# Restrict ideas to a single location.
location_filter = {
    'xsi_type': 'LocationSearchParameter',
    'locations': [{'id': location_id}],
}
# "operation" takes entries with "minimum" and/or "maximum" bounds.
volume_filter = {
    'xsi_type': 'SearchVolumeSearchParameter',
    'operation': [{'minimum': 100}],
}
selector['searchParameters'] = [related_queries, location_filter, volume_filter]
When I run the following code I get this error.
{'error': {'code': 400, 'message': 'Invalid JSON payload received. Unknown name "album_id": Proto field is not repeating, cannot start list.', 'status': 'INVALID_ARGUMENT', 'details': [{'#type': 'type.googleapis.com/google.rpc.BadRequest', 'fieldViolations': [{'description': 'Invalid JSON payload received. Unknown name "album_id": Proto field is not repeating, cannot start list.'}]}]}}
If I remove the "albumId": ["albumid code"] it works fine and returns
10 new items, total 10
def _actually_list_media_items(session):
    """Run one ``mediaItems:search`` request and return the items it yields.

    Args:
        session: Object exposing ``post(url, params=..., json=...)`` whose
            result has a ``json()`` method (presumably an authorized HTTP
            session -- confirm against the caller).

    Returns:
        The list of entries under ``mediaItems`` in the response; empty when
        the key is absent, for instance when the API answered with an error.
    """
    ret = []
    params = {
        'fields': 'mediaItems(id,baseUrl,filename,mimeType,productUrl),nextPageToken',
    }
    # NOTE(review): "albumId" is left as a list combined with "filters"
    # because that is the request that produces the error quoted above;
    # the API expects a plain string and does not allow it alongside filters.
    search_json = {
        "pageSize": 10,
        "albumId": ["<albumid code>"],
        "filters": {
            "includeArchivedMedia": False,
            "contentFilter": {
                "excludedContentCategories": [
                    "DOCUMENTS",
                    "RECEIPTS",
                    "SCREENSHOTS",
                    "UTILITY",
                    "WHITEBOARDS",
                ]
            },
            "mediaTypeFilter": {
                "mediaTypes": [
                    "PHOTO",
                ],
            },
        },
    }
    # The counter caps the loop at a single page even if more are available.
    tmp = 0
    while tmp < 1:
        rsp = session.post(
            'https://photoslibrary.googleapis.com/v1/mediaItems:search',
            params=params,
            json=search_json,
        ).json()
        if 'error' in rsp:
            # Errors are only printed; the code below then sees no items and
            # no page token, which ends the loop.
            print(rsp)
        cur = list(rsp.get('mediaItems', []))
        ret += cur
        print(f'{len(cur)} new items, total {len(ret)}')
        page_token = rsp.get('nextPageToken')
        if page_token is None:
            break
        params['pageToken'] = page_token
        tmp = tmp + 1
    return ret
The comment about albumId and filters being exclusive is correct, so you need to pick one or the other. However, assuming you want to use the albumId by itself, you need to remove the square brackets around your albumid code, here's a clip from my code:
# albumId is passed as a plain string and without any "filters" entry.
searchbody = {
    "albumId": album_id,
    "pageSize": 10
}
print(searchbody)
mediaresults = gAPIservice.mediaItems().search(body=searchbody).execute()
mediaitems = mediaresults.get('mediaItems', [])
for item in mediaitems:
    # One line per item: "<filename> (<id>)".
    print(f"{item['filename']} ({item['id']})")
Edit:
Apparently you can't use albumId and filters together: source
filters: object(Filters)
Filters to apply to the request. Can't be set in conjunction with an albumId.
Aside from that, albumId is supposed to be a string, not an array: source
"albumId": "<albumid code>",