mediaItems.search not working with albumId - python

When I run the following code I get this error.
{'error': {'code': 400, 'message': 'Invalid JSON payload received. Unknown name "album_id": Proto field is not repeating, cannot start list.', 'status': 'INVALID_ARGUMENT', 'details': [{'#type': 'type.googleapis.com/google.rpc.BadRequest', 'fieldViolations': [{'description': 'Invalid JSON payload received. Unknown name "album_id": Proto field is not repeating, cannot start list.'}]}]}}
If I remove the "albumId": ["albumid code"] it works fine and returns
10 new items, total 10
def _actually_list_media_items(session):
ret = []
params = {
'fields': 'mediaItems(id,baseUrl,filename,mimeType,productUrl),nextPageToken',
}
search_json = {
"pageSize": 10,
"albumId": ["<albumid code>"],
"filters": {
"includeArchivedMedia": False,
"contentFilter": {
"excludedContentCategories": [
"DOCUMENTS",
"RECEIPTS",
"SCREENSHOTS",
"UTILITY",
"WHITEBOARDS",
]
},
"mediaTypeFilter": {
"mediaTypes": [
"PHOTO",
],
},
},
}
tmp = 0
while tmp < 1:
rsp = session.post(
'https://photoslibrary.googleapis.com/v1/mediaItems:search',
params=params,
json=search_json,
).json()
if 'error' in rsp:
print(rsp)
cur = [m for m in rsp.get('mediaItems', [])]
ret += cur
print(f'{len(cur)} new items, total {len(ret)}')
pageToken = rsp.get('nextPageToken')
if pageToken is None:
break
params['pageToken'] = pageToken
tmp = tmp + 1
return ret

The comment about albumId and filters being exclusive is correct, so you need to pick one or the other. However, assuming you want to use the albumId by itself, you need to remove the square brackets around your albumid code, here's a clip from my code:
searchbody = {
"albumId": album_id,
"pageSize": 10
}
print(searchbody)
mediaresults = gAPIservice.mediaItems().search(body=searchbody).execute()
mediaitems = mediaresults.get('mediaItems', [])
for item in mediaitems:
print(u'{0} ({1})'.format(item['filename'], item['id']))

Edit:
Apparently you can't use albumId and filters together: source
filters: object(Filters)
Filters to apply to the request. Can't be set in conjunction with an albumId.
Aside from that, albumId is a supposed to be a string not an array: source
"albumId": "<albumid code>",

Related

How can I split the path by (".") using python?

-----------------------mapper-------------------
"contact_information":{
"person_name":{
"FormattedName":"some name"
}
}
--------------------current data---------------
client_profile_data = {
"contact_information":{
"person_name":{
"FormattedName":"Abu DND Md"
}
}
}
---------------------changed data------------
profile_data = {
"contact_information":{
"person_name":{
"FormattedName":"Abu DND"
}
}
}
I need to get the changes of "FormattedName(Field)" between client_profile_data & profile_data. So I wrote some function in "helper.py"
------------------------helper.py------------------
PROFILE_FEEDBACK_MAPPINGS = {
'FormattedName': {
'type': 'nested',
'parent_name': "person_name",
'path': "contact_information.person_name.FormattedName"
}
}
def find_diff(client_profile_data, profile_data):
result = []
for key, value in PROFILE_FEEDBACK_MAPPINGS.items():
if value['type'] == 'nested':
try:
if client_profile_data[value['path'][0][1]][key] != profile_data[value['path'][0][1]][key]:
result.append({
'current': profile_data[value['parent_name']][key],
'changed': client_profile_data[value['parent_name']][key],
})
except Exception:
continue
return result
----------------Expected output-------------------
changed: "Abu DND"
current: "Abu DND Md"
-----------------Actual output---------
getting none
Can anyone help me? I need a changes from client_profile_data and profile_data so that I define a function initially which will check the type and after that I want to split the path bcz(contact_information.person_name.FormattedName) will give second if condition will get the differences so that differences will be appending to result. I tried in this way but not working, please help me.
Not sure about what you are looking for but with minimal changes of your code, a solution coud be :
def find_diff(client_profile_data, profile_data):
result = []
for key, value in PROFILE_FEEDBACK_MAPPINGS.items():
if value['type'] == 'nested':
try:
split_path = value['path'].split(".")
client_name = client_profile_data[split_path[0]][split_path[1]][key]
profile_name = profile_data[split_path[0]][split_path[1]][key]
if client_name != profile_name:
result.append({
'current': profile_data[split_path[0]][value['parent_name']][key],
'changed': client_profile_data[split_path[0]][value['parent_name']][key],
})
except Exception:
continue
return result
You forgot to "split" the path to use it as "keys" for your dictionnaries.

How to get an element value in Python dictionary

I am trying to get the value of DomainName from the below dictionary.
print(domain_name)
# output
{
'DomainNames': [
{
'DomainName': 'some-value'
},
]
}
I have tried:
print(domain_name['DomainNames'][0]['DomainName'])
but it doesn't give that value. I even tried:
print(domain_name['DomainNames']['DomainName'])
Here is my code:
def add_es_tags():
for region in get_regions_depending_on_account():
pass
es_client = boto3.client('es', region_name="us-east-1")
response = es_client.list_domain_names()
get_es_domain_ARN("us-east-1", response)
def get_es_domain_ARN(region, domain_names):
es_client = boto3.client('es', region_name=region)
arns = []
print(len(domain_names))
for domain_name in domain_names:
# print(type(domain_name))
print(domain_name['DomainNames'][0]['DomainName'])
Like this:
domain_name = {
'DomainNames': [
{
'DomainName': 'some-value'
},
]
}
print(domain_name)
print(domain_name['DomainNames'][0]['DomainName'])
Yes, the answer is: it works exactly as you suggested!
Edit: Never mind, I'll update this when you've formulated a full question that actually matches what you're doing.

Google DLP: "ValueError: Protocol message Value has no "stringValue" field."

I have a method where I build a table for multiple items for Google's DLP inspect API which can take either a ContentItem, or a table of values
Here is how the request is constructed:
def redact_text(text_list):
dlp = google.cloud.dlp.DlpServiceClient()
project = 'my-project'
parent = dlp.project_path(project)
items = build_item_table(text_list)
info_types = [{'name': 'EMAIL_ADDRESS'}, {'name': 'PHONE_NUMBER'}]
inspect_config = {
'min_likelihood': "LIKELIHOOD_UNSPECIFIED",
'include_quote': True,
'info_types': info_types
}
response = dlp.inspect_content(parent, inspect_config, items)
return response
def build_item_table(text_list):
rows = []
for item in text_list:
row = {"values": [{"stringValue": item}]}
rows.append(row)
table = {"table": {"headers": [{"name": "something"}], "rows": rows}}
return table
When I run this I get back the error ValueError: Protocol message Value has no "stringValue" field. Even though the this example and the docs say otherwise.
Is there something off in how I build the request?
Edit: Here's the output from build_item_table
{
'table':
{
'headers':
[
{'name': 'value'}
],
'rows':
[
{
'values':
[
{
'stringValue': 'My name is Jenny and my number is (555) 867-5309, you can also email me at anemail#gmail.com, another email you can reach me at is email#email.com. '
}
]
},
{
'values':
[
{
'stringValue': 'Jimbob Doe (555) 111-1233, that one place down the road some_email#yahoo.com'
}
]
}
]
}
}
Try string_value .... python uses the field names, not the type name.

KeyError: 'Bytes_Written' python

I do not understand why I get this error Bytes_Written is in the dataset but why can't python find it? I am getting this information(see dataset below) from a VM, I want to select Bytes_Written and Bytes_Read and then subtract the previous values from current value and print a json object like this
{'Bytes_Written': previousValue-currentValue, 'Bytes_Read': previousValue-currentValue}
here is what the data looks like:
{
"Number of Devices": 2,
"Block Devices": {
"bdev0": {
"Backend_Device_Path": "/dev/disk/by-path/ip-192.168.26.1:3260-iscsi-iqn.2010-10.org.openstack:volume-d1c8e7c6-8c77-444c-9a93-8b56fa1e37f2-lun-010.0.0.142",
"Capacity": "2147483648",
"Guest_Device_Name": "vdb",
"IO_Operations": "97069",
"Bytes_Written": "34410496",
"Bytes_Read": "363172864"
},
"bdev1": {
"Backend_Device_Path": "/dev/disk/by-path/ip-192.168.26.1:3260-iscsi-iqn.2010-10.org.openstack:volume-b27110f9-41ba-4bc6-b97c-b5dde23af1f9-lun-010.0.0.146",
"Capacity": "2147483648",
"Guest_Device_Name": "vdb",
"IO_Operations": "93",
"Bytes_Written": "0",
"Bytes_Read": "380928"
}
}
}
This is the complete code that I am running.
FIELDS = ("Bytes_Written", "Bytes_Read", "IO_Operation")
def counterVolume_one(state):
url = 'http://url'
r = requests.get(url)
data = r.json()
for field in FIELDS:
state[field] += data[field]
return state
state = {"Bytes_Written": 0, "Bytes_Read": 0, "IO_Operation": 0}
while True:
counterVolume_one(state)
time.sleep(1)
for field in FIELDS:
print("{field:s}: {count:d}".format(field=field, count=state[field]))
counterVolume_one(state)
Your returned JSON structure does not have any of these FIELDS = ("Bytes_Written", "Bytes_Read", "IO_Operation") keys directly.
You'll need to modify your code slightly.
data = r.json()
for block_device in data['Block Devices'].iterkeys():
for field in FIELDS:
state[field] += int(data['Block Devices'][block_device][field])

elasticsearch scrolling using python client

When scrolling in elasticsearch it is important to provide at each scroll the latest scroll_id:
The initial search request and each subsequent scroll request returns
a new scroll_id — only the most recent scroll_id should be used.
The following example (taken from here) puzzle me. First, the srolling initialization:
rs = es.search(index=['tweets-2014-04-12','tweets-2014-04-13'],
scroll='10s',
search_type='scan',
size=100,
preference='_primary_first',
body={
"fields" : ["created_at", "entities.urls.expanded_url", "user.id_str"],
"query" : {
"wildcard" : { "entities.urls.expanded_url" : "*.ru" }
}
}
)
sid = rs['_scroll_id']
and then the looping:
tweets = [] while (1):
try:
rs = es.scroll(scroll_id=sid, scroll='10s')
tweets += rs['hits']['hits']
except:
break
It works, but I don't see where sid is updated... I believe that it happens internally, in the python client; but I don't understand how it works...
This is an old question, but for some reason came up first when searching for "elasticsearch python scroll". The python module provides a helper method to do all the work for you. It is a generator function that will return each document to you while managing the underlying scroll ids.
https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan
Here is an example of usage:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
query = {
"query": {"match_all": {}}
}
es = Elasticsearch(...)
for hit in scan(es, index="my-index", query=query):
print(hit["_source"]["field"])
Using python requests
import requests
import json
elastic_url = 'http://localhost:9200/my_index/_search?scroll=1m'
scroll_api_url = 'http://localhost:9200/_search/scroll'
headers = {'Content-Type': 'application/json'}
payload = {
"size": 100,
"sort": ["_doc"]
"query": {
"match" : {
"title" : "elasticsearch"
}
}
}
r1 = requests.request(
"POST",
elastic_url,
data=json.dumps(payload),
headers=headers
)
# first batch data
try:
res_json = r1.json()
data = res_json['hits']['hits']
_scroll_id = res_json['_scroll_id']
except KeyError:
data = []
_scroll_id = None
print 'Error: Elastic Search: %s' % str(r1.json())
while data:
print data
# scroll to get next batch data
scroll_payload = json.dumps({
'scroll': '1m',
'scroll_id': _scroll_id
})
scroll_res = requests.request(
"POST", scroll_api_url,
data=scroll_payload,
headers=headers
)
try:
res_json = scroll_res.json()
data = res_json['hits']['hits']
_scroll_id = res_json['_scroll_id']
except KeyError:
data = []
_scroll_id = None
err_msg = 'Error: Elastic Search Scroll: %s'
print err_msg % str(scroll_res.json())
Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html#search-request-scroll
In fact the code has a bug in it - in order to use the scroll feature correctly you are supposed to use the new scroll_id returned with each new call in the next call to scroll(), not reuse the first one:
Important
The initial search request and each subsequent scroll request returns
a new scroll_id — only the most recent scroll_id should be used.
http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-request-scroll.html
It's working because Elasticsearch does not always change the scroll_id in between calls and can for smaller result sets return the same scroll_id as was originally returned for some time. This discussion from last year is between two other users seeing the same issue, the same scroll_id being returned for awhile:
http://elasticsearch-users.115913.n3.nabble.com/Distributing-query-results-using-scrolling-td4036726.html
So while your code is working for a smaller result set it's not correct - you need to capture the scroll_id returned in each new call to scroll() and use that for the next call.
self._elkUrl = "http://Hostname:9200/logstash-*/_search?scroll=1m"
self._scrollUrl="http://Hostname:9200/_search/scroll"
"""
Function to get the data from ELK through scrolling mechanism
"""
def GetDataFromELK(self):
#implementing scroll and retriving data from elk to get more than 100000 records at one search
#ref :https://www.elastic.co/guide/en/elasticsearch/reference/6.8/search-request-scroll.html
try :
dataFrame=pd.DataFrame()
if self._elkUrl is None:
raise ValueError("_elkUrl is missing")
if self._username is None:
raise ValueError("_userNmae for elk is missing")
if self._password is None:
raise ValueError("_password for elk is missing")
response=requests.post(self._elkUrl,json=self.body,auth=(self._username,self._password))
response=response.json()
if response is None:
raise ValueError("response is missing")
sid = response['_scroll_id']
hits = response['hits']
total= hits["total"]
if total is None:
raise ValueError("total hits from ELK is none")
total_val=int(total['value'])
url = self._scrollUrl
if url is None:
raise ValueError("scroll url is missing")
#start scrolling
while(total_val>0):
#keep search context alive for 2m
scroll = '2m'
scroll_query={"scroll" : scroll, "scroll_id" : sid }
response1=requests.post(url,json=scroll_query,auth=(self._username,self._password))
response1=response1.json()
# The result from the above request includes a scroll_id, which should be passed to the scroll API in order to retrieve the next batch of results
sid = response1['_scroll_id']
hits=response1['hits']
data=response1['hits']['hits']
if len(data)>0:
cleanDataFrame=self.DataClean(data)
dataFrame=dataFrame.append(cleanDataFrame)
total_val=len(response1['hits']['hits'])
num=len(dataFrame)
print('Total records recieved from ELK=',num)
return dataFrame
except Exception as e:
logging.error('Error while getting the data from elk', exc_info=e)
sys.exit()
from elasticsearch import Elasticsearch
elasticsearch_user_name ='es_username'
elasticsearch_user_password ='es_password'
es_index = "es_index"
es = Elasticsearch(["127.0.0.1:9200"],
http_auth=(elasticsearch_user_name, elasticsearch_user_password))
query = {
"query": {
"bool": {
"must": [
{
"range": {
"es_datetime": {
"gte": "2021-06-21T09:00:00.356Z",
"lte": "2021-06-21T09:01:00.356Z",
"format": "strict_date_optional_time"
}
}
}
]
}
},
"fields": [
"*"
],
"_source": False,
"size": 2000,
}
resp = es.search(index=es_index, body=query, scroll="1m")
old_scroll_id = resp['_scroll_id']
results = resp['hits']['hits']
while len(results):
for i, r in enumerate(results):
# do something whih data
pass
result = es.scroll(
scroll_id=old_scroll_id,
scroll='1m' # length of time to keep search context
)
# check if there's a new scroll ID
if old_scroll_id != result['_scroll_id']:
print("NEW SCROLL ID:", result['_scroll_id'])
# keep track of pass scroll _id
old_scroll_id = result['_scroll_id']
results = result['hits']['hits']

Categories