Python: appending to an array in Elasticsearch 5.4 using upsert

I am trying to use upsert to create a new badges record if it doesn't exist and append to the names array if it does. I have tried to follow "Elasticsearch upserting and appending to array" and the documentation, without success. So far I have:
es.update(index='.people',
          doc_type='badges',
          id=match['badgeNumber'],
          body={
              "script": {
                  "inline": "if(ctx._source.names.contains(nm)) {ctx.op = 'none'} else {ctx._source.names += params.nm}",
                  "lang": "painless",
                  "params": {
                      "nm": name
                  }
              },
              "upsert": {
                  "names": name
              }
          })
The code works fine to add new documents such as:
{
    "_index" : ".people",
    "_type" : "badges",
    "_id" : "12345",
    "_score" : 1.0,
    "_source" : {
        "names" : [
            "John Smith"
        ]
    }
},
{
    "_index" : ".people",
    "_type" : "badges",
    "_id" : "7896",
    "_score" : 1.0,
    "_source" : {
        "names" : [
            "Amy Wexler"
        ]
    }
}
but if I try to update the list:
match = {'badgeNumber': '12345'}
name = 'Johnny Smith'
update_names(name, match)
I get the error:
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/elasticsearch/client/utils.py", line 73, in _wrapped
return func(*args, params=params, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/elasticsearch/client/__init__.py", line 525, in update
doc_type, id, '_update'), params=params, body=body)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/elasticsearch/transport.py", line 312, in perform_request
status, headers, data = connection.perform_request(method, url, params, body, ignore=ignore, timeout=timeout)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/elasticsearch/connection/http_urllib3.py", line 128, in perform_request
self._raise_error(response.status, raw_data)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/elasticsearch/connection/base.py", line 125, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
elasticsearch.exceptions.RequestError: TransportError(400, 'illegal_argument_exception', '[9wu5eiG][127.0.0.1:9300][indices:data/write/update[s]]')
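A likely fix, offered here as an assumption rather than an answer from the thread: the script refers to the bare variable nm (Painless only knows params.nm), appends with += instead of List.add(), and the upsert seeds names as a plain string rather than a list. A sketch with those three changes:

es.update(index='.people',
          doc_type='badges',
          id=match['badgeNumber'],
          body={
              "script": {
                  # use params.nm consistently and append with List.add()
                  "inline": "if (ctx._source.names.contains(params.nm)) { ctx.op = 'none' } else { ctx._source.names.add(params.nm) }",
                  "lang": "painless",
                  "params": {"nm": name}
              },
              # seed the field as a one-element list so later appends work
              "upsert": {"names": [name]}
          })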

Related

Elasticsearch with Python: how to search for documents that do not equal

I am trying to get all those documents where session is not "None". No matter which way I try it, I get an error:
query = {
    "bool": {
        "must_not": {
            "term": {
                "session": "None"
            }
        }
    }
}
# resp = es.search(index="test-sql-index", query={"must_not": {"session": session_ID}}, size=1000)
resp = es.search(index="test-sql-index", body=query)
I always get the following error:
RequestError(400, 'parsing_exception', 'Unknown key for a START_OBJECT in [bool].')
File "C:\Users\Mahir\Neuer Ordner\ElasticPython.py", line 122, in searchReturncode
resp = es.search(index="test-sql-index", body=query)
I am trying to get all those documents where session is not "None".
I also tried it this way:
resp = es.search(index="test-sql-index", query={"bool":{"must_not": {"returncode" : "None"}}}, size=1000)
You need to start your request body with the top-level query key, wrapping the term query:
query = {
    "query": {
        "bool": {
            "must_not": [{
                "term": {
                    "session.keyword": "None"
                }
            }]
        }
    }
}
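The body is then passed to es.search as before; a minimal usage sketch, assuming the same elasticsearch-py client and index as in the question:

resp = es.search(index="test-sql-index", body=query, size=1000)
for hit in resp['hits']['hits']:
    print(hit['_source'])   # documents whose session is not "None"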

Pinpoint put_events from Lambda function: NotFoundException

I have set up an AWS Pinpoint project and I'm trying to test it by sending an event from a Lambda function:
import boto3
import datetime
import time

client = boto3.client('pinpoint')
app_id = '1234abcd'
endpoint_id = 'test_endpoint'
address = 'test@test.com'

def lambda_handler(event, context):
    response = client.put_events(
        ApplicationId=app_id,
        EventsRequest={
            'BatchItem': {
                endpoint_id: {
                    'Endpoint': {
                        'ChannelType': 'EMAIL',
                        'Address': address,
                        'Attributes': {
                            'Cart': ['Hat'],
                            'Purchased': ['No']
                        }
                    },
                    'Events': {
                        'cart-event-2': {
                            'Attributes': {
                                'AddedToCart': 'Hat'
                            },
                            'EventType': 'AddToCartEvent',
                            'Metrics': {
                                'price': 29.95
                            },
                            'Timestamp': datetime.datetime.fromtimestamp(time.time()).isoformat()
                        }
                    }
                }
            }
        }
    )
    return response
But I am receiving an error that the resource cannot be found, even though I can see it in the Pinpoint console:
{
    "errorMessage": "An error occurred (NotFoundException) when calling the PutEvents operation: Resource not found",
    "errorType": "NotFoundException",
    "requestId": "xxxxx-xxxxx-xxxx-xxxx-xxxxxxxxx",
    "stackTrace": [
        " File \"/var/task/lambda_function.py\", line 12, in lambda_handler\n response = client.put_events(\n",
        " File \"/var/runtime/botocore/client.py\", line 391, in _api_call\n return self._make_api_call(operation_name, kwargs)\n",
        " File \"/var/runtime/botocore/client.py\", line 719, in _make_api_call\n raise error_class(parsed_response, operation_name)\n"
    ]
}
Turns out I was just in the wrong region on my AWS account.
I created the AWS pinpoint project in one region but was trying to send events to the project from another AWS region, which was why I was getting the NotFoundException.
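As an illustration of that fix (the region name below is a placeholder, not the one from the original project), the boto3 client can be pinned to the region where the Pinpoint project actually lives:

client = boto3.client('pinpoint', region_name='us-east-1')  # must match the project's region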

Iterate through nested JSON in Python

js = {
    "status": "ok",
    "meta": {
        "count": 1
    },
    "data": {
        "542250529": [
            {
                "all": {
                    "spotted": 438,
                    "battles_on_stunning_vehicles": 0,
                    "avg_damage_blocked": 39.4,
                    "capture_points": 40,
                    "explosion_hits": 0,
                    "piercings": 3519,
                    "xp": 376586,
                    "survived_battles": 136,
                    "dropped_capture_points": 382,
                    "damage_dealt": 783555,
                    "hits_percents": 74,
                    "draws": 2,
                    "battles": 290,
                    "damage_received": 330011,
                    "frags": 584,
                    "stun_number": 0,
                    "direct_hits_received": 1164,
                    "stun_assisted_damage": 0,
                    "hits": 4320,
                    "battle_avg_xp": 1299,
                    "wins": 202,
                    "losses": 86,
                    "piercings_received": 1004,
                    "no_damage_direct_hits_received": 103,
                    "shots": 5857,
                    "explosion_hits_received": 135,
                    "tanking_factor": 0.04
                }
            }
        ]
    }
}
Let us call this JSON "js"; the variable will be produced inside a for loop.
To understand better what I'm doing here: I'm trying to collect data from a game.
The game has hundreds of different tanks; each tank has a tank_id that I can post to the game server, which responds with the performance data shown above as "js":
for tank_id: json = requests.post(tank_id) etc...
I then want to fetch all these values into my database as shown in the screenshot.
My Python code for it:
def api_get():
    for property in js['data']['542250529']['all']:
        spotted = property['spotted']
        battles_on_stunning_vehicles = property['battles_on_stunning_vehicles']
        # etc
        # ...
        insert_to_db(spotted, battles_on_stunning_vehicles, etc....)
The exception is:
for property in js['data']['542250529']['all']:
TypeError: list indices must be integers or slices, not str
and when:
print(js['data']['542250529'])
I get the rest of the js printed, but I can't iterate over it; it can't be used as a valid JSON string. Also, what's inside js['data']['542250529'] is a list containing a single item with the key 'all'. Any help would be appreciated.
You just missed [0] to get the first item in the list (and since 'all' is a dict, index it directly rather than looping over its keys):
def api_get():
    stats = js['data']['542250529'][0]['all']
    spotted = stats['spotted']
    # ...
Look carefully at the data structure in the source JSON.
There is a list containing a dictionary with the key 'all'. So you need to use js['data']['542250529'][0]['all'], not js['data']['542250529']['all']. Then you can use .items() to get the key-value pairs.
See below.
js = {
    "status": "ok",
    "meta": {
        "count": 1
    },
    "data": {
        "542250529": [
            {
                "all": {
                    "spotted": 438,
                    "battles_on_stunning_vehicles": 0,
                    "avg_damage_blocked": 39.4,
                    "capture_points": 40,
                    "explosion_hits": 0,
                    "piercings": 3519,
                    "xp": 376586,
                    "survived_battles": 136,
                    "dropped_capture_points": 382,
                    "damage_dealt": 783555,
                    "hits_percents": 74,
                    "draws": 2,
                    "battles": 290,
                    "damage_received": 330011,
                    "frags": 584,
                    "stun_number": 0,
                    "direct_hits_received": 1164,
                    "stun_assisted_damage": 0,
                    "hits": 4320,
                    "battle_avg_xp": 1299,
                    "wins": 202,
                    "losses": 86,
                    "piercings_received": 1004,
                    "no_damage_direct_hits_received": 103,
                    "shots": 5857,
                    "explosion_hits_received": 135,
                    "tanking_factor": 0.04
                }
            }
        ]
    }
}
for key, val in js['data']['542250529'][0]['all'].items():
    print("key:", key, " val:", val)

# Or this way
for key in js['data']['542250529'][0]['all']:
    print("key:", key, " val:", js['data']['542250529'][0]['all'][key])

JSONDecodeError: Expecting ',' delimiter: line 10 column 29 (char 16011)

I am requesting URLs which contain JSON objects in Python.
Some of these URLs contain several JSON objects (that is, several parts enclosed by these brackets: []).
When I tried to load these with json.loads() I got the error:
JSONDecodeError: Extra data: line 494 column 1 (char 50502)
Therefore I tried to split the JSON objects and write them into a list like this:
response = requests.get(url)
textinhalt = response.text
ref = textinhalt.rsplit('[')
tev = []
for line in ref:
    daten = json.loads(line[line.find(r"{"):line.rfind("}")+1])
    tev.append(daten)
But I get this error:
JSONDecodeError: Expecting ',' delimiter: line 10 column 29 (char 16011)
For example, here is the part of the JSON that causes the Extra data error (line 494):
474. "originalbild" : {
475. "alt" : "",
476. "height" : "3601",
477. "quelle" : "",
478. "src" : "/imgs/65/2/7/6/2/8/0/4/IMG_5630-3a13bb38ae440652.jpeg",
479. "untertitel" : "",
480. "width" : "5401"
481. },
482. "b" : "",
483. "redakteur_bid" : "",
484. "redakteur_email" : "",
485. "redakteur_inline" : "0",
486. "redakteur_kategorie" : "1",
487. "redakteur_kuerzel" : "",
488. "redakteur_nachname" : "",
489. "redakteur_redaktion" : "",
490. "redakteur_vorname" : "",
491. "ressort" : "",
492. "seitennavigation_liste" : [
493. {
494. "_baseurl" : "/Macoun-2019-Von-SwiftUI-bis-NFC-4547400.html",
495. "artikelseite" : 1,
496. "container_id" : 4547400,
497. "titel" : "Macoun 2019: Von SwiftUI bis NFC"
498. },
499. {
500. "artikelseite" : 2,
501. "titel" : "Motion Capturing in ARKit und RealityKit"
502. },
503. {
504. "artikelseite" : 3,
505. "titel" : "Bring deine Tests zum Rennen"
506. },
507. {
508. "artikelseite" : 4,
509. "titel" : "Tipps f\u00fcr Existenzgr\u00fcnder"
510. },
511. {
512. "artikelseite" : 5,
513. "titel" : "Cross Platform Entwicklung mit Kotlin"
514. }
515. ],
516. "seo_beschreibung" : "",
517. "seo_no_meta_description" : 0,
518. "seo_titel" : "",
519. "show_webdev_chooser" : null,
520. "socialbookmarks_keywords_ph_data" : "Apple, Entwickler, Programmieren, iOS, macOS",
521. "speakingurl_primitive" : "Macoun-2019-Von-SwiftUI-bis-NFC",
522. "teaser_anrisstext" : "",
523. "teaser_titel" : "",
524. "teaser_untertitel" : "",
525. "texte_anzahl_zeichen" : [
526. 12499
527. ],
What am I doing wrong?
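One suggestion, offered here as an assumption rather than an answer from the thread: if the response really is several JSON documents back-to-back, json.JSONDecoder.raw_decode can walk the text and parse one document at a time, which avoids splitting on brackets entirely. If raw_decode still raises the "Expecting ',' delimiter" error, the payload itself is malformed at that position rather than merely concatenated. A sketch:

import json

def parse_concatenated_json(text):
    # Yield every JSON document that appears back-to-back in `text`.
    decoder = json.JSONDecoder()
    idx = 0
    while idx < len(text):
        while idx < len(text) and text[idx].isspace():   # skip whitespace between documents
            idx += 1
        if idx >= len(text):
            break
        obj, idx = decoder.raw_decode(text, idx)
        yield obj

tev = list(parse_concatenated_json(response.text))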

From a single JSON create and insert multiple rows to BigQuery with Pub/Sub and Dataflow

I have created a Beam Dataflow pipeline that parses a single JSON from a PubSub topic:
{
    "data": "test data",
    "options": {
        "test options": "test",
        "test_units": {
            "test": {
                "test1": "test1",
                "test2": "test2"
            },
            "test2": {
                "test1": "test1",
                "test2": "test2"
            },
            "test3": {
                "test1": "test1",
                "test2": "test2"
            }
        }
    }
}
My output is something like this:
{
"data": "test data",
"test_test_unit": "test1",
"test_test_unit": "test2",
"test1_test_unit": "test1",
...
},
{
"data": "test data",
"test_test_unit": "test1",
"test_test_unit": "test2",
"test1_test_unit": "test1",
...
}
Basically what I'm doing is flattening the data based on how many test_units are in the JSON from the PubSub and returning that many rows in a single dict.
I have created a Class to flatten the data which returns a dict of rows.
Here is my Beam pipeline:
lines = (p | 'Read from PubSub' >> beam.io.ReadStringsFromPubSub(known_args.input_topic)
           | 'Parse data' >> beam.ParDo(parse_pubsub())
           | 'Write to BigQuery' >> beam.io.WriteToBigQuery(
                 known_args.output_table,
                 schema=table_schema,
                 create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED)
        )
Here is some of the class to handle the flattening:
class parse_pubsub(beam.DoFn):
    def process(self, element):
        # ...
        # flattens the data
        # ...
        return rows
Here is the error from the Stackdriver logs:
Error processing instruction -138. Original traceback is Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 151, in _execute
response = task() File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py",
line 186, in <lambda> self._execute(lambda: worker.do_instruction(work), work) File "/usr/local/lib/python2.7/
dist-packages/apache_beam/runners/worker/sdk_worker.py", line 265, in do_instruction request.instruction_id)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/runners/worker/sdk_worker.py", line 281, in
process_bundle delayed_applications = bundle_processor.process_bundle(instruction_id) File "/usr/local/lib/
python2.7/dist-packages/apache_beam/runners/worker/bundle_processor.py", line 552, in process_bundle op.finish()
File "apache_beam/runners/worker/operations.py", line 549, in
apache_beam.runners.worker.operations.DoOperation.finish def finish(self): File "apache_beam/runners/worker/
operations.py", line 550, in apache_beam.runners.worker.operations.DoOperation.finish with
self.scoped_finish_state: File "apache_beam/runners/worker/operations.py", line 551, in
apache_beam.runners.worker.operations.DoOperation.finish self.dofn_runner.finish() File "apache_beam/runners/
common.py", line 758, in apache_beam.runners.common.DoFnRunner.finish self._invoke_bundle_method
(self.do_fn_invoker.invoke_finish_bundle) File "apache_beam/runners/common.py", line 752, in
apache_beam.runners.common.DoFnRunner._invoke_bundle_method self._reraise_augmented(exn) File "apache_beam/
runners/common.py", line 777, in apache_beam.runners.common.DoFnRunner._reraise_augmented raise_with_traceback
(new_exn) File "apache_beam/runners/common.py", line 750, in
apache_beam.runners.common.DoFnRunner._invoke_bundle_method bundle_method() File "apache_beam/runners/common.py",
line 361, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle def invoke_finish_bundle(self): File
"apache_beam/runners/common.py", line 365, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
self.signature.finish_bundle_method.method_value()) File "/usr/local/lib/python2.7/dist-packages/apache_beam/io/
gcp/bigquery.py", line 630, in finish_bundle self._flush_batch() File "/usr/local/lib/python2.7/dist-packages/
apache_beam/io/gcp/bigquery.py", line 637, in _flush_batch table_id=self.table_id, rows=self._rows_buffer) File
# HERE:
"/usr/local/lib/python2.7/dist-packages/apache_beam/io/gcp/bigquery_tools.py",
line 611, in insert_rows for k, v in iteritems(row): File "/usr/local/lib/python2.7/dist-packages/future/utils/
__init__.py", line 308, in iteritems func = obj.items AttributeError: 'int' object has no attribute 'items'
[while running 'generatedPtransform-135']
I've also tried returning a list and got the same kind of error ('list' object has no attribute 'items'), so I'm converting the list of rows into a dict like this:
0 {
"data": "test data",
"test_test_unit": "test1",
"test_test_unit": "test2",
"test1_test_unit": "test1",
...
},
1 {
"data": "test data",
"test_test_unit": "test1",
"test_test_unit": "test2",
"test1_test_unit": "test1",
...
}
I'm fairly new to this so any help will be appreciated!
You'll need to use the yield keyword to emit multiple outputs in your DoFn. For example:
class parse_pubsub(beam.DoFn):
    def process(self, element):
        # ...
        # flattens the data
        # ...
        for row in rows:
            yield row
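To connect this back to the traceback: returning a dict from process makes Beam iterate over it and emit its keys (0, 1, ...) as elements, which is how an int ends up in iteritems and triggers the 'int' object has no attribute 'items' error. WriteToBigQuery expects each yielded element to be a dict mapping column names to values, one per row. A sketch of the flattening with made-up column names (not the actual schema from the question):

import json
import apache_beam as beam

class parse_pubsub(beam.DoFn):
    def process(self, element):
        data = json.loads(element)
        # one BigQuery row per test_unit; keys must match the table schema
        for unit_name, unit in data['options']['test_units'].items():
            yield {
                'data': data['data'],
                'test_unit': unit_name,
                'test1': unit['test1'],
                'test2': unit['test2'],
            }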
