I am using Batching requests in Google Analytics API(Python). Link to Batching : https://developers.google.com/api-client-library/python/guide/batch
Batching works fine when all the records via .add() are correct(valid). When one or more values are invalid, then the batching fails for all the records.
I added a callback function to handle the error, and I saw that the batching request fails for all the records in the batch (as opposed to only the invalid record). Is there a way to handle the error and skip the row/record which is invalid and continue with the rest of the records in the batch?
Below is the sample code I used and the error message :
def add_user_callback(request_id, response, exception):
    """Batch callback: report per-request success or failure.

    Args:
        request_id: The id passed to batch.add() for this sub-request.
        response: Deserialized API response, or None when the request failed.
        exception: HttpError raised for this sub-request, or None on success.
    """
    # Converted from Python 2 print statements; indentation restored.
    if exception:
        # Only this sub-request failed; the rest of the batch is unaffected.
        print("error :", exception)
    else:
        print("successful")
def _insert_user_link(batch, service, account_id, property_id, email):
    """Queue one webproperty user-link insert (READ_AND_ANALYZE) onto the batch."""
    batch.add(service.management().webpropertyUserLinks().insert(
        accountId=account_id,
        webPropertyId=property_id,
        body={
            'permissions': {
                'local': [
                    'READ_AND_ANALYZE'
                ]
            },
            'userRef': {
                'email': email
            }
        }))


def main():
    ## code to set the account, property and other variables
    # NOTE(review): `batch`, `service`, `account_id` and `property_at` are
    # assumed to be created above -- confirm against the full script.
    # The two near-identical insert literals are factored into one helper.
    _insert_user_link(batch, service, account_id, property_at,
                      'valid_address#domain.com')
    _insert_user_link(batch, service, account_id, property_at,
                      'invalid_address#ddomain.com')  ## i used a dummy id : pppp#domain.com
    batch.execute()
#Error :
#error : <HttpError 400 when requesting https://www.googleapis.com/analytics/v3/management/accounts/62974313/webproperties/UA-62974313-35/entityUserLinks?alt=json returned "Value for field user.email = ppppp#domain.com is not valid.">
#error : <HttpError 400 when requesting https://www.googleapis.com/analytics/v3/management/accounts/62974313/webproperties/UA-62974313-11/entityUserLinks?alt=json returned "Value for field user.email = ppppp#domain.com is not valid.">
Please let me know if you need more info.
Let's assume you have a list of users you want to add to profiles stored in a list users.
You can remove the bad emails with the following callback function:
def call_back(request_id, response, exception):
    """Batch callback: remove API-rejected emails from the global `users` list.

    Args:
        request_id: Id supplied to batch.add() for this sub-request.
        response: Deserialized response body (None when the request failed).
        exception: HttpError for this sub-request, or None on success.
    """
    if exception is not None:
        if isinstance(exception, HttpError):
            message = json.loads(exception.content)['error']['message']
            # Raw string with the literal dots escaped so '.' cannot match
            # arbitrary characters in an unrelated error message.
            bad = r'Value for field user\.email = (\S*) is not valid\.'
            match = re.match(bad, message)
            if match:
                bad_user = match.group(1)
                # NOTE(review): mutates the module-level `users` list in place.
                if bad_user in users:
                    users.remove(bad_user)
    else:
        # Converted from the Python 2 print statement.
        print(response)
After all the failed calls return you can re-attempt the batch call again by looping through the users and constructing a new batch request:
# Re-submit the batch for the users that remain after the callback pruned
# the invalid addresses.
batch = BatchHttpRequest(callback=call_back)
for user in users:
    body = {
        'permissions': {'local': ['READ_AND_ANALYZE']},
        'userRef': {'email': user},
    }
    request = analytics.management().profileUserLinks().insert(
        accountId=ACCOUNT_ID,
        webPropertyId=PROFILE_ID,
        profileId=profile,
        body=body,
    )
    # A unique request_id per user lets the callback identify failures.
    batch.add(request, request_id=PROFILE_ID + user)
batch.execute()
Related
Here's the code of my test:
#pytest.mark.run(order=18)
# NOTE(review): the line above is presumably the decorator
# `@pytest.mark.run(order=18)` with '@' mangled to '#' -- confirm.
def test_post(client):
"""
Test whether the test client has been added or not.

Posts a hard-coded client payload to /dev/api/client with a Cognito
bearer token and prints the JSON response for inspection.
"""
print(f"\n\n {'>>>'*6} TESTING CLIENT POST {'<<<'*6} \n\n")
# Credentials come from the environment; fails with KeyError if unset.
access_token = cognito_auth.get_access_token({
"username": os.environ["TEST_USER_EMAIL"],
"password": os.environ["TEST_USER_PASSWORD"]
})
# Fixture payload for the client-creation endpoint. NOTE(review): no
# client_id is sent -- the reported error shows the server inserting
# client_id as '' instead of generating one; confirm server-side handling.
data = {
"client_code": "999999.9.9",
"name": "AUTOMATED TEST CLIENT",
"short_name": "AUTOMATED TEST CLIENT",
"br_cnpj": "123809128312",
"br_im": "213798238974324",
"br_ie": "7893248932794324",
"address_id": 7665,
"is_inserted": False,
"skin_id": 1,
"plan_id": 1,
"organization": "CFR-100000",
"is_api_connected": False
}
# `json=` serializes the dict and sets the Content-Type header itself.
response = client.post('http://localhost:5000/dev/api/client', json=data, headers={
"Authorization": f"Bearer {access_token}"
})
print("THE RESPONSE")
print(response.json)
According to this doc, everything should be fine, but instead, I get the following postgres error:
{'error': {'code': 500, 'type': '/errors/internal-server-error', 'message': '(psycopg2.errors.InvalidTextRepresentation) invalid input syntax for type integer: ""\nLINE 1: ... plan_id, organization, is_api_connected) VALUES (\'\', \'99999...\n ^\n\n[SQL: INSERT INTO tb_client (client_id, client_code, name, short_name, br_cnpj, br_im, br_ie, address_id, is_inserted, skin_id, plan_id, organization, is_api_connected) VALUES (%(client_id)s, %(client_code)s, %(name)s, %(short_name)s, %(br_cnpj)s, %(br_im)s, %(br_ie)s, %(address_id)s, %(is_inserted)s, %(skin_id)s, %(plan_id)s, %(organization)s, %(is_api_connected)s) ON CONFLICT ON CONSTRAINT tb_client_client_code_key DO NOTHING]\n[parameters: {\'client_id\': \'\', \'client_code\': \'999999.9.9\', \'name\': \'AUTOMATED TEST CLIENT\', \'short_name\': \'AUTOMATED TEST CLIENT\', \'br_cnpj\': \'123809128312\', \'br_im\': \'213798238974324\', \'br_ie\': \'7893248932794324\', \'address_id\': 7665, \'is_inserted\': False, \'skin_id\': 1, \'plan_id\': 1, \'organization\': \'CFR-100000\', \'is_api_connected\': False}]\n(Background on this error at: http://sqlalche.me/e/13/9h9h)'}}
Is the client post function seriously only expecting strings for json? It seems that the problem goes away when I use only strings, but I'm not expecting that on the API.
Even if I include "'Content-Type': 'application/json'" on the headers, I get the same error. What could be happening?
So I am writing unittests for a project and I am testing register() function.
Here is it:
def register():
    """Register a new user from the JSON request body.

    Expects a JSON payload with "username", "password", "name" and "email".
    Returns a success JSON response, or a 403 JSON response when the
    username or email already exists (unique index violation).
    """
    # Parse the body once instead of calling request.get_json() per field.
    payload = request.get_json()
    username = payload.get("username")
    password = payload.get("password")
    name = payload.get("name")
    email = payload.get("email")
    # Positional values for the User constructor; the leading/trailing None
    # are presumably server-assigned fields -- confirm against User.__init__.
    values = (
        None,
        username,
        User.hash_password(password),
        name,
        email,
        None
    )
    try:
        # Create user and update session
        User(*values).create()
        ActiveUser.logged_in = True
        ActiveUser.username = username
        info_log.info("User %s registered successfully." % username)
        return jsonify(success=True, message="Registration successful!")
    except pymongo.errors.DuplicateKeyError:
        # Unique index on username or email was violated (unused `as e` dropped).
        return jsonify(success=False, message="Duplicated username or email!"), 403
I want to have three tests: valid, invalid (duplicate username), invalid (duplicate email).
# Register helper function
def register(self, username, password, name, email):
    """POST a registration payload to /register and return the response."""
    payload = json.dumps(
        dict(username=username, password=password, name=name, email=email)
    )
    return self.app.post(
        "/register",
        data=payload,
        content_type='application/json',
        follow_redirects=True,
    )
def test_02_valid_user_registration(self):
    """A fresh username/email combination registers successfully."""
    resp = self.register('test', '12345678', 'Tester 1', 'test#mail.mail')
    self.assertEqual(resp.status_code, 200)
    self.assertIn(b'Registration successful!', resp.data)
def test_03_invalid_user_registration_duplicate_username(self):
    """Registering an already-used username is rejected with 403."""
    response = self.register('test', '12345678', 'Tester 2', 'test1#mail.mail')
    self.assertEqual(response.status_code, 403)
    # Must match the exact message register() returns:
    # "Duplicated username or email!" -- the original asserted
    # "Duplicate username or email!", which register() never produces.
    self.assertIn(b'Duplicated username or email!', response.data)
def test_04_invalid_user_registration_duplicate_email(self):
    """Registering an already-used email is rejected with 403."""
    response = self.register('test2', '12345678', 'Tester 3', 'test#mail.mail')
    self.assertEqual(response.status_code, 403)
    # Must match the exact message register() returns:
    # "Duplicated username or email!" -- the original asserted
    # "Duplicate username or email!", which register() never produces.
    self.assertIn(b'Duplicated username or email!', response.data)
As expected I get DuplicateKeyError, because I have set Unique for those parameters in the database.
pymongo.errors.DuplicateKeyError: E11000 duplicate key error collection: user.users index: username_1 dup key: { username: "test" }
Is there a way to get which is the duplicated item from the DuplicateKeyError, so I can have separate unit tests for duplicate username and email?
I know this is more of a component/integration testing rather than unit testing, but this is the only way I know how to do it in Python 3.
So I started digging through the implementation of DuplicateKeyError and I found that it contains code and details.
I printed the details of the error and got this:
{
"message": {
"code": 11000,
"errmsg": "E11000 duplicate key error collection: user.users index: username_1 dup key: {
username: \"test\"
}",
"index": 0,
"keyPattern": {
"username": 1
},
"keyValue": {
"username": "test"
}
},
"success": false
}
After that it was easy to get the two tests to work.
try:
# code
except pymongo.errors.DuplicateKeyError as e:
# Catch pymongo exception
# e.details is the raw server error document; its "keyValue" entry maps the
# violated unique field (e.g. "username") to the duplicated value.
# NOTE(review): e.details may be None on some pymongo/server versions --
# confirm before calling .get() on it.
key = list(e.details.get("keyValue").keys())[0]
value = e.details.get("keyValue").get(key)
return jsonify(success=False, message="Duplicate %s: %s" % (key, value)), 403
And the tests:
def test_03_invalid_user_registration_duplicate_username(self):
    """Duplicate username yields a 403 naming the duplicated username."""
    resp = self.register("test", "12345678", "Tester 2", "test1#mail.mail")
    self.assertEqual(resp.status_code, 403)
    self.assertIn(b"Duplicate username: test", resp.data)
def test_04_invalid_user_registration_duplicate_email(self):
    """Duplicate email yields a 403 naming the duplicated email."""
    resp = self.register("test", "12345678", "Tester 3", "test#mail.mail")
    self.assertEqual(resp.status_code, 403)
    self.assertIn(b"Duplicate email: test#mail.mail", resp.data)
I am going to create chatbot using websockets. Each user can have their own account. I have Django backend and frontend written in Angular. At the moment I have a problem with message object. To wit I get this in backend:
django_1 | django.db.utils.IntegrityError: null value in column "user_id" violates not-null constraint
django_1 | DETAIL: Failing row contains (212, {"message":"Hello"}, null).
It looks as if I'm not sending the user ID from the frontend. Maybe I should send something like this: {"message":"Hello", "id":1}? I wonder how I can solve this and how it should be done properly.
My Django message model looks in this way:
class Message(models.Model):
    """A single chat message tied to the authoring user."""
    # on_delete is mandatory since Django 2.0; CASCADE removes a user's
    # messages together with the user.
    user = models.ForeignKey('auth.User', on_delete=models.CASCADE)
    message = models.CharField(max_length=200)
This is my backend consumer:
#channel_session_user_from_http
# NOTE(review): the line above is presumably the decorator
# `@channel_session_user_from_http` with '@' mangled to '#' -- confirm.
def msg_consumer(message):
    """Persist an incoming frame and fan it out to the "chat" group."""
    text = message.content.get('text')
    Message.objects.create(
        # Attach the authenticated user; creating the row without it is what
        # raises the reported "null value in column user_id" IntegrityError.
        user=message.user,
        message=text,
    )
    Group("chat").send({'text': text})
#channel_session_user
def ws_connect(message):
    """Accept the websocket, join the chat group and greet the client."""
    reply = message.reply_channel
    # Accept the connection
    reply.send({"accept": True})
    # Add to the chat group
    Group("chat").add(reply)
    reply.send({
        "text": json.dumps({'message': 'Welcome'})
    })
#channel_session_user
# NOTE(review): presumably the decorator `@channel_session_user` with '@'
# mangled to '#' -- confirm.
def ws_receive(message):
    """Store an incoming frame and trigger the chat consumer."""
    message.reply_channel.send({"accept": True})
    print("Backend received message: " + message.content['text'])
    Message.objects.create(
        # Without the user, the insert violates the NOT NULL constraint on
        # user_id (the IntegrityError quoted above).
        user=message.user,
        message=message.content['text'],
    )
    Channel("chat").send({
        "text": json.dumps({
            'message': 'Can we start?'
        })
    })
#channel_session_user
def ws_disconnect(message):
    """Remove the closing connection from the chat group."""
    Group("chat").discard(message.reply_channel)
This is part of my Angular component:
export class HomeComponent {
    response: string;
    response2: string;

    // Outgoing payload; mutated by send() before each transmission.
    private message = {
        message: 'this is a test message'
    };

    constructor(
        private chatService: ChatService,
        private router: Router,
        private http: Http,
    ) {
        // Mirror every websocket message into `response` for the template.
        chatService.messages.subscribe(msg => {
            this.response = msg.message;
            console.log("Response from backend: " + msg.message);
        });
    }

    sendMsg() {
        console.log('new message from client to websocket: ', this.message);
        this.chatService.messages.next(this.message);
        return this.message.message;
    }

    send(msg) {
        this.message.message = msg;
        this.sendMsg();
    }

    login() {
        return this.http.get('/data', )
            .map(response => response.json())
            .subscribe(response2 => this.response2 = response2);
    }
}
#Component({
selector: 'key-up3',
template: `
<input #box (keyup.enter)="keyup7.emit(box.value)">
<p>{{value}}</p>
`
})
export class KeyUpComponent_v3 {
#Output() keyup7 = new EventEmitter<string>();
}
UPDATE
At the moment I solved it in the way shown below in backend.
def ws_receive(message):
    """Parse the JSON frame and store the message with its sender's id."""
    message.reply_channel.send({"accept": True})
    print("Backend received message: " + message.content['text'])
    # The frontend sends {"message": ..., "user_id": ...}; decode it so the
    # user foreign key can be populated explicitly.
    payload = json.loads(message.content['text'])
    Message.objects.create(
        user_id=payload['user_id'],
        message=payload['message'],
    )
Error seems to indicate your Message instance was created without user_id.
What if you add this argument when creating a new Message instance ?
#channel_session_user_from_http
def msg_consumer(message):
    """Store the received text with its authenticated author and fan it out."""
    text = message.content.get('text')
    # Associate the request's user so the NOT NULL user_id constraint holds.
    Message.objects.create(
        user=message.user,
        message=text,
    )
    Group("chat").send({'text': text})
I try to do a payment with api yandex money.
I use
# Obtain a payment instance id for this application, then build the external
# payments API client bound to it.
instance_id = ExternalPayment.get_instance_id(client_id)['instance_id']
api = ExternalPayment(instance_id)
def wallet_payments(access_token, ym_account, total, api):
    """Request and process a test p2p transfer via the Yandex.Money API.

    Args:
        access_token: OAuth token (currently unused -- see note below).
        ym_account: Destination wallet/account number.
        total: Amount due for the transfer.
        api: ExternalPayment-like client exposing request() and process().

    Returns:
        The status string reported by api.process().
    """
    # NOTE(review): the original built `Wallet(access_token)` here and never
    # used it; dropped as an unused local. Re-add if the constructor has
    # required side effects.
    request_options = {
        "pattern_id": "p2p",
        "to": ym_account,
        "amount_due": total,
        "comment": "test payment comment from yandex-money-python",
        "message": "test payment message from yandex-money-python",
        "label": "testPayment",
        "test_payment": True,
        "test_result": "success"
    }
    request_result = api.request(request_options)
    process_payment = api.process({
        "request_id": request_result['request_id'],
    })
    return process_payment['status']
request_result['status'] returns success, but after
`process_payment = api.process({
"request_id": request_result['request_id'],
})`
I get {'status': 'refused', 'error': 'illegal_param_ext_auth_success_uri'}.
How can I solve that?
From the yandex documentation:
illegal_param_ext_auth_success_uri:
The ext_auth_success_uri parameter has a missing or invalid value.
So you probably need to define a ext_auth_success_uri parameter which will be a listener url that receive yandex api response in case of success.
And you probably will need this one too which is the same but in case of error:
illegal_param_ext_auth_fail_uri:
The ext_auth_fail_uri parameter has a missing or invalid value.
source: https://tech.yandex.com/money/doc/dg/reference/process-payment-docpage/
When scrolling in elasticsearch it is important to provide at each scroll the latest scroll_id:
The initial search request and each subsequent scroll request returns
a new scroll_id — only the most recent scroll_id should be used.
The following example (taken from here) puzzles me. First, the scrolling initialization:
# Kick off the scan: the initial search returns the first scroll id.
rs = es.search(
    index=['tweets-2014-04-12', 'tweets-2014-04-13'],
    scroll='10s',
    search_type='scan',
    size=100,
    preference='_primary_first',
    body={
        "fields": ["created_at", "entities.urls.expanded_url", "user.id_str"],
        "query": {
            "wildcard": {"entities.urls.expanded_url": "*.ru"}
        }
    },
)
sid = rs['_scroll_id']
and then the looping:
tweets = []
while True:
    try:
        rs = es.scroll(scroll_id=sid, scroll='10s')
        tweets += rs['hits']['hits']
        # Bug fix: carry the most recent scroll id into the next call.
        # The original reused the first id forever, which only happens to
        # work while Elasticsearch returns an unchanged id.
        sid = rs['_scroll_id']
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C still interrupts the loop.
        break
It works, but I don't see where sid is updated... I believe that it happens internally, in the python client; but I don't understand how it works...
This is an old question, but for some reason came up first when searching for "elasticsearch python scroll". The python module provides a helper method to do all the work for you. It is a generator function that will return each document to you while managing the underlying scroll ids.
https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan
Here is an example of usage:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

# scan() pages through every hit, managing scroll ids transparently.
query = {
    "query": {"match_all": {}}
}
es = Elasticsearch(...)
for hit in scan(es, index="my-index", query=query):
    print(hit["_source"]["field"])
Using python requests
import requests
import json

elastic_url = 'http://localhost:9200/my_index/_search?scroll=1m'
scroll_api_url = 'http://localhost:9200/_search/scroll'
headers = {'Content-Type': 'application/json'}

payload = {
    "size": 100,
    # Bug fix: the original was missing the comma after this entry,
    # which is a Python syntax error.
    "sort": ["_doc"],
    "query": {
        "match": {
            "title": "elasticsearch"
        }
    }
}

r1 = requests.request(
    "POST",
    elastic_url,
    data=json.dumps(payload),
    headers=headers
)

# first batch data
try:
    res_json = r1.json()
    data = res_json['hits']['hits']
    _scroll_id = res_json['_scroll_id']
except KeyError:
    data = []
    _scroll_id = None
    # Python 2 print statements converted to print() calls.
    print('Error: Elastic Search: %s' % str(r1.json()))

while data:
    print(data)
    # scroll to get next batch data, always using the latest scroll id
    scroll_payload = json.dumps({
        'scroll': '1m',
        'scroll_id': _scroll_id
    })
    scroll_res = requests.request(
        "POST", scroll_api_url,
        data=scroll_payload,
        headers=headers
    )
    try:
        res_json = scroll_res.json()
        data = res_json['hits']['hits']
        _scroll_id = res_json['_scroll_id']
    except KeyError:
        data = []
        _scroll_id = None
        err_msg = 'Error: Elastic Search Scroll: %s'
        print(err_msg % str(scroll_res.json()))
Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html#search-request-scroll
In fact the code has a bug in it - in order to use the scroll feature correctly you are supposed to use the new scroll_id returned with each new call in the next call to scroll(), not reuse the first one:
Important
The initial search request and each subsequent scroll request returns
a new scroll_id — only the most recent scroll_id should be used.
http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-request-scroll.html
It's working because Elasticsearch does not always change the scroll_id in between calls and can for smaller result sets return the same scroll_id as was originally returned for some time. This discussion from last year is between two other users seeing the same issue, the same scroll_id being returned for awhile:
http://elasticsearch-users.115913.n3.nabble.com/Distributing-query-results-using-scrolling-td4036726.html
So while your code is working for a smaller result set it's not correct - you need to capture the scroll_id returned in each new call to scroll() and use that for the next call.
# Endpoint that opens a 1m scroll over all logstash indices, and the
# generic scroll-continuation endpoint.
self._elkUrl = "http://Hostname:9200/logstash-*/_search?scroll=1m"
self._scrollUrl="http://Hostname:9200/_search/scroll"
# NOTE(review): the two assignments above belong in the class's __init__;
# the docstring below describes the GetDataFromELK method that follows.
"""
Function to get the data from ELK through scrolling mechanism
"""
def GetDataFromELK(self):
    """Fetch all matching documents from Elasticsearch via the scroll API.

    Uses self._elkUrl, self._scrollUrl, self._username, self._password and
    self.body; cleans each batch with self.DataClean() and returns one
    concatenated pandas DataFrame. Exits the process on any failure.
    """
    # ref: https://www.elastic.co/guide/en/elasticsearch/reference/6.8/search-request-scroll.html
    try:
        if self._elkUrl is None:
            raise ValueError("_elkUrl is missing")
        if self._username is None:
            raise ValueError("_username for elk is missing")
        if self._password is None:
            raise ValueError("_password for elk is missing")
        auth = (self._username, self._password)
        response = requests.post(self._elkUrl, json=self.body, auth=auth)
        response = response.json()
        if response is None:
            raise ValueError("response is missing")
        sid = response['_scroll_id']
        hits = response['hits']
        total = hits["total"]
        if total is None:
            raise ValueError("total hits from ELK is none")
        total_val = int(total['value'])
        url = self._scrollUrl
        if url is None:
            raise ValueError("scroll url is missing")
        # Collect per-batch frames and concatenate once at the end:
        # DataFrame.append was removed in pandas 2.0 and was O(n^2) anyway.
        frames = []
        # start scrolling; the loop ends when a scroll page comes back empty
        while total_val > 0:
            # keep the search context alive for 2m between calls
            scroll_query = {"scroll": "2m", "scroll_id": sid}
            page = requests.post(url, json=scroll_query, auth=auth)
            page = page.json()
            # Always pass the most recent scroll id to the next request.
            sid = page['_scroll_id']
            data = page['hits']['hits']
            if len(data) > 0:
                frames.append(self.DataClean(data))
            total_val = len(data)
        dataFrame = pd.concat(frames) if frames else pd.DataFrame()
        print('Total records received from ELK=', len(dataFrame))
        return dataFrame
    except Exception as e:
        logging.error('Error while getting the data from elk', exc_info=e)
        sys.exit()
from elasticsearch import Elasticsearch

elasticsearch_user_name = 'es_username'
elasticsearch_user_password = 'es_password'
es_index = "es_index"

es = Elasticsearch(
    ["127.0.0.1:9200"],
    http_auth=(elasticsearch_user_name, elasticsearch_user_password),
)

# One-minute time window, fields-only response, 2000 docs per page.
query = {
    "query": {
        "bool": {
            "must": [
                {
                    "range": {
                        "es_datetime": {
                            "gte": "2021-06-21T09:00:00.356Z",
                            "lte": "2021-06-21T09:01:00.356Z",
                            "format": "strict_date_optional_time"
                        }
                    }
                }
            ]
        }
    },
    "fields": ["*"],
    "_source": False,
    "size": 2000,
}

resp = es.search(index=es_index, body=query, scroll="1m")
old_scroll_id = resp['_scroll_id']
results = resp['hits']['hits']

while results:
    for i, r in enumerate(results):
        # do something with the data
        pass
    result = es.scroll(
        scroll_id=old_scroll_id,
        scroll='1m'  # length of time to keep search context
    )
    # check if there's a new scroll ID
    if old_scroll_id != result['_scroll_id']:
        print("NEW SCROLL ID:", result['_scroll_id'])
    # keep track of pass scroll _id
    old_scroll_id = result['_scroll_id']
    results = result['hits']['hits']