I am looking for some advice on how I can upsert or replace existing user entity.
I tried couple of API's documented here and also here.
The entities are read from database and the plan is to keep them in sync with database values as a scheduled job.
Update: Code Snippet
client_options = {"quota_project_id": gcp_default_project_id,
"api_endpoint": "us-central1-dialogflow.googleapis.com:443"}
client = EntityTypesClient(credentials=credentials_det, client_options=client_options)
entity_type = v3beta.EntityType()
entity_type.display_name = entity_display_name
entity_type.kind = "KIND_REGEXP"
entity_type.entities = entity_json
# Initialize request argument(s)
request = v3beta.UpdateEntityTypeRequest(
response = client.update_entity_type(request=request)
entity_json is fetched from DB and created as JSON object as below.
df = get_data.get_df_details(config_dir, entity_data_source, sql)
username = df['username'].tolist()
entity_json = []
for each in username:
each_entity_value = {}
each_entity_value['value'] = each
each_entity_value['synonyms'] = [each]
Here's the Trace
Traceback (most recent call last):
File "/Users/<some_dir>/df_ins_entities/df_ins_entities/ins_entity_val.py", line 116, in
ins_now(config_dir, input_entity_name, entity_data_source)
File "/Users/<some_dir>/df_ins_entities/df_ins_entities/ins_entity_val.py", line 96, in ins_now
response = client.update_entity_type(request=request)
File "/Users/<some_dir>/df_ins_entities/lib/python3.9/site-packages/google/cloud/dialogflowcx_v3beta1/services/entity_types/client.py", line 902, in update_entity_type
response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,)
File "/Users/<some_dir>/df_ins_entities/lib/python3.9/site-packages/google/api_core/gapic_v1/method.py", line 154, in call
return wrapped_func(args, **kwargs)
File "/Users/<some_dir>/df_ins_entities/lib/python3.9/site-packages/google/api_core/grpc_helpers.py", line 59, in error_remapped_callable
raise exceptions.from_grpc_error(exc) from exc
google.api_core.exceptions.InvalidArgument: 400 Resource name '' does not match 'projects//locations//agents//entityTypes/*'.
Process finished with exit code 1
I'm connecting to Cloud SQL postgres via the python connector like this:
ip_type = IPTypes.PRIVATE if os.environ.get("PRIVATE_IP") else IPTypes.PUBLIC
def getconn() -> pg8000.dbapi.Connection:
conn: pg8000.dbapi.Connection = connector.connect(
return conn
pool = sqlalchemy.create_engine(
with pool.connect() as connection:
result = connection.execute(query).fetchall()
for row in result:
result_value = row[0]
# This doesn't work
except ClientResponseError as ex:
if ex.code == "429":
backoff_seconds = 30
logger.warning("Getting 'Too Many Requests' error from Cloud Sql api. Waiting to reconnect.")
Sometimes I see these errors:
Traceback (most recent call last): File
line 414, in _refresh_task refresh_data = await refresh_task File
line 350, in _perform_refresh metadata, ephemeral_cert = await
asyncio.gather( File
line 98, in _get_metadata resp = await client_session.get(url,
headers=headers, raise_for_status=True) File
"/usr/local/lib/python3.10/site-packages/aiohttp/client.py", line 643,
in _request resp.raise_for_status() File
line 1005, in raise_for_status raise ClientResponseError(
aiohttp.client_exceptions.ClientResponseError: 429, message='Too Many
It spews those errors occasionally, but recovers. I want to handle this more elegantly, but it seems trying to catch ClientResponseError is not working. How can I catch these errors and set a backoff timer?
Maybe I need to catch the call to connector.connect()?
I am trying to use Reddit's developer API to build a simple scraper that grabs posts and their replies in a target subreddit and produces JSON with the information.
I am getting a 404 error that I don't understand.
This is my code:
import praw
import json
def scrape(subreddit, limit):
r = praw.Reddit(user_agent='Reddit data organizer 1.0 by /u/reallymemorable', client_id='none of your business', client_secret='none of your business')
submissions = r.subreddit(subreddit).get_hot(limit=limit)
for submission in submissions:
data = {}
data['title'] = submission.title
data['score'] = submission.score
data['url'] = submission.url
data['author'] = str(submission.author)
data['subreddit'] = str(submission.subreddit)
data['num_comments'] = submission.num_comments
data['over_18'] = submission.over_18
data['selftext'] = submission.selftext
data['is_self'] = submission.is_self
data['name'] = submission.name
data['created_utc'] = submission.created_utc
data['permalink'] = submission.permalink
data['domain'] = submission.domain
data['id'] = submission.id
data['kind'] = submission.kind
scrape('https://www.reddit.com/r/funny/', 25)
When I run it, I get this:
reallymemorable#Christians-MBP Desktop % python3 fetch-data-subreddit.py
Traceback (most recent call last):
File "/Users/reallymemorable/Desktop/fetch-data-subreddit.py", line 26, in <module>
scrape('https://www.reddit.com/r/augmentedreality/comments/yv7sn8/ar_maximum_distance/', 25)
File "/Users/reallymemorable/Desktop/fetch-data-subreddit.py", line 6, in scrape
submissions = r.subreddit(subreddit).get_hot(limit=limit)
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/base.py", line 34, in __getattr__
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/subreddit.py", line 583, in _fetch
data = self._fetch_data()
File "/opt/homebrew/lib/python3.9/site-packages/praw/models/reddit/subreddit.py", line 580, in _fetch_data
return self._reddit.request(method="GET", params=params, path=path)
File "/opt/homebrew/lib/python3.9/site-packages/praw/util/deprecate_args.py", line 43, in wrapped
return func(**dict(zip(_old_args, args)), **kwargs)
File "/opt/homebrew/lib/python3.9/site-packages/praw/reddit.py", line 941, in request
return self._core.request(
File "/opt/homebrew/lib/python3.9/site-packages/prawcore/sessions.py", line 330, in request
return self._request_with_retries(
File "/opt/homebrew/lib/python3.9/site-packages/prawcore/sessions.py", line 266, in _request_with_retries
raise self.STATUS_EXCEPTIONS[response.status_code](response)
prawcore.exceptions.NotFound: received 404 HTTP response
r.subreddit(subreddit) - subreddit should just be the name of the subreddit e.g. "funny" and not the full URL.
See the docs here: https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#obtain-a-subreddit
I am trying to pull a huge amount of data (in millions) and I am getting the following error when running my code. If I run the same code with a small range (to be exact a range of 2) it runs successfully. Please assist in helping me know if this is my issue or is coming from the API side
The Error I am getting
DEBUG:google.api_core.bidi:Started helper thread Thread-ConsumeBidirectionalStream
DEBUG:google.api_core.bidi:Thread-ConsumeBidirectionalStream caught error 400 Request contains an invalid argument. and will exit. Generally this is due to the RPC itself being cancelled and the error will be surfaced to the calling code.
Traceback (most recent call last):
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 147, in error_remapped_callable
return _StreamingResponseIterator(
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 73, in __init__
self._stored_first_result = next(self._wrapped)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/grpc/_channel.py", line 426, in __next__
return self._next()
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/grpc/_channel.py", line 826, in _next
raise self
grpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.INVALID_ARGUMENT
details = "Request contains an invalid argument."
debug_error_string = "{"created":"#1652904360.179503883","description":"Error received from peer ipv4:","file":"src/core/lib/surface/call.cc","file_line":952,"grpc_message":"Request contains an invalid argument.","grpc_status":3}"
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/bidi.py", line 636, in _thread_main
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/bidi.py", line 279, in open
call = self._start_rpc(iter(request_generator), metadata=self._rpc_metadata)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/services/big_query_write/client.py", line 678, in append_rows
response = rpc(
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/gapic_v1/method.py", line 154, in __call__
return wrapped_func(*args, **kwargs)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/retry.py", line 283, in retry_wrapped_func
return retry_target(
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/retry.py", line 190, in retry_target
return target()
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 151, in error_remapped_callable
raise exceptions.from_grpc_error(exc) from exc
google.api_core.exceptions.InvalidArgument: 400 Request contains an invalid argument.
INFO:google.api_core.bidi:Thread-ConsumeBidirectionalStream exiting
DEBUG:google.cloud.bigquery_storage_v1.writer:Finished stopping manager.
Traceback (most recent call last):
File "write_data_to_db2.py", line 207, in <module>
p.append_rows_pending(project_id='dwingestion', dataset_id='ke',
File "write_data_to_db2.py", line 188, in append_rows_pending
response_future_1 = append_rows_stream.send(request)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/writer.py", line 234, in send
return self._open(request)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/writer.py", line 207, in _open
raise request_exception
google.api_core.exceptions.Unknown: None There was a problem opening the stream. Try turning on DEBUG level logs to see the error.
Summary Of My Code
def whole_teltel_raw_data():
# Creating a session to introduce network consistency
session = requests.Session()
retry = Retry(connect=3, backoff_factor=1.0)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
url = "https://my_api_url"
the_headers = {"X-API-KEY": 'my key'}
offset_limit = 1249500
teltel_data = []
# Loop through the results and if present extend the teltel_data list
# ======================================================================================================================
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'dwingestion-b033d9535e9d.json'
def create_row_data(tuple_data):
call_id, starttime, stoptime, direction, type, status, duration_sec, rate, cost, transfer, extra_prefix, audio_url, \
hangup_element, caller_number, caller_type, caller_cid, caller_dnid, caller_user_id, caller_user_short, \
callee_number, calle_type, callee, hangup_element_name, hangup_element_element, callee_user_id, callee_user_short, \
caller = tuple_data
row = teltel_call_data_pb2.TeltelCall()
row.call_id = call_id
row.starttime = starttime
row.stoptime = stoptime
row.direction = direction
row.type = type
row.status = status
row.duration_sec = duration_sec
row.rate = rate
row.cost = cost
row.transfer = transfer
row.extra_prefix = extra_prefix
row.audio_url = audio_url
row.hangup_element = hangup_element
row.caller_number = caller_number
row.caller_type = caller_type
row.caller_cid = caller_cid
row.caller_dnid = caller_dnid
row.caller_user_id = caller_user_id
row.caller_user_short = caller_user_short
row.callee_number = callee_number
row.calle_type = calle_type
row.callee = callee
row.hangup_element_name = hangup_element_name
row.hangup_element_title = hangup_element_element
row.callee_user_id = callee_user_id
row.callee_user_short = callee_user_short
row.caller = caller
return row.SerializeToString()
# Creating connection to the data warehouse
def create_bigquery_storage_client(google_credentials):
return bigquery_storage_v1.client.BigQueryWriteClient(
class GcpBigqueryStorageService(object):
def __init__(self, google_credentials=None, gcp_config=None):
self.client = create_bigquery_storage_client(google_credentials)
self.config = gcp_config
def append_rows_pending(self, project_id: str, dataset_id: str, table_id: str):
"""Create a write stream, write some sample data, and commit the stream."""
# write_client = self.client
parent = self.client.table_path(project_id, dataset_id, table_id)
write_stream = types.WriteStream()
# When creating the stream, choose the type. Use the PENDING type to wait
write_stream.type_ = types.WriteStream.Type.PENDING
write_stream = self.client.create_write_stream(
parent=parent, write_stream=write_stream
stream_name = write_stream.name
# Create a template with fields needed for the first request.
request_template = types.AppendRowsRequest()
# The initial request must contain the stream name.
request_template.write_stream = stream_name
# So that BigQuery knows how to parse the serialized_rows, generate a
# protocol buffer representation of your message descriptor.
proto_schema = types.ProtoSchema()
proto_descriptor = descriptor_pb2.DescriptorProto()
proto_schema.proto_descriptor = proto_descriptor
proto_data = types.AppendRowsRequest.ProtoData()
proto_data.writer_schema = proto_schema
request_template.proto_rows = proto_data
# Some stream types support an unbounded number of requests. Construct an
# AppendRowsStream to send an arbitrary number of requests to a stream.
append_rows_stream = writer.AppendRowsStream(self.client, request_template)
# Create a batch of row data by appending proto2 serialized bytes to the
# serialized_rows repeated field.
proto_rows = types.ProtoRows()
row_number = 0
for row in whole_teltel_raw_data():
# checking the writing progress
row_number = row_number + 1
print("Writing to the database row number", row_number)
# The first request must always have an offset of 0.
request = types.AppendRowsRequest()
proto_data = types.AppendRowsRequest.ProtoData()
proto_data.rows = proto_rows
request.proto_rows = proto_data
# A PENDING type stream must be "finalized" before being committed. No new
# records can be written to the stream after this method has been called.
# Commit the stream you created earlier.
batch_commit_write_streams_request = types.BatchCommitWriteStreamsRequest()
batch_commit_write_streams_request.parent = parent
batch_commit_write_streams_request.write_streams = [write_stream.name]
print(f"Writes to stream: '{write_stream.name}' have been committed.")
p = GcpBigqueryStorageService()
p.append_rows_pending(project_id='my_project', dataset_id='my_id', table_id='teltel_call_2')
I am new to Marshmallow (3.10.0) and I am lost and need help trying to figure out what causes the following error:
AssertionError: ["Input Error - exten: ['Missing data for required field.']"]
The traceback of the error is the following:
Traceback (most recent call last):
File "/root/wazo_virtualenv_python37/lib/python3.7/site-packages/nose/case.py", line 198, in runTest
File "/root/wazo-confd/integration_tests/suite/helpers/wrappers.py", line 81, in decorated
result = func(*new_args, **kwargs)
File "/root/wazo-confd/integration_tests/suite/helpers/wrappers.py", line 81, in decorated
result = func(*new_args, **kwargs)
File "/root/wazo-confd/integration_tests/suite/base/test_call_filter_surrogate_user.py", line 216, in test_get_surrogates_callfilter_exten_when_disabled
confd.extensions.features(feature['id']).put({'enabled': False}).assert_updated()
File "/root/wazo-confd/integration_tests/suite/helpers/client.py", line 272, in assert_updated
File "/root/wazo-confd/integration_tests/suite/helpers/client.py", line 242, in assert_status
assert_that(self.response.status_code, is_in(statuses), self.response.text)
So it seems that the test function test_get_surrogates_callfilter_exten_when_disabled is failing:
def test_get_surrogates_callfilter_exten_when_disabled(call_filter, user):
response = confd.extensions.features.get(search="bsfilter")
feature = response.items[0]
---> (line 216 in traceback): confd.extensions.features(feature['id']).put({'enabled': False}).assert_updated()
with a.call_filter_surrogate_user(call_filter, user):
response = confd.callfilters(call_filter['id']).get()
users=contains(has_entries(exten=None, uuid=user['uuid']))
{'enabled': feature['enabled']}
the feature_extension schema is defined as the following:
class ExtensionFeatureSchema(BaseSchema):
id = fields.Integer(dump_only=True)
exten = fields.String(validate=Regexp(EXTEN_REGEX), required=True)
context = fields.String(dump_only=True)
feature = fields.String(attribute='typeval', dump_only=True)
enabled = fields.Boolean()
links = ListLink(Link('extensions_features'))
and the put function:
def put(self):
form = self.schema().load(request.get_json())
variables = [self.model(**option) for option in form]
self.service.edit(self.section_name, variables)
return '', 204
I have tried many solutions that I found online; but they did not fix the issue for me:
1 + pass partial=True to the load function:
form = self.schema().load(request.get_json(), partial=True)
2 + remove required=True from the field definition; this made the above error go away but failed many other tests that I have.
I am currently out of ideas, so if anyone has an idea how to fix the issue.
I am trying to get the post's text of a megagroup in Telegram using Telethon. I could get the messages from chats however megagroup's posts cannot be retrieved using the same method (How to get channels and groups data from my Telegram account?(Python)). Are megagroups open to fetch their posts by being a simple user using telethon?
def get_entity_data(entity_id, limit):
entity = client.get_entity(entity_id)
posts = client(GetHistoryRequest(peer=entity, limit=limit, offset_date=None, offset_id=0, max_id=0, min_id=0, add_offset=0, hash=0))
messages = []
for message in posts.messages:
return messages
result = client(GetDialogsRequest(offset_date=None, offset_id=0, offset_peer=InputPeerEmpty(), limit=100, hash=0)) entities = result.chats entities.reverse()
for entity in entities:
title = entity.title
messages = get_entity_data(entity.id, 10)
print(title + ' :')
and the error message is:
Traceback (most recent call last):
File "./search_message3.py", line 61, in <module>
messages = get_entity_data(entity.id, 10)
File "./search_message3.py", line 48, in get_entity_data
entity = client.get_entity(entity_id)
File "/home/carlos/.local/lib/python3.8/site-packages/telethon/sync.py", line 39, in syncified
return loop.run_until_complete(coro)
File "/home/carlos/.miniconda3/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete
return future.result()
File "/home/carlos/.local/lib/python3.8/site-packages/telethon/client/users.py", line 316, in get_entity
chats = (await self(
File "/home/carlos/.local/lib/python3.8/site-packages/telethon/client/users.py", line 30, in __call__
return await self._call(self._sender, request, ordered=ordered)
File "/home/carlos/.local/lib/python3.8/site-packages/telethon/client/users.py", line 84, in _call
result = await future
telethon.errors.rpcerrorlist.PeerIdInvalidError: An invalid Peer was used. Make sure to pass the right peer type and that the value is valid (for instance, bots cannot start conversations) (caused by GetChatsRequest)