I am trying to update a Google Cloud Scheduler job, but it does not work. How do I do this?
Now I understand what the problem is: the class I am passing for the target option is wrong.
How should I write this part?
Target(topic_name="projects/aaa/topics/bbb",data="test".encode("utf-8"))
The error message:
TypeError: Parameter to MergeFrom() must be instance of same class: expected google.cloud.scheduler.v1.PubsubTarget got PubsubTarget.
My code:
import os
from google.cloud import scheduler_v1
from google.cloud.scheduler_v1 import PubsubTarget as Target
from google.protobuf import field_mask_pb2
pub = Target(topic_name="projects/aaa/topics/bbb",data="test".encode("utf-8"))
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = './test.json'
client = scheduler_v1.CloudSchedulerClient()
job = {
    'name': "projects/aaa/locations/us-central1/jobs/test",
    "description": "c",
    "schedule": "59 * * * *",
    "pubsub_target": pub,
}
update_mask = field_mask_pb2.FieldMask(paths=['description','schedule','pubsub_target'])
response = client.update_job(job=job,update_mask=update_mask)
print(response)
import os
from google.cloud import scheduler_v1
from google.cloud.scheduler_v1 import Job
from google.cloud.scheduler_v1 import PubsubTarget as Target
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = './test.json'
client = scheduler_v1.CloudSchedulerClient()
job = Job()
job.name = 'projects/aaa/locations/us-central1/jobs/test'
job.description = 'c'
job.schedule = '59 * * * *'
job.time_zone = 'America/Los_Angeles'
pt = Target()
pt.topic_name = 'projects/aaa/topics/bbb'
pt.data = 'test'.encode('utf-8')
job.pubsub_target = pt
job = client.create_job(parent='projects/aaa/locations/us-central1', job=job)
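A hedged sketch of how the update_job call might be written instead, building the whole job from the scheduler_v1 Job and PubsubTarget classes so that every nested message comes from the same generated module (an assumption based on the MergeFrom error, not a confirmed fix):
import os
from google.cloud import scheduler_v1
from google.cloud.scheduler_v1 import Job, PubsubTarget
from google.protobuf import field_mask_pb2

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = './test.json'
client = scheduler_v1.CloudSchedulerClient()

# Build the job as a Job message instead of a dict that mixes in a message instance.
job = Job(
    name='projects/aaa/locations/us-central1/jobs/test',
    description='c',
    schedule='59 * * * *',
    pubsub_target=PubsubTarget(
        topic_name='projects/aaa/topics/bbb',
        data='test'.encode('utf-8'),
    ),
)
update_mask = field_mask_pb2.FieldMask(paths=['description', 'schedule', 'pubsub_target'])
response = client.update_job(job=job, update_mask=update_mask)
print(response)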
I am trying to implement fast_download in my existing code. The problem is that the fast_download module uses await, which I am not familiar with, and the official documentation is not enough for my current level of knowledge. Can anyone help me?
Here is my code:
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.functions.channels import GetFullChannelRequest
from FastTelethonhelper import fast_download
from telethon.tl.types import InputPeerEmpty
from tqdm import tqdm
api_id =
api_hash = ''
def callback(current, total):
    global pbar
    global prev_curr
    pbar.update(current - prev_curr)
    prev_curr = current

def download_media(group, cl, name):
    global pbar
    global prev_curr
    messages = cl.get_messages(group, limit=2000)
    start = 0
    print(start)
    for i, message in enumerate(messages[start:]):
        prev_curr = 0
        if message.video:
            print("\n{} / {} : {}".format(i + start, len(messages), message.file.name))
            pbar = tqdm(total=message.document.size, unit='B', unit_scale=True)
            message.download_media('./' + name + '/', progress_callback=callback)
            # fast_download(client, message.video, download_folder='./' + name + '/', progress_bar_function=callback)
            pbar.close()
with TelegramClient('name', api_id, api_hash) as client:
    result = client(GetDialogsRequest(
        offset_date=None,
        offset_id=0,
        offset_peer=InputPeerEmpty(),
        limit=500,
        hash=0,
    ))
    title = 'channel_name'  # Title for channel
    channel = client(GetFullChannelRequest(title))
    download_media(channel.full_chat, client, "directory_name")
I was expecting to improve the download speed of the Telethon API. The download speed of some files is limited to about 100 KB/s, which is too slow for large files. I have already implemented code using Telethon's built-in "download_media" method.
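A hedged sketch of how the commented-out fast_download call could be wired up with await (assuming it accepts the same arguments as in that commented-out line, and reusing the api_id/api_hash defined above): the download loop moves into an async function, every Telethon call inside it is awaited, and the client's event loop drives it.
from tqdm import tqdm
from telethon.sync import TelegramClient
from telethon.tl.functions.channels import GetFullChannelRequest
from FastTelethonhelper import fast_download

def callback(current, total):
    global pbar, prev_curr
    pbar.update(current - prev_curr)
    prev_curr = current

async def download_media_fast(client, group, name):
    global pbar, prev_curr
    # Inside an async function, Telethon calls are awaited instead of called directly.
    messages = await client.get_messages(group, limit=2000)
    for message in messages:
        prev_curr = 0
        if message.video:
            pbar = tqdm(total=message.document.size, unit='B', unit_scale=True)
            # fast_download is a coroutine, so it is awaited as well
            # (same arguments as the commented-out call above).
            await fast_download(client, message.video,
                                download_folder='./' + name + '/',
                                progress_bar_function=callback)
            pbar.close()

with TelegramClient('name', api_id, api_hash) as client:
    channel = client(GetFullChannelRequest('channel_name'))
    client.loop.run_until_complete(
        download_media_fast(client, channel.full_chat, 'directory_name'))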
I have been trying to patch the list_blobs() function of ContainerClient, but have not been able to do this successfully. This code outputs a MagicMock object, so the function isn't patched as I would expect it to be (I am trying to patch it to return a list such as ['Blob1', 'Blob2']).
#################Script File
import sys
from datetime import datetime, timedelta
import pyspark
import pytz
import yaml
# from azure.storage.blob import BlobServiceClient, ContainerClient
from pyspark.dbutils import DBUtils as dbutils
import azure.storage.blob
# Open Config
def main():
    spark_context = pyspark.SparkContext.getOrCreate()
    spark_context.addFile(sys.argv[1])

    stream = None
    stream = open(sys.argv[1], "r")
    config = yaml.load(stream, Loader=yaml.FullLoader)
    stream.close()

    account_key = dbutils.secrets.get(scope=config["Secrets"]["Scope"], key=config["Secrets"]["Key Name"])
    target_container = config["Storage Configuration"]["Container"]
    target_account = config["Storage Configuration"]["Account"]
    days_history_to_keep = config["Storage Configuration"]["Days History To Keep"]

    connection_string = (
        "DefaultEndpointsProtocol=https;AccountName="
        + target_account
        + ";AccountKey="
        + account_key
        + ";EndpointSuffix=core.windows.net"
    )
    blob_service_client: azure.storage.blob.BlobServiceClient = (
        azure.storage.blob.BlobServiceClient.from_connection_string(connection_string)
    )
    container_client: azure.storage.blob.ContainerClient = (
        blob_service_client.get_container_client(target_container)
    )

    blobs = container_client.list_blobs()
    print(blobs)
    print(blobs)

    utc = pytz.UTC
    delete_before_date = utc.localize(
        datetime.today() - timedelta(days=days_history_to_keep)
    )
    for blob in blobs:
        if blob.creation_time < delete_before_date:
            print("Deleting Blob: " + blob.name)
            container_client.delete_blob(blob, delete_snapshots="include")

if __name__ == "__main__":
    main()
#################Test File
import unittest
from unittest import mock

import DeleteOldBlobs

class DeleteBlobsTest(unittest.TestCase):
    def setUp(self):
        pass

    @mock.patch("DeleteOldBlobs.azure.storage.blob.ContainerClient")
    @mock.patch("DeleteOldBlobs.azure.storage.blob.BlobServiceClient")
    @mock.patch("DeleteOldBlobs.dbutils")
    @mock.patch("DeleteOldBlobs.sys")
    @mock.patch('DeleteOldBlobs.pyspark')
    def test_main(self, mock_pyspark, mock_sys, mock_dbutils, mock_blobserviceclient, mock_containerclient):
        # mock setup
        config_file = "Delete_Old_Blobs_UnitTest.yml"
        mock_sys.argv = ["unused_arg", config_file]
        mock_dbutils.secrets.get.return_value = "A Secret"
        mock_containerclient.list_blobs.return_value = ["ablob1", "ablob2"]

        # execute test
        DeleteOldBlobs.main()

        # TODO assert actions taken
        # mock_sys.argv.__get__.assert_called_with()
        # dbutils.secrets.get(scope=config['Secrets']['Scope'], key=config['Secrets']['Key Name'])

if __name__ == "__main__":
    unittest.main()
Output:
<MagicMock name='BlobServiceClient.from_connection_string().get_container_client().list_blobs()' id='1143355577232'>
What am I doing incorrectly here?
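One hedged observation (not a verified fix): @mock.patch replaces the ContainerClient class itself, but main() never uses that class directly; it calls list_blobs() on whatever get_container_client() returns from the patched BlobServiceClient. A sketch of setting the return value along that call chain instead, using the same mock names as the test above:
mock_blobserviceclient.from_connection_string.return_value \
    .get_container_client.return_value \
    .list_blobs.return_value = ["Blob1", "Blob2"]
The second answer below configures the chain in essentially this way.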
I'm not able to execute your code at the moment, but I have tried to simulate it. To do this, I created the following three files in the path /<path-to>/pkg/sub_pkg1 (where pkg and sub_pkg1 are packages).
File ContainerClient.py
def list_blobs(self):
    return "blob1"
File DeleteOldBlobs.py
from pkg.sub_pkg1 import ContainerClient
# Open Config
def main():
    blobs = ContainerClient.list_blobs()
    print(blobs)
    print(blobs)
File DeleteBlobsTest.py
import unittest
from unittest import mock

from pkg.sub_pkg1 import DeleteOldBlobs

class DeleteBlobsTest(unittest.TestCase):
    def setUp(self):
        pass

    def test_main(self):
        mock_containerclient = mock.MagicMock()
        with mock.patch("DeleteOldBlobs.ContainerClient.list_blobs", mock_containerclient.list_blobs):
            mock_containerclient.list_blobs.return_value = ["ablob1", "ablob2"]
            DeleteOldBlobs.main()

if __name__ == '__main__':
    unittest.main()
If you execute the test code you obtain the output:
['ablob1', 'ablob2']
['ablob1', 'ablob2']
This output means that the function list_blobs() is mocked by mock_containerclient.list_blobs.
I don't know whether the content of this post will be useful to you, since I'm not able to simulate your code more closely at the moment.
I hope my code can serve as inspiration for finding your real solution.
The structure of that answer didn't match my solution; perhaps both will work, but it was important for me to patch pyspark even though I never call it directly, or exceptions would be thrown when my code tried to interact with Spark.
Perhaps this will be useful to someone:
#mock.patch("DeleteOldBlobs.azure.storage.blob.BlobServiceClient")
#mock.patch("DeleteOldBlobs.dbutils")
#mock.patch("DeleteOldBlobs.sys")
#mock.patch('DeleteOldBlobs.pyspark')
def test_list_blobs_called_once(self, mock_pyspark, mock_sys, mock_dbutils, mock_blobserviceclient):
# mock setup
config_file = "Delete_Old_Blobs_UnitTest.yml"
mock_sys.argv = ["unused_arg", config_file]
account_key = 'Secret Key'
mock_dbutils.secrets.get.return_value = account_key
bsc_mock: mock.Mock = mock.Mock()
container_client_mock = mock.Mock()
blob1 = Blob('newblob', datetime.today())
blob2 = Blob('oldfile', datetime.today() - timedelta(days=20))
container_client_mock.list_blobs.return_value = [blob1, blob2]
bsc_mock.get_container_client.return_value = container_client_mock
mock_blobserviceclient.from_connection_string.return_value = bsc_mock
# execute test
DeleteOldBlobs.main()
#Assert Results
container_client_mock.list_blobs.assert_called_once()
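The Blob objects in this test aren't shown; presumably they are simple stand-ins exposing the two attributes the script reads, blob.name and blob.creation_time. A hypothetical minimal helper (an assumption, not part of the original answer) could look like this:
from dataclasses import dataclass
from datetime import datetime

@dataclass
class Blob:
    # Only the attributes DeleteOldBlobs.main() touches.
    name: str
    creation_time: datetime
Since main() compares creation_time against a timezone-aware datetime, the test values may need tzinfo attached (e.g. pytz.UTC.localize(...)) to avoid a naive/aware comparison error.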
This is the code I'm using to turn pypresence on:
from pypresence import Presence
import time
start = int(time.time())
client_id = "XXXXXXXXXXXXXXXX"
RPC = Presence(client_id, pipe=0, loop=None, handler=None)
RPC.connect()
while True:
    RPC.update(
        details="▬▬▬▬▬▬▬▬▬▬",
        state="All Systems Operational.",
        large_image="xxx_logo_rpc_v2",
        large_text="exe is running!",
        small_image="xx_green_circle_rpc_v2",
        small_text="Online",
        start=start,
        buttons=[{"label": "🌐XX 🌐", "url": "https://xx/"}, {"label": "xxxx", "url": "https://xxxx8"}]
    )
    time.sleep(60)
This is the code I'm trying to use to turn it off:
from pypresence import Presence
import os
client_id = "XXXXXXXXXXXXXX"
RPC = Presence(client_id, pipe=0, loop=None, handler=None)
RPC.clear(pid=os.getpid())
RPC.close()
The problem is that it shows me this error:
File "C:\Users\Foxii\AppData\Local\Programs\Python\Python310\lib\site- packages\pypresence\baseclient.py",
line 96, in send_data assert self.sock_writer is not None, "You must connect your client before sending events!" AssertionError: You must connect your client before sending events!
Well, the client seems to be connected when I run it with my custom Tkinter GUI, but when I try to run the code to close it, it shows me that error.
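One hedged reading of the assertion: the "turn it off" script builds a brand-new Presence object and calls clear() on it without ever calling connect(), which is exactly what the failing assert in baseclient.py checks. A minimal sketch (same client_id placeholder) that connects before clearing:
import os
from pypresence import Presence

client_id = "XXXXXXXXXXXXXX"
RPC = Presence(client_id, pipe=0)
RPC.connect()               # a new process has to connect before it can send events
RPC.clear(pid=os.getpid())
RPC.close()
Note that each process holds its own connection to Discord, so clearing from a separate script may not remove the presence set by the still-running GUI process; calling clear()/close() from within that process (or stopping it) is the other option.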
I'm working on a PoC of Amundsen (Lyft's data catalog) to introduce a data catalog.
So I used the file 'sample_mysql_loader.py' and inserted my database information.
OS: Ubuntu 18.04
Python: 3.6.9
pip: 21.3.1
elasticsearch: 8.0.0
neo4j: 3.5.26
I installed Amundsen using the Docker quickstart.
import logging
import sys
import textwrap
import uuid
from elasticsearch import Elasticsearch
from pyhocon import ConfigFactory
from sqlalchemy.ext.declarative import declarative_base
from databuilder.extractor.mysql_metadata_extractor import MysqlMetadataExtractor
from databuilder.extractor.neo4j_extractor import Neo4jExtractor
from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor
from databuilder.extractor.sql_alchemy_extractor import SQLAlchemyExtractor
from databuilder.job.job import DefaultJob
from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader
from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCSVLoader
from databuilder.publisher import neo4j_csv_publisher
from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher
from databuilder.publisher.neo4j_csv_publisher import Neo4jCsvPublisher
from databuilder.task.task import DefaultTask
from databuilder.transformer.base_transformer import NoopTransformer
import pymysql
pymysql.install_as_MySQLdb()
es_host = None
neo_host = None
if len(sys.argv) > 1:
    es_host = sys.argv[1]
if len(sys.argv) > 2:
    neo_host = sys.argv[2]

es = Elasticsearch([
    {'host': es_host or 'localhost'},
])
DB_FILE = '/tmp/test.db'
SQLITE_CONN_STRING = 'sqlite:////tmp/test.db'
Base = declarative_base()
NEO4J_ENDPOINT = f'bolt://{neo_host or "localhost"}:7687'
neo4j_endpoint = NEO4J_ENDPOINT
neo4j_user = 'neo4j'
neo4j_password = 'test'
LOGGER = logging.getLogger(__name__)
# todo: connection string needs to change
def connection_string():
    user = 'user_name'
    password = 'password'
    host = 'host'
    port = 'port'
    db = 'db_name'
    return "mysql://%s:%s@%s:%s/%s" % (user, password, host, port, db)
def run_mysql_job():
    where_clause_suffix = textwrap.dedent("""
        where c.table_schema = '***db_name***'
    """)

    tmp_folder = '/var/tmp/amundsen/table_metadata'
    node_files_folder = f'{tmp_folder}/nodes/'
    relationship_files_folder = f'{tmp_folder}/relationships/'

    job_config = ConfigFactory.from_dict({
        f'extractor.mysql_metadata.{MysqlMetadataExtractor.WHERE_CLAUSE_SUFFIX_KEY}': where_clause_suffix,
        f'extractor.mysql_metadata.{MysqlMetadataExtractor.USE_CATALOG_AS_CLUSTER_NAME}': True,
        f'extractor.mysql_metadata.extractor.sqlalchemy.{SQLAlchemyExtractor.CONN_STRING}': connection_string(),
        f'loader.filesystem_csv_neo4j.{FsNeo4jCSVLoader.NODE_DIR_PATH}': node_files_folder,
        f'loader.filesystem_csv_neo4j.{FsNeo4jCSVLoader.RELATION_DIR_PATH}': relationship_files_folder,
        f'publisher.neo4j.{neo4j_csv_publisher.NODE_FILES_DIR}': node_files_folder,
        f'publisher.neo4j.{neo4j_csv_publisher.RELATION_FILES_DIR}': relationship_files_folder,
        f'publisher.neo4j.{neo4j_csv_publisher.NEO4J_END_POINT_KEY}': neo4j_endpoint,
        f'publisher.neo4j.{neo4j_csv_publisher.NEO4J_USER}': neo4j_user,
        f'publisher.neo4j.{neo4j_csv_publisher.NEO4J_PASSWORD}': neo4j_password,
        f'publisher.neo4j.{neo4j_csv_publisher.JOB_PUBLISH_TAG}': 'unique_tag',  # should use unique tag here like {ds}
    })
    job = DefaultJob(conf=job_config,
                     task=DefaultTask(extractor=MysqlMetadataExtractor(), loader=FsNeo4jCSVLoader()),
                     publisher=Neo4jCsvPublisher())
    return job
def create_es_publisher_sample_job(elasticsearch_index_alias='table_search_index',
                                   elasticsearch_doc_type_key='table',
                                   model_name='databuilder.models.table_elasticsearch_document.TableESDocument',
                                   cypher_query=None,
                                   elasticsearch_mapping=None):
    """
    :param elasticsearch_index_alias: alias for Elasticsearch used in
           amundsensearchlibrary/search_service/config.py as an index
    :param elasticsearch_doc_type_key: name the ElasticSearch index is prepended with. Defaults to `table` resulting in
           `table_search_index`
    :param model_name: the Databuilder model class used in transporting between Extractor and Loader
    :param cypher_query: Query handed to the `Neo4jSearchDataExtractor` class, if None is given (default)
           it uses the `Table` query baked into the Extractor
    :param elasticsearch_mapping: Elasticsearch field mapping "DDL" handed to the `ElasticsearchPublisher` class,
           if None is given (default) it uses the `Table` query baked into the Publisher
    """
    # loader saves data to this location and publisher reads it from here
    extracted_search_data_path = '/var/tmp/amundsen/search_data.json'

    task = DefaultTask(loader=FSElasticsearchJSONLoader(),
                       extractor=Neo4jSearchDataExtractor(),
                       transformer=NoopTransformer())

    # elastic search client instance
    elasticsearch_client = es
    # unique name of new index in Elasticsearch
    elasticsearch_new_index_key = 'tables' + str(uuid.uuid4())

    job_config = ConfigFactory.from_dict({
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.GRAPH_URL_CONFIG_KEY}': neo4j_endpoint,
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.MODEL_CLASS_CONFIG_KEY}': model_name,
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.NEO4J_AUTH_USER}': neo4j_user,
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.NEO4J_AUTH_PW}': neo4j_password,
        f'loader.filesystem.elasticsearch.{FSElasticsearchJSONLoader.FILE_PATH_CONFIG_KEY}': extracted_search_data_path,
        f'loader.filesystem.elasticsearch.{FSElasticsearchJSONLoader.FILE_MODE_CONFIG_KEY}': 'w',
        f'publisher.elasticsearch.{ElasticsearchPublisher.FILE_PATH_CONFIG_KEY}': extracted_search_data_path,
        f'publisher.elasticsearch.{ElasticsearchPublisher.FILE_MODE_CONFIG_KEY}': 'r',
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_CLIENT_CONFIG_KEY}': elasticsearch_client,
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_NEW_INDEX_CONFIG_KEY}': elasticsearch_new_index_key,
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_DOC_TYPE_CONFIG_KEY}': elasticsearch_doc_type_key,
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_ALIAS_CONFIG_KEY}': elasticsearch_index_alias,
    })

    # only optionally add these keys, so need to dynamically `put` them
    if cypher_query:
        job_config.put(f'extractor.search_data.{Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY}',
                       cypher_query)
    if elasticsearch_mapping:
        job_config.put(f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_MAPPING_CONFIG_KEY}',
                       elasticsearch_mapping)

    job = DefaultJob(conf=job_config,
                     task=task,
                     publisher=ElasticsearchPublisher())
    return job
if __name__ == "__main__":
# Uncomment next line to get INFO level logging
# logging.basicConfig(level=logging.INFO)
loading_job = run_mysql_job()
loading_job.launch()
job_es_table = create_es_publisher_sample_job(
elasticsearch_index_alias='table_search_index',
elasticsearch_doc_type_key='table',
model_name='databuilder.models.table_elasticsearch_document.TableESDocument')
job_es_table.launch()
But I got the error message below. How can I connect my database to Amundsen?
elasticsearch.exceptions.RequestError: RequestError(400, 'mapper_parsing_exception', 'Root mapping definition has unsupported parameters: [table : {properties={schema={analyzer=simple, type=text, fields={raw={type=keyword}}}, cluster={type=text}, description={analyzer=simple, type=text}, display_name={type=keyword}, column_descriptions={analyzer=simple, type=text}, programmatic_descriptions={analyzer=simple, type=text}, tags={type=keyword}, badges={type=keyword}, database={analyzer=simple, type=text, fields={raw={type=keyword}}}, total_usage={type=long}, name={analyzer=simple, type=text, fields={raw={type=keyword}}}, last_updated_timestamp={format=epoch_second, type=date}, unique_usage={type=long}, column_names={analyzer=simple, type=text, fields={raw={type=keyword}}}, key={type=keyword}}}]')
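A hedged note: a mapper_parsing_exception saying the root mapping has unsupported parameters like [table : {...}] is typically Elasticsearch 7+ rejecting a mapping that still wraps its fields in a document type, so the elasticsearch 8.0.0 listed above may simply be newer than what this databuilder sample expects. A small version check against the quickstart endpoint (host, port, and lack of authentication are assumptions) can confirm what the server actually runs:
import json
from urllib.request import urlopen

# Quickstart default endpoint; adjust if Elasticsearch runs elsewhere.
with urlopen("http://localhost:9200") as resp:
    info = json.load(resp)
print(info["version"]["number"])   # Elasticsearch server version
If the server reports 7.x or 8.x, aligning it (and the client library) with the version the Amundsen quickstart expects is one avenue to investigate.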
I'm having an issue with Scheduled Queries. Using a Python script, I get the error
"google.api_core.exceptions.PermissionDenied: 403 The caller does not have permission" when running the script below.
#!/usr/bin/python
import sys
import json
import time
from google.cloud import bigquery_datatransfer
from google.oauth2 import service_account
prj_id = "project-id"
ds_id = "dataset_id"
gcp_info = json.load(open("key-file.json"))
creds = service_account.Credentials.from_service_account_info(gcp_info)
s_creds = creds.with_scopes(
    [
        'https://www.googleapis.com/auth/cloud-platform',
        'https://www.googleapis.com/auth/bigquery',
    ]
)
s_acc = "service-account@project_id.iam.gserviceaccount.com"
bq_tc = bigquery_datatransfer.DataTransferServiceClient(credentials=s_creds)
dataset = prj_id + '.' + ds_id + '.'
def main():
    argc = len(sys.argv) - 1
    if argc != 1:
        print("Usage: python3 /root/gcp_new-query.py <Temp-Table>")
        sys.exit()

    t_id = dataset + sys.argv[1] + '-temp'
    t2_id = dataset + sys.argv[1] + '-data'

    q2 = """
    DELETE FROM `{}` WHERE AddressID > 0 AND MsgTS < TIMESTAMP_SUB(CURRENT_TIMESTAMP(),
    INTERVAL 60 MINUTE)
    """.format(t_id)

    p = bq_tc.common_project_path(prj_id)

    tc_cfg2 = bigquery_datatransfer.TransferConfig(
        destination_dataset_id=ds_id,
        display_name=sys.argv[1] + "-RM-Old-Data",
        data_source_id="scheduled_query",
        params={
            "query": q2,
        },
        schedule="every hour",
    )
    tc_cfg2 = bq_tc.create_transfer_config(
        bigquery_datatransfer.CreateTransferConfigRequest(
            parent=p,
            transfer_config=tc_cfg2,
            service_account_name=s_acc,
        )
    )
    print("Created scheduled query '{}'".format(tc_cfg2.name))

main()
As soon as it gets into create_transfer_config(), I get the error.
I have gone through the documentation and made sure all of the right permissions have been granted to "service-account@project_id", those being:
BigQuery Data Transfer Service Agent
BigQuery Admin
Am I missing permissions, or is there something in my script that's not quite right?
Apologies if I haven't explained everything all that well.
EDIT: I have also made sure that the service account has an associated JSON key file.
-A.
I found a solution in the end. I simply used:
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/json.json"
In place of:
gcp_info = json.load(open("key-file.json"))
creds = service_account.Credentials.from_service_account_info(gcp_info)
s_creds = creds.with_scopes(
[
'https://www.googleapis.com/auth/cloud-platform',
'https://www.googleapis.com/auth/bigquery',
]
)
s_acc = "service-account@project_id.iam.gserviceaccount.com"
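Putting the accepted change together, a minimal sketch of the resulting setup (same placeholder path; the rest of the script, including s_acc and the transfer config, stays as in the question) constructs the client without an explicit credentials argument and lets it pick up Application Default Credentials from the environment variable:
import os
from google.cloud import bigquery_datatransfer

# Point Application Default Credentials at the service account key file (placeholder path).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/json.json"

# No credentials/scopes arguments needed; the client reads GOOGLE_APPLICATION_CREDENTIALS.
bq_tc = bigquery_datatransfer.DataTransferServiceClient()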