I am trying to integrate fast_download into my existing code. The problem is that the fast_download module uses await, which I am not familiar with, and the official documentation is not enough for my current level of knowledge. Can anyone help?
Here is my code:
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.functions.channels import GetFullChannelRequest
from FastTelethonhelper import fast_download
from telethon.tl.types import InputPeerEmpty
from tqdm import tqdm

api_id =
api_hash = ''


def callback(current, total):
    global pbar
    global prev_curr
    pbar.update(current - prev_curr)
    prev_curr = current


def download_media(group, cl, name):
    global pbar
    global prev_curr
    messages = cl.get_messages(group, limit=2000)
    start = 0
    print(start)
    for i, message in enumerate(messages[start:]):
        prev_curr = 0
        if message.video:
            print("\n{} / {} : {}".format(i + start, len(messages), message.file.name))
            pbar = tqdm(total=message.document.size, unit='B', unit_scale=True)
            message.download_media('./' + name + '/', progress_callback=callback)
            # fast_download(client, message.video, download_folder='./' + name + '/', progress_bar_function=callback)
            pbar.close()


with TelegramClient('name', api_id, api_hash) as client:
    result = client(GetDialogsRequest(
        offset_date=None,
        offset_id=0,
        offset_peer=InputPeerEmpty(),
        limit=500,
        hash=0,
    ))
    title = 'channel_name'  # Title for channel
    channel = client(GetFullChannelRequest(title))
    download_media(channel.full_chat, client, "directory_name")
I was expecting to enhance the download speed of the Telethon API. The download speed of some files is limited to about 100 KB/s, which is too slow for large files. I have already implemented code that uses Telethon's built-in download_media.
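For context on the await part: fast_download is a coroutine, so it can only be awaited inside an async function running on an event loop. A minimal sketch of one way to drive it from this otherwise synchronous script, reusing the argument names from the commented-out call above (they may not match every FastTelethonhelper version), would be:

async def fast_dl(cl, message, name):
    # 'await' is only valid inside an 'async def' function
    await fast_download(cl, message, download_folder='./' + name + '/',
                        progress_bar_function=callback)

# inside download_media, in place of message.download_media(...):
cl.loop.run_until_complete(fast_dl(cl, message, name))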
I have been trying to patch the list_blobs() function of ContainerClient, but have not been able to do so successfully. This code outputs a MagicMock, and the function isn't patched as I would expect it to be (I am trying to patch it to return the list ['Blob1', 'Blob2']).
#################Script File
import sys
from datetime import datetime, timedelta

import pyspark
import pytz
import yaml

# from azure.storage.blob import BlobServiceClient, ContainerClient
from pyspark.dbutils import DBUtils as dbutils
import azure.storage.blob


# Open Config
def main():
    spark_context = pyspark.SparkContext.getOrCreate()
    spark_context.addFile(sys.argv[1])

    stream = None
    stream = open(sys.argv[1], "r")
    config = yaml.load(stream, Loader=yaml.FullLoader)
    stream.close()

    account_key = dbutils.secrets.get(scope=config["Secrets"]["Scope"], key=config["Secrets"]["Key Name"])
    target_container = config["Storage Configuration"]["Container"]
    target_account = config["Storage Configuration"]["Account"]
    days_history_to_keep = config["Storage Configuration"]["Days History To Keep"]

    connection_string = (
        "DefaultEndpointsProtocol=https;AccountName="
        + target_account
        + ";AccountKey="
        + account_key
        + ";EndpointSuffix=core.windows.net"
    )
    blob_service_client: azure.storage.blob.BlobServiceClient = (
        azure.storage.blob.BlobServiceClient.from_connection_string(connection_string)
    )
    container_client: azure.storage.blob.ContainerClient = (
        blob_service_client.get_container_client(target_container)
    )

    blobs = container_client.list_blobs()
    print(blobs)
    print(blobs)

    utc = pytz.UTC
    delete_before_date = utc.localize(
        datetime.today() - timedelta(days=days_history_to_keep)
    )
    for blob in blobs:
        if blob.creation_time < delete_before_date:
            print("Deleting Blob: " + blob.name)
            container_client.delete_blob(blob, delete_snapshots="include")


if __name__ == "__main__":
    main()
#################Test File
import unittest
from unittest import mock

import DeleteOldBlobs


class DeleteBlobsTest(unittest.TestCase):
    def setUp(self):
        pass

    @mock.patch("DeleteOldBlobs.azure.storage.blob.ContainerClient")
    @mock.patch("DeleteOldBlobs.azure.storage.blob.BlobServiceClient")
    @mock.patch("DeleteOldBlobs.dbutils")
    @mock.patch("DeleteOldBlobs.sys")
    @mock.patch('DeleteOldBlobs.pyspark')
    def test_main(self, mock_pyspark, mock_sys, mock_dbutils, mock_blobserviceclient, mock_containerclient):
        # mock setup
        config_file = "Delete_Old_Blobs_UnitTest.yml"
        mock_sys.argv = ["unused_arg", config_file]
        mock_dbutils.secrets.get.return_value = "A Secret"
        mock_containerclient.list_blobs.return_value = ["ablob1", "ablob2"]

        # execute test
        DeleteOldBlobs.main()

        # TODO assert actions taken
        # mock_sys.argv.__get__.assert_called_with()
        # dbutils.secrets.get(scope=config['Secrets']['Scope'], key=config['Secrets']['Key Name'])


if __name__ == "__main__":
    unittest.main()
Output:
<MagicMock name='BlobServiceClient.from_connection_string().get_container_client().list_blobs()' id='1143355577232'>
What am I doing incorrectly here?
I'm not able to execute your code at the moment, but I have tried to simulate it. To do this I created the following 3 files under the path /<path-to>/pkg/sub_pkg1 (where pkg and sub_pkg1 are packages).
File ContainerClient.py
def list_blobs(self):
    return "blob1"
File DeleteOldBlobs.py
from pkg.sub_pkg1 import ContainerClient


# Open Config
def main():
    blobs = ContainerClient.list_blobs()
    print(blobs)
    print(blobs)
File DeleteBlobsTest.py
import unittest
from unittest import mock

from pkg.sub_pkg1 import DeleteOldBlobs


class DeleteBlobsTest(unittest.TestCase):
    def setUp(self):
        pass

    def test_main(self):
        mock_containerclient = mock.MagicMock()
        with mock.patch("DeleteOldBlobs.ContainerClient.list_blobs", mock_containerclient.list_blobs):
            mock_containerclient.list_blobs.return_value = ["ablob1", "ablob2"]
            DeleteOldBlobs.main()


if __name__ == '__main__':
    unittest.main()
If you execute the test code you obtain the output:
['ablob1', 'ablob2']
['ablob1', 'ablob2']
This output means that the function list_blobs() is mocked by mock_containerclient.list_blobs.
I don't know if the content of this post will be useful for you, but I'm not able to simulate your code any better at the moment.
I hope my code can inspire you to find your real solution.
The structure of that answer didn't match my solution; perhaps both will work, but it was important for me to patch pyspark even though I never call it directly, or exceptions would get thrown when my code tried to interact with Spark.
Perhaps this will be useful to someone:
#mock.patch("DeleteOldBlobs.azure.storage.blob.BlobServiceClient")
#mock.patch("DeleteOldBlobs.dbutils")
#mock.patch("DeleteOldBlobs.sys")
#mock.patch('DeleteOldBlobs.pyspark')
def test_list_blobs_called_once(self, mock_pyspark, mock_sys, mock_dbutils, mock_blobserviceclient):
# mock setup
config_file = "Delete_Old_Blobs_UnitTest.yml"
mock_sys.argv = ["unused_arg", config_file]
account_key = 'Secret Key'
mock_dbutils.secrets.get.return_value = account_key
bsc_mock: mock.Mock = mock.Mock()
container_client_mock = mock.Mock()
blob1 = Blob('newblob', datetime.today())
blob2 = Blob('oldfile', datetime.today() - timedelta(days=20))
container_client_mock.list_blobs.return_value = [blob1, blob2]
bsc_mock.get_container_client.return_value = container_client_mock
mock_blobserviceclient.from_connection_string.return_value = bsc_mock
# execute test
DeleteOldBlobs.main()
#Assert Results
container_client_mock.list_blobs.assert_called_once()
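The Blob class used above is not defined in the snippet; a minimal stand-in, assuming it only needs the two attributes main() reads, could be:

from datetime import datetime


class Blob:
    # hypothetical test double carrying only the attributes the script under test reads
    def __init__(self, name: str, creation_time: datetime):
        self.name = name
        self.creation_time = creation_time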
I would like to serve both gRPC and HTTP in my flow, but the flow description only allows a single value in the protocol parameter. Is it possible to add both? If not, do I have to deploy two flows, or is there a better workaround?
From what I can see, the documentation doesn't mention whether I can have two gateways.
from jina import Flow, Client  # imports added; FooExecutor is assumed to be defined elsewhere

f = Flow(protocol='grpc', port=12345).add(uses=FooExecutor)

with f:
    client = Client(port=12345)
    docs = client.post(on='/')
    print(docs.texts)
Unfortunately, by default, no.
But you can develop your own custom gateway that enables both protocols at the same time.
A sample custom gateway looks like the following (borrowed from here):
import grpc
from grpc_health.v1 import health, health_pb2, health_pb2_grpc
from grpc_reflection.v1alpha import reflection
from pydantic import BaseModel
from uvicorn import Config, Server

from jina import Gateway, __default_host__
from jina.proto import jina_pb2, jina_pb2_grpc


class DummyResponseModel(BaseModel):
    protocol: str


class MultiProtocolGateway(Gateway):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.http_port = self.ports[0]
        self.grpc_port = self.ports[1]
        self.health_servicer = health.HealthServicer(experimental_non_blocking=True)

    async def _setup_http_server(self):
        from fastapi import FastAPI

        app = FastAPI(
            title='HTTP Server',
        )

        @app.get(path='/', response_model=DummyResponseModel)
        def _get_response():
            return {'protocol': 'http'}

        self.http_server = Server(
            Config(app, host=__default_host__, port=self.http_port)
        )

    async def _setup_grpc_server(self):
        self.grpc_server = grpc.aio.server()

        jina_pb2_grpc.add_JinaRPCServicer_to_server(
            self.streamer._streamer, self.grpc_server
        )
        service_names = (
            jina_pb2.DESCRIPTOR.services_by_name['JinaRPC'].full_name,
            reflection.SERVICE_NAME,
        )
        # Mark all services as healthy.
        health_pb2_grpc.add_HealthServicer_to_server(
            self.health_servicer, self.grpc_server
        )
        for service in service_names:
            self.health_servicer.set(service, health_pb2.HealthCheckResponse.SERVING)
        reflection.enable_server_reflection(service_names, self.grpc_server)
        self.grpc_server.add_insecure_port(f'{__default_host__}:{self.grpc_port}')
        await self.grpc_server.start()

    async def setup_server(self):
        await self._setup_http_server()
        await self._setup_grpc_server()

    async def run_server(self):
        await self.http_server.serve()
        await self.grpc_server.wait_for_termination()

    async def shutdown(self):
        self.http_server.should_exit = True
        await self.grpc_server.stop(0)
        await self.http_server.shutdown()
        self.health_servicer.enter_graceful_shutdown()

    @property
    def _should_exit(self) -> bool:
        return self.http_server.should_exit
And you can access it in the following way:
from xxx import MultiProtocolGateway
from xxx import MyExecutor
from jina import Flow, Client, DocumentArray

http_port = 51000
grpc_port = 52000

flow = Flow().config_gateway(
    uses=MultiProtocolGateway,
    port=[http_port, grpc_port],
    protocol=['http', 'grpc'],
).add(uses=MyExecutor)

with flow:
    c1 = Client(host='http://0.0.0.0:51000')
    c1.post(on='/', inputs=DocumentArray.empty(5))

    c2 = Client(host='grpc://0.0.0.0:52000')
    c2.post(on='/', inputs=DocumentArray.empty(5))
I want to show the floating filter on all the columns by setting defaultColDef.floatingFilter to True, but the floating filter is not displayed at all. Other defaultColDef settings, such as grid.options.defaultColDef.editable, are applied correctly. Can someone point out what is wrong in the code below? Thanks.
The ag-grid documentation is here.
import justpy as jp
import pandas as pd
import requests
import json
import re
import os
import time
from ratelimit import limits
from tenacity import retry, stop_after_attempt, wait_fixed
from datetime import datetime, timedelta, date

wm_df = pd.read_csv('https://elimintz.github.io/women_majors.csv').round(2)


async def select_all_rows(self, msg):
    await self.grid.run_api('selectAll()', msg.page)


async def deselect_rows(self, msg):
    await self.grid.run_api('deselectAll()', msg.page)


async def resetFilters(self, msg):
    await self.grid.run_api('setFilterModel()', msg.page)


async def restoreFilters(self, msg):
    # savedFilterValues = msg.page.filterValues
    await self.grid.run_api("setFilterModel({year: {type: 'lessThan', filter: '1980'}})", msg.page)


def row_selected(self, msg):
    wp = msg.page
    if msg.selected:
        wp.selected_rows[msg.rowIndex] = msg.data
    else:
        wp.selected_rows.pop(msg.rowIndex)


def downloadRow(self, msg):
    wp = msg.page
    wp.resultSelect.text = wp.selected_rows.values()


def grid_test():
    wp = jp.QuasarPage(dark=False)
    wp.selected_rows = {}
    grid = wm_df.jp.ag_grid(a=wp)
    grid.options.pagination = True
    grid.options.paginationAutoPageSize = True
    grid.options.columnDefs[0].checkboxSelection = True
    grid.options.columnDefs[0].headerCheckboxSelection = True
    grid.options.columnDefs[0].headerCheckboxSelectionFilteredOnly = True
    grid.options.columnDefs[1].hide = True
    # grid.options.columnDefs[1].floatingFilter = True
    # grid.options.defaultColDef.filter = True
    grid.options.defaultColDef.floatingFilter = True
    grid.options.defaultColDef.enableValue = True
    grid.options.defaultColDef.editable = True
    grid.options.rowSelection = 'multiple'
    grid.options.sideBar = True
    grid.on('rowSelected', row_selected)

    d = jp.Div(classes='q-pa-md q-gutter-sm', a=wp)
    jp.QButton(label="Download", color="primary", a=d, click=downloadRow)
    buttonResetFilter = jp.QButton(label="Reset filter", color="primary", a=d, click=resetFilters)
    buttonResetFilter.grid = grid
    restoreRestoreFilter = jp.QButton(label="Restore filter", color="primary", a=d, click=restoreFilters)
    restoreRestoreFilter.grid = grid
    wp.resultSelect = jp.Div(classes='q-pa-md q-gutter-sm', a=wp, text='The result will be displayed here')
    return wp


jp.justpy(grid_test)
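One thing worth checking (this is standard ag-grid behavior rather than anything justpy-specific, so treat it as an assumption about this setup): floating filters only render for columns that have a regular filter configured, so the commented-out defaultColDef.filter line probably needs to be enabled together with floatingFilter:

grid.options.defaultColDef.filter = True          # floating filters delegate to a column filter
grid.options.defaultColDef.floatingFilter = True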
@Kanda - thank you for your excellent question.
As a committer of justpy, I have added your code to the justpy codebase and tried it out using:
python examples/stackoverflow/q73497028.py
The result is: (screenshot not reproduced here). I assume you are expecting the behavior described in https://ag-grid.com/javascript-data-grid/floating-filters/
Given that https://github.com/justpy-org/justpy/issues/314 is not fixed yet, you might want to make sure the feature you are expecting is actually available in the justpy version you are using (which you might state in your question for clarity); I am assuming you are using the most current version, 0.2.8. Since the revival of justpy, as discussed in https://github.com/justpy-org/justpy/discussions/409, the justpy community tries to stay on top of user expectations. Unfortunately there are limits to fulfilling those expectations, so you might want to watch for questions and issues labeled "ag-grid" in https://github.com/justpy-org/justpy/issues?q=is%3Aopen+is%3Aissue+label%3A%22AG+Grid%22
I am trying to use prometheus_client to export RabbitMQ metrics. I have a problem with the decorator functions.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# from prometheus_client import start_http_server, Summary
import prometheus_client as prom
import random
import time
import pika

queue_name = [
    "capt",
    "dev-capt",
    "myBeautifullTest"
]


def get_metric(qname):
    queue_descriptor = channel.queue_declare(qname, durable=True)
    queue_len = queue_descriptor.method.message_count
    return float(queue_len)


params = pika.ConnectionParameters(
    host='rabbitmq1.local',
    port=5672,
    credentials=pika.credentials.PlainCredentials('guest11', 'guest22'),
)
connection = pika.BlockingConnection(parameters=params)
channel = connection.channel()

i = prom.Info("RMQPE", "RabbitMQ Prometheus Exporter")
i.info({'version': '0.0.1'})

# Create a metric to track time spent and requests made.
REQUEST_TIME = prom.Summary('request_processing_seconds', 'Time spent processing request')


# Decorate function with metric.
@REQUEST_TIME.time()
def process_request():
    """A dummy function that takes some time."""
    time.sleep(1)


RABBIT_QUEUE = prom.Gauge('rabbitmq_test_exporter', 'queue_length', ['queue_name'], multiprocess_mode='all')

for qname in queue_name:
    queue_descriptor = channel.queue_declare(qname, durable=True)
    queue_len = queue_descriptor.method.message_count
    RABBIT_QUEUE.labels(qname).set(queue_len)


@RABBIT_QUEUE.track_inprogress()
def f():
    pass


with RABBIT_QUEUE.track_inprogress():
    pass


if __name__ == '__main__':
    # Start up the server to expose the metrics.
    prom.start_http_server(27015)  # Yes, CS port :)
    # Generate some requests.
    while True:
        process_request()
        f()
I get this message:
andrey@xps:~/prj/python3/rmq$ ./prj2.py
Traceback (most recent call last):
  File "./prj2.py", line 56, in <module>
    @RABBIT_QUEUE.track_inprogress()
  File "/usr/local/lib/python3.8/dist-packages/prometheus_client/metrics.py", line 372, in track_inprogress
    self._raise_if_not_observable()
  File "/usr/local/lib/python3.8/dist-packages/prometheus_client/metrics.py", line 66, in _raise_if_not_observable
    raise ValueError('%s metric is missing label values' % str(self._type))
ValueError: gauge metric is missing label values
I need 3 metrics, maybe more.
If I remove the decorator, my code works, but the values are not updated.
Please help.
Thank you.
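For anyone hitting the same ValueError: as far as I understand prometheus_client, a metric created with labelnames is only a parent container, and it cannot be observed (set(), track_inprogress(), ...) until .labels() resolves it to a concrete child. A minimal sketch of the difference, using the gauge from the question:

import prometheus_client as prom

RABBIT_QUEUE = prom.Gauge('rabbitmq_test_exporter', 'queue_length', ['queue_name'])

# Raises "ValueError: gauge metric is missing label values":
# @RABBIT_QUEUE.track_inprogress()

# Works, because the decorator comes from a labelled child:
@RABBIT_QUEUE.labels('capt').track_inprogress()
def f():
    pass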
SOLVED!
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# from prometheus_client import start_http_server, Summary
import prometheus_client as prom
import random
import time
import pika

queue_name = [
    "capt",
    "dev-capt",
    "myBeautifullTest"
]

params = pika.ConnectionParameters(
    host='rabbitmq1.local',
    port=5672,
    credentials=pika.credentials.PlainCredentials('guest11', 'guest22'),
)
connection = pika.BlockingConnection(parameters=params)
channel = connection.channel()

i = prom.Info("RMQPE", "RabbitMQ Prometheus Exporter")
i.info({'version': '0.0.1'})

# Create a metric to track time spent and requests made.
REQUEST_TIME = prom.Summary('request_processing_seconds', 'Time spent processing request')


# Decorate function with metric.
@REQUEST_TIME.time()
def process_request():
    time.sleep(1)


if __name__ == '__main__':
    #                         name                      documentation   labelnames
    RABBIT_QUEUE = prom.Gauge('rabbitmq_test_exporter', 'queue_length', labelnames=['queue_name'])
    prom.start_http_server(27015)
    while True:
        process_request()
        for qname in queue_name:
            queue_descriptor = channel.queue_declare(qname, durable=True)
            queue_len = queue_descriptor.method.message_count
            RABBIT_QUEUE.labels(qname).set(queue_len)
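For the record, the reason this version works: the Gauge still has labelnames=['queue_name'], but every observation now goes through RABBIT_QUEUE.labels(qname).set(...) on a labelled child, and the refresh loop runs inside while True, so the exported values are updated on every pass instead of being set only once at import time.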
I'm trying to write something that downloads a lot of files from a Telegram channel.
The code works well, but it takes too long, and on top of that I have a slow internet connection.
I have this code and I am downloading files that weigh about 1 GB, but it takes a long time. Is there an example of how to make the download faster?
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetHistoryRequest
import datetime
import os


def get_entity_data(entity_id, limit):
    entity = client.get_entity(entity_id)
    fecha = datetime.datetime.today()
    today = fecha.day
    yesterday = today - 1
    posts = client(GetHistoryRequest(
        peer=entity,
        limit=limit,
        offset_date=None,
        offset_id=0,
        max_id=0,
        min_id=0,
        add_offset=0,
        hash=0))
    for post in posts.messages:
        post_day = post.date.day
        if post_day >= yesterday:
            if post.media is not None:
                try:
                    file_name = post.media.document.attributes[0].file_name
                except:
                    # some documents keep the filename in the second attribute
                    file_name = post.media.document.attributes[1].file_name
                directorio = os.getcwd() + '/descargas'  # 'descargas' = 'downloads'
                if not os.path.exists('descargas/' + file_name):
                    print(file_name, 'Descargando...')  # 'Downloading...'
                    client.download_media(message=post, file=directorio)
                    print('Archivo descargado.')  # 'File downloaded.'
I think you can handle it by using a smaller limit, setting the offset, and issuing multithreaded requests; a thread/process pool may help with this approach.
For example, if the limit parameter is set to 10 and there are 1000 ids you want to get, the offsets should be offset = [0, 10, 20, 30, ..., 1000].
Then (still pseudocode; see the runnable sketch after it):
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetHistoryRequest
import datetime
import os
from multiprocessing.pool import ThreadPool  # one concrete option for the "pool" idea

offsets = [0, 10, 20, 30, ..., 1000]  # pseudocode: list(range(0, 1001, 10))


def get_entity_data(offset_id, entity_id={your_id}, limit=10):
    # your function, fetching `limit` messages starting from `offset_id`
    ...


with ThreadPool() as pool:
    pool.map(get_entity_data, offsets)
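To make that concrete, here is a minimal runnable sketch of the pooling pattern, with multiprocessing.pool.ThreadPool standing in for the "pool" package mentioned above; the entity id, the limit, and the body of get_entity_data are placeholders to be filled in with the original download logic:

from functools import partial
from multiprocessing.pool import ThreadPool

ENTITY_ID = 'my_channel'  # hypothetical placeholder for your channel/entity id
LIMIT = 10                # messages fetched per request


def get_entity_data(offset_id, entity_id, limit):
    # Placeholder: fetch `limit` messages starting at `offset_id` via
    # GetHistoryRequest(..., offset_id=offset_id) and download their media,
    # as in the asker's original function.
    print(f'would fetch {limit} messages of {entity_id} from offset {offset_id}')


offsets = list(range(0, 1001, LIMIT))  # the [0, 10, 20, ..., 1000] list from above

with ThreadPool(processes=8) as pool:
    # each worker handles one slice of the history, so downloads overlap
    pool.map(partial(get_entity_data, entity_id=ENTITY_ID, limit=LIMIT), offsets)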