I'm just starting to learn how to build a bot with Python. I'm trying to send a message at a certain time. I have read a lot of examples and the documentation for the schedule module, but I can't fix this issue...
import config
import telebot
import requests
import schedule
import time
from my_parser import parse
from bs4 import BeautifulSoup as BS
bot = telebot.TeleBot(config.token)

# Chats that have used /start or /help; the scheduled job posts to each of them.
subscribers = set()


def build_report():
    """Fetch the weather and the BTC price and format them as one message.

    The HTTP requests run on every call so that a scheduled message carries
    current data rather than whatever was scraped when the module was loaded.

    Returns:
        The message text: temperatures, description, parse() output, BTC price.
    """
    r = requests.get('https://example')
    html = BS(r.content, 'html.parser')
    # Defaults keep the names bound when the page has no '#content' element.
    t_min = t_max = min_text = ''
    for el in html.select('#content'):
        t_min = el.select('.temperature .min')[0].text
        t_max = el.select('.temperature .max')[0].text
        min_text = el.select('.wDescription .description')[0].text
    response = requests.get(url='https://example')
    data = response.json()
    btc_price = f"B: {round(data.get('btc_usd').get('last'), 2)}$"
    return t_min + ', ' + t_max + '\n' + min_text + '\n' + parse() + '\n' + btc_price


# The decorator was commented out ('#' instead of '@'), so the handler was
# never registered and polling had nothing to dispatch to.
@bot.message_handler(commands=['start', 'help'])
def main(message):
    """Reply to /start or /help with the report and subscribe the chat.

    Args:
        message: the incoming telebot message; its chat receives the reply.
    """
    subscribers.add(message.chat.id)
    bot.send_message(message.chat.id, build_report())


def send_scheduled_report():
    """Send the report to every subscribed chat; called by ``schedule``.

    ``schedule`` invokes jobs without arguments, so the job cannot be the
    message handler itself (which needs a ``message``).
    """
    if not subscribers:
        return
    text = build_report()
    # Copy: the polling thread may add subscribers while we iterate.
    for chat_id in list(subscribers):
        bot.send_message(chat_id, text)


if __name__ == '__main__':
    import threading

    # bot.polling() blocks forever, so calling it first meant the scheduler
    # loop below was never reached. Poll in a daemon thread instead.
    threading.Thread(
        target=bot.polling,
        kwargs={'none_stop': True, 'interval': 0},
        daemon=True,
    ).start()
    schedule.every().day.at("09:00").do(send_scheduled_report)
    while True:
        schedule.run_pending()
        time.sleep(1)
I would like the bot to send a message with the temperature to a channel every morning. I did not find any clues on how to use the function correctly.
I use this library.
Example of my code.
import aioschedule as schedule
async def some_fun():
    """Placeholder job: does nothing and returns ``None``."""
    return None
async def scheduler():
    """Register the daily 09:00 job and run pending jobs forever.

    Polls the scheduler every two seconds; never returns.
    """
    # Local import: this snippet's import block only brings in aioschedule.
    import asyncio

    # Pass the coroutine *function*. ``some_fun()`` would hand the scheduler
    # an already-created coroutine object, which is not callable.
    schedule.every().day.at("09:00").do(some_fun)
    while True:
        await schedule.run_pending()
        await asyncio.sleep(2)
if __name__ == "__main__":
    # Queue the scheduler coroutine on the default loop, then call main().
    # NOTE(review): this snippet neither imports asyncio, defines main(), nor
    # runs the loop — presumably main() starts the bot and the loop; confirm.
    loop = asyncio.get_event_loop()
    loop.create_task(scheduler())
    main()
Related
I want to scrape millions of log records from a site. To achieve this I am running this code, and it works fine. Now I want to control it via a Telegram bot, because I'm running it on a Raspberry Pi 24/7, so I modified it — but now it throws the error: There is no current event loop in thread 'Bot:5689047784:dispatcher'.
My Modified code below:
from telegram.ext.updater import Updater
from telegram.update import Update
from telegram.ext.callbackcontext import CallbackContext
from telegram.ext.commandhandler import CommandHandler
from telegram.ext.messagehandler import MessageHandler
from telegram.ext.filters import Filters
from tqdm import tqdm
import asyncio
import aiohttp
import time
import tqdm
import nest_asyncio
from asyncio import ensure_future, events
from asyncio.queues import Queue
from functools import partial
import telegram
# Patch asyncio so nested event-loop use is allowed; as_completed_for_async_gen()
# calls run_until_complete() while fetch() is already running on the loop.
nest_asyncio.apply()
# Placeholders — the real credentials were redacted from this listing.
telegramtoken = "hideforsecurity"
chatid = "hideforsecurity"
# The Updater handles polling and dispatch; the separate Bot is used for
# direct sendMessage calls to ``chatid``.
updater = Updater(telegramtoken,use_context=True)
bot = telegram.Bot(token=telegramtoken)
def as_completed_for_async_gen(fs_async_gen, concurrency):
    """Yield awaitables for the results of futures drawn from ``fs_async_gen``.

    Starts by scheduling up to ``concurrency`` awaitables taken from the async
    generator. Each time one finishes it is moved to a queue and one more is
    pulled from the generator. Every yielded coroutine resolves to the result
    of the next future taken from that queue.

    NOTE(review): ``loop.run_until_complete`` is called below; when this
    generator is iterated from code already running on that loop it
    presumably relies on the module-level ``nest_asyncio.apply()`` — confirm.
    """
    done = Queue()
    loop = events.get_event_loop()
    todo = set() # +

    def _on_completion(f):
        # Done-callback: retire the future, publish it, and schedule a refill.
        todo.remove(f)
        done.put_nowait(f)
        loop.create_task(_add_next()) # +

    async def _wait_for_one():
        # Wait for the next finished future and return (or re-raise) its outcome.
        f = await done.get()
        return f.result()

    async def _add_next(): # +
        # Pull one more awaitable from the generator; do nothing once exhausted.
        try:
            f = await fs_async_gen.__anext__()
        except StopAsyncIteration:
            return
        f = ensure_future(f, loop=loop)
        f.add_done_callback(_on_completion)
        todo.add(f)

    # Prime the pipeline with the initial batch.
    for _ in range(concurrency): # +
        loop.run_until_complete(_add_next()) # +
    # Hand out one waiter per iteration for as long as futures are in flight.
    while todo: # +
        yield _wait_for_one() # +
# Maximum number of requests kept in flight at once.
CONCURRENCY = 50 # +
# Half-open range [n, q) of record numbers to download.
n = 6400010
q = 6400020
# Output file is named after the range it covers.
filename = f"{n}-{q}.json"
async def make_async_gen(f, n, q):
    """Yield ``f(x)`` for every ``x`` produced by ``make_numbers(n, q)``."""
    numbers = make_numbers(n, q)
    async for number in numbers:
        yield f(number)
async def fetch():
    """Download every record in ``[n, q)`` and append the non-null ones to ``filename``.

    Requests are issued through one aiohttp session limited to ``CONCURRENCY``
    connections; each response other than ``"[null]"`` is printed and written
    as one line of the output file.
    """
    # example
    url = "https://httpbin.org/anything/log/"
    connector = aiohttp.TCPConnector(limit=CONCURRENCY)
    async with aiohttp.ClientSession(connector=connector) as session:
        async_gen = make_async_gen(partial(do_get, session, url), n, q) # +
        # ``updater.message`` does not exist (Updater has no message); notify
        # through the Bot instance instead.
        bot.sendMessage(chat_id=chatid, text="[*] Downloading started via bot")
        # Open the output once, and let ``with`` close it even on error,
        # instead of re-opening the file for every single response.
        with open(filename, "a") as out:
            pending = as_completed_for_async_gen(async_gen, CONCURRENCY)
            for f in tqdm.tqdm(pending, total=q - n):
                response = await f
                if response != "[null]":
                    print(response)
                    out.write(response + "\n")
#===================================================
#----------------------------------------------------------------
async def make_numbers(numbers, _numbers):
    """Asynchronously yield each integer of ``range(numbers, _numbers)``."""
    for value in range(numbers, _numbers):
        yield value
async def do_get(session, url, x):
    """GET ``url + str(x)`` through *session* and return the body as text."""
    request_headers = {
        'Accept-Encoding': 'gzip, deflate',
        'User-Agent': 'okhttp/3.12.1',
        'Connection': 'close',
    }
    target = url + str(x)
    async with session.get(target, headers=request_headers) as resp:
        return await resp.text() # +
# Reference timestamp taken when the module loads; download() subtracts it
# from a later perf_counter() reading.
s = time.perf_counter()
def start(update: Update, context: CallbackContext):
    """Greet the user and point them at /help."""
    greeting = (
        "Hello sir, Welcome to the Bot.Please write"
        "/help to see the commands available."
    )
    update.message.reply_text(greeting)
def help(update: Update, context: CallbackContext):
    """Reply with the list of available commands."""
    lines = (
        "Available Commands :-",
        "/status - To get the youtube URL",
        "/download - To get the LinkedIn profile URL",
    )
    update.message.reply_text("\n".join(lines))
def status(update: Update, context: CallbackContext):
    """Reply with the current status (the value is hard-coded to 1)."""
    report = "current status is at" + str(1)
    update.message.reply_text(report)
def download(update: Update, context: CallbackContext):
    """Run the full download in response to /download and report progress.

    Sends start/complete/failure notices to ``chatid`` and replies to the
    requesting chat. On failure the error is reported and the handler returns
    so that the bot keeps running.
    """
    update.message.reply_text("[*] Downloading started via bot")
    # Bound before the try so the failure message can always use them.
    c1 = str(n)
    c2 = str(q)
    # Handlers run in a dispatcher worker thread that has no event loop, which
    # is what raised "There is no current event loop in thread ...". Create one
    # for this call and make it current, since as_completed_for_async_gen()
    # looks the loop up itself.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # The new loop needs the nested-run patch too: fetch() re-enters
    # run_until_complete() while it is already running on this loop.
    nest_asyncio.apply(loop)
    try:
        bot.sendMessage(chat_id=chatid, text="[*] Downloading Started " + c1 + " - " + c2)
        loop.run_until_complete(fetch())
        bot.sendMessage(chat_id=chatid, text="[*] Downloading Complete " + c1 + " - " + c2)
    except Exception as e:
        print(e)
        bot.sendMessage(chat_id=chatid, text="[*] Downloading Failed " + c1 + " - " + c2 + " << Reason >> " + str(e))
        # Was exit(): raising SystemExit inside a handler thread does not shut
        # the program down cleanly; just end this handler.
        return
    finally:
        asyncio.set_event_loop(None)
        loop.close()
    update.message.reply_text("[*] Downloading completed via bot")
def unknown(update: Update, context: CallbackContext):
    """Tell the user that what they sent is not a valid command."""
    received = update.message.text
    update.message.reply_text("Sorry '{}' is not a valid command".format(received))
def unknown_text(update: Update, context: CallbackContext):
    """Tell the user that their free-text message was not understood."""
    received = update.message.text
    update.message.reply_text(
        "Sorry I can't recognize you , you said '{}'".format(received))
# Known commands first.
updater.dispatcher.add_handler(CommandHandler('start', start))
updater.dispatcher.add_handler(CommandHandler('status', status))
updater.dispatcher.add_handler(CommandHandler('help', help))
updater.dispatcher.add_handler(CommandHandler('download', download))
# Within one handler group only the first matching handler runs, and
# Filters.text also matches commands. The catch-alls therefore go last, with
# the command filter before the text filter; previously a Filters.text handler
# registered first swallowed everything and the two below never fired.
updater.dispatcher.add_handler(MessageHandler(
    Filters.command, unknown))  # Filters out unknown commands
# Filters out unknown messages.
updater.dispatcher.add_handler(MessageHandler(Filters.text, unknown_text))
updater.start_polling()
Please help and guide me: is this approach to controlling program execution on a Raspberry Pi correct, or can you suggest a better way?
Below is the python code (program.py) and the requirements file (requirements.txt).
Function async def get_title_range() is not working properly, it generates the following error code:
httpx.HTTPStatusError: Redirect response '301 Moved Permanently' for
url 'https://talkpython.fm/episodes/show/271' Redirect location:
'https://talkpython.fm/episodes/show/271/unlock-the-mysteries-of-time-pythons-datetime-that-is'
For more information check: https://httpstatuses.com/301
Python code, based on python 3.9 (program.py):
import asyncio
import datetime
import httpx
import bs4
from colorama import Fore
# A ``global`` statement at module level has no effect; main() declares
# ``loop`` global again before assigning it.
global loop
async def get_html(episode_number: int) -> str:
    """Download the episode page and return its HTML.

    Args:
        episode_number: number of the episode to fetch.

    Returns:
        The response body as text.

    Raises:
        httpx.HTTPStatusError: if the final response is not a success.
    """
    print(Fore.YELLOW + f"Getting HTML for episode {episode_number}", flush=True)
    url = f"https://talkpython.fm/episodes/show/{episode_number}"
    # The short URL answers 301 to the full slug URL. httpx does not follow
    # redirects unless asked to, so raise_for_status() used to fail on the 301.
    async with httpx.AsyncClient(follow_redirects=True) as client:
        resp = await client.get(url)
        resp.raise_for_status()
        return resp.text
def get_title(html: str, episode_number: int) -> str:
    """Return the stripped text of the first ``<h1>``, or ``"MISSING"`` if there is none."""
    print(Fore.CYAN + f"Getting TITLE for episode {episode_number}", flush=True)
    header = bs4.BeautifulSoup(html, 'html.parser').select_one('h1')
    return "MISSING" if not header else header.text.strip()
def main():
    """Run ``get_title_range()`` on the event loop and print the elapsed time."""
    global loop
    started = datetime.datetime.now()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(get_title_range())
    elapsed = datetime.datetime.now() - started
    print(f"Done in {elapsed.total_seconds():.2f} sec.")
async def get_title_range():
    """Fetch episodes 270-279 concurrently and print each title in order."""
    # (The original ``async def`` line had no trailing colon — a SyntaxError.)
    # Start every download first so the requests overlap. asyncio.create_task
    # uses the running loop, so this no longer depends on the global ``loop``.
    tasks = [(n, asyncio.create_task(get_html(n))) for n in range(270, 280)]
    for n, t in tasks:
        html = await t
        title = get_title(html, n)
        print(Fore.WHITE + f"Title found: {title}", flush=True)
if __name__ == '__main__':
    # Run the scraper only when executed as a script.
    main()
The requirements (requirements.txt):
bs4
colorama
httpx
Here is my code using requests-html's AsyncHTMLSession in FastAPI:
# '@' had been turned into '#', both here and in the XPath below; as a comment
# the route was never registered.
@app.get('/')
async def ScrapeData(pages: Optional[int] = 1):
    """Render and scrape result pages 1..``pages`` and return the collected data.

    Args:
        pages: number of pages to scrape; ``None`` is treated as 1.

    Returns:
        ``crawle.data`` after every page has been passed to ``GetData``.
    """
    if pages is None:
        pages = 1
    crawle = Crawler()
    # One session is shared by all pages instead of creating (and never
    # closing) a new one per page.
    asession = AsyncHTMLSession()
    try:
        for page in range(1, pages + 1):
            url = f"url here"
            r = await asession.get(url)
            await r.html.arender(sleep=1)
            # XPath attribute tests use '@'; '[#id=...]' is not valid XPath.
            widget = r.html.xpath('//*[@id="widgetContent"]')[0]
            items = widget.find('div')
            crawle.GetData(items)
    finally:
        await asession.close()
    return crawle.data
You need to explicitly enable redirects in httpx (unlike in requests). From their docs:
Unlike requests, HTTPX does not follow redirects by default.
We differ in behaviour here because auto-redirects can easily mask unnecessary network calls being made.
You can still enable behaviour to automatically follow redirects, but you need to do so explicitly...
response = client.get(url, follow_redirects=True)
Or else instantiate a client, with redirect following enabled by default...
client = httpx.Client(follow_redirects=True)
I am scraping blog urls from main page, and later I iterate over all urls to retrive text on it.
Will a generator be faster if I move the loop into blogscraper and make it yield some_text? I guess the app will still be single-threaded, and it won't request the next pages while computing text from the HTML.
Should I use asyncio, or are there better modules to make it parallel? Create generator that yields coroutine results as the coroutines finish
I also want to make later small rest app for displaying results
def readmainpage(self):
    """Walk the main page while there is a next page and collect the blog URLs."""
    blogurls = []
    while nextPage:
        r = requests.get(url)
        ...
        blogurls.append(new_url)
    return blogurls
def blogscraper(self, url):
    """Fetch one blog page and return the text extracted from it."""
    # Was ``request.get``: only the ``requests`` module exists, so the call
    # raised NameError.
    r = requests.get(url)
    ...
    return sometext
def run(self):
    """Scrape every blog listed on the main page, one after another."""
    for blog in self.readmainpage():
        data = self.blogscraper(blog['url'])
Using the threading package, you can run your top-level function (object initialization) asynchronously. It will create parallel worker threads for your requests. For example, if fetching a single page takes 2 minutes and you have 10 pages, with threading all of them together will take about 2 minutes. Threading in Python 3.x
With asyncio you can try to use aiohttp module:
pip install aiohttp
As example code, it could look something like this; some improvements are possible, but that depends on your code...
import sys
import aiohttp
import asyncio
import socket
from urllib.parse import urlparse
class YourClass:
    """Fetch a blog index and its posts concurrently over one aiohttp session."""

    def __init__(self):
        self.url = "..."
        url_parsed = urlparse(self.url)
        self.session = aiohttp.ClientSession(
            headers={"Referer": f"{url_parsed.scheme}://{url_parsed.netloc}"},
            auto_decompress=True,
            # ``ssl=False`` replaces the deprecated ``verify_ssl=False``.
            connector=aiohttp.TCPConnector(family=socket.AF_INET, ssl=False),
        )

    async def featch(self, url):
        """GET *url* and return the body text; raise on an HTTP error status."""
        async with self.session.get(url) as resp:
            # Raise instead of assert: asserts are stripped under ``python -O``.
            resp.raise_for_status()
            return await resp.text()

    async def readmainpage(self):
        """Collect blog URLs from the main page while there is a next page."""
        blogurls = []
        while nextPage:
            r = await self.featch(self.url)
            # ...
            blogurls.append(new_url)
        return blogurls

    async def blogscraper(self, url):
        """Fetch one blog page and return its text."""
        r = await self.featch(url)
        # ... extract the interesting text from ``r`` here ...
        # (An unreachable second ``return sometext`` used to follow this return.)
        return r

    async def __call__(self):
        """Scrape all blogs concurrently, print each result, close the session."""
        try:
            blog_list = await self.readmainpage()
            # gather() schedules the coroutines itself; no manual Task objects.
            results = await asyncio.gather(
                *(self.blogscraper(blog['url']) for blog in blog_list)
            )
            for data in results:
                print(data)
        finally:
            # Close the session even when a request failed.
            await self.session.close()
def main():
    """Run one full fetch on the default event loop, then exit with status 0."""
    job = YourClass()
    asyncio.get_event_loop().run_until_complete(job())
    sys.exit(0)


if __name__ == "__main__":
    main()
I am writing a simple producer/consumer app to call multiple URL's asynchronously.
In the following code, if I set conn_count=1 and add 2 items to the Queue, it works fine, as only one consumer is created. But if I set conn_count=2 and add 4 items to the Queue, only 3 requests are made. The other request fails with ClientConnectorError.
Can you please help me debug the reason for the failure with multiple consumers? Thank you.
I am using a echo server I created.
Server:
import os
import logging.config
import yaml
from aiohttp import web
import json
def start():
    """Configure logging, build the echo application and serve it."""
    setup_logging()
    app = web.Application()
    routes = (
        (app.router.add_get, do_get),
        (app.router.add_post, do_post),
    )
    for register, handler in routes:
        register('/', handler)
    web.run_app(app)
async def do_get(request):
    """Answer a GET on ``/`` with the plain text ``hello``."""
    greeting = 'hello'
    return web.Response(text=greeting)
async def do_post(request):
    """Echo the JSON body of the request back as the response text."""
    payload = await request.json()
    body = json.dumps(payload)
    return web.Response(text=body)
def setup_logging(
    default_path='logging.yaml',
    default_level=logging.INFO,
    env_key='LOG_CFG'
):
    """Configure logging from a YAML file, or fall back to ``basicConfig``.

    The file path comes from the environment variable ``env_key`` when that is
    set to a non-empty value, otherwise from ``default_path``. If the file does
    not exist, logging is configured with ``default_level``.
    """
    config_path = os.getenv(env_key, None) or default_path
    if not os.path.exists(config_path):
        logging.basicConfig(level=default_level)
        return
    with open(config_path, 'rt') as f:
        config = yaml.safe_load(f.read())
    logging.config.dictConfig(config)
if __name__ == '__main__':
    # Start the echo server only when executed as a script.
    start()
Client:
import asyncio
import collections
import json
import sys
import async_timeout
from aiohttp import ClientSession, TCPConnector
MAX_CONNECTIONS = 100
# Address of the echo server the consumers post to.
URL = 'http://localhost:8080'
# One unit of work: the account whose prices should be updated.
InventoryAccount = collections.namedtuple("InventoryAccount", ["op_co", "customer_id"])
async def produce(queue, num_consumers):
    """Queue two accounts per consumer, then one ``None`` stop marker per consumer."""
    accounts = (
        InventoryAccount(op_co=index, customer_id=index * 100)
        for index in range(num_consumers * 2)
    )
    for account in accounts:
        await queue.put(account)
    for _ in range(num_consumers):
        await queue.put(None)
async def consumer(n, queue, session, responses):
    """Post queued accounts to ``URL`` until a ``None`` stop marker arrives.

    Args:
        n: consumer number, used only in log output.
        queue: queue of ``InventoryAccount`` items, ended by ``None``.
        session: client session used for the POST requests.
        responses: list that receives the text of each successful response.

    A failed request is reported and the consumer carries on with the next
    item. Every item taken from the queue is marked done exactly once.
    """
    print('consumer {}: starting'.format(n))
    while True:
        # Outside the try: if get() itself is interrupted there is no item to
        # mark done and no ``account`` to report on.
        account = await queue.get()
        try:
            if account is None:
                break
            print(f"Consumer {n}, Updating cloud prices for account: opCo = {account.op_co!s}, customerId = {account.customer_id!s}")
            params = {'opCo': account.op_co, 'customerId': account.customer_id}
            headers = {'content-type': 'application/json'}
            # async_timeout must be entered with ``async with``; the plain
            # ``with`` form is deprecated.
            async with async_timeout.timeout(10):
                print(f"Consumer {n}, session state " + str(session.closed))
                async with session.post(URL,
                                        headers=headers,
                                        data=json.dumps(params)) as response:
                    # Explicit check instead of assert (stripped under -O).
                    if response.status != 200:
                        raise RuntimeError(f"unexpected HTTP status {response.status}")
                    responses.append(await response.text())
        except Exception as e:
            # Was a bare ``except:`` that printed only the exception type; the
            # repr includes the connection error detail needed for debugging.
            print(f"Consumer {n}, Error updating cloud prices for account: opCo = {account.op_co!s}, customerId = {account.customer_id!s}. {e!r}")
        finally:
            # Single place for bookkeeping: runs for success, failure and the
            # stop marker alike.
            queue.task_done()
    print('consumer {}: ending'.format(n))
async def start(loop, session, num_consumers):
    """Run ``num_consumers`` consumers over a bounded queue and return their responses."""
    queue = asyncio.Queue(maxsize=num_consumers)
    responses = []
    consumers = []
    for index in range(num_consumers):
        worker = consumer(index, queue, session, responses)
        consumers.append(asyncio.ensure_future(worker, loop=loop))
    await produce(queue, num_consumers)
    await queue.join()
    for pending in consumers:
        pending.cancel()
    return responses
async def run(loop, conn_count):
    """Open one client session, run the producer/consumer job, print the result.

    Args:
        loop: event loop handed on to ``start`` for scheduling the consumers.
        conn_count: connection limit of the session and number of consumers.
    """
    # ``ssl=False`` replaces the deprecated ``verify_ssl=False``. The
    # deprecated ``loop=`` argument is dropped: a session created inside a
    # coroutine uses the running loop.
    connector = TCPConnector(ssl=False, limit=conn_count)
    async with ClientSession(connector=connector) as session:
        result = await start(loop, session, conn_count)
        print("Result: " + str(result))
if __name__ == '__main__':
    # Passed to run(), which uses it as the connector limit and the consumer count.
    conn_count = 2
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(run(loop, conn_count))
    finally:
        # Close the loop whether or not the run succeeded.
        loop.close()
Reference:
https://pymotw.com/3/asyncio/synchronization.html
https://pawelmhm.github.io/asyncio/python/aiohttp/2016/04/22/asyncio-aiohttp.html
https://hackernoon.com/asyncio-for-the-working-python-developer-5c468e6e2e8e
I made a Telegram bot with python-telegram-bot. I have defined a list of words for the bot, and I want it to moderate a group chat: if a message contains a word from the defined list, the bot should delete that message. I added the bot to a group and made it an admin there. The bot should check the messages sent to the group, and if a message contains a word that is in mlist, the bot should delete the message. My code:
# -*- coding: cp1256 -*-
#!/usr/bin/python
import os, sys
from telegram.ext import Filters
from telegram.ext import Updater, MessageHandler
import re
def delete_method(bot, update):
    """Delete the incoming message if its text contains a banned word.

    Args:
        bot: object providing ``delete_message(chat_id=..., message_id=...)``.
        update: incoming update; ``update.message`` may be ``None``.
    """
    message = update.message
    # Updates without a message (or without text) have nothing to check.
    if message is None or not message.text:
        print("it does not contain text")
        return
    mlist = ['Hello', 'سلام']
    # Plain substring test: the words are literals, not regex patterns.
    # any() deletes at most once — a second delete of the same message fails.
    if any(word in message.text for word in mlist):
        bot.delete_message(chat_id=message.chat_id, message_id=message.message_id)
def main():
    """Register the catch-all handler and poll until the process is interrupted."""
    updater = Updater(token='TOKEN')
    catch_all = MessageHandler(Filters.all, delete_method)
    updater.dispatcher.add_handler(catch_all)
    updater.start_polling()
    updater.idle()


if __name__ == '__main__':
    main()
# -*- coding: utf8 -*-
#!python2
import time
import json
import requests
#TOKEN = XXXXXX
# NOTE(review): TOKEN exists only as the commented-out placeholder above; it
# must be defined before this line or the format() call raises NameError.
URL = "https://api.telegram.org/bot{}/".format(TOKEN)
def get_updates(offset=None):
    """Call getUpdates with ``timeout=100`` and return the decoded JSON reply.

    ``offset`` is appended to the query only when it is truthy.
    """
    query = "getUpdates?timeout=100"
    if offset:
        query = "{}&offset={}".format(query, offset)
    return requests.get(URL + query).json()
def get_last_update_id(updates):
    """Return the largest ``update_id`` in ``updates["result"]`` as an int."""
    return max([int(entry["update_id"]) for entry in updates["result"]])
def delete_message(message_id, chat_id, msg):
    """Delete the message via the Bot API when its text contains a banned word.

    Args:
        message_id: id of the message to delete.
        chat_id: id of the chat the message belongs to.
        msg: message text to check.
    """
    mlist = ['Hello', 'سلام']
    # any() issues at most one delete even when several words match.
    if any(word in msg for word in mlist):
        url = URL + "deleteMessage?message_id={}&chat_id={}".format(message_id, chat_id)
        # Was ``request.get``: an undefined name, so every match raised NameError.
        requests.get(url)
def echo_all(updates):
    """Check every text message in ``updates["result"]`` against the banned words.

    Updates that carry no ``"message"`` are skipped instead of raising
    KeyError, which used to abort the rest of the batch.
    """
    for update in updates["result"]:
        message = update.get("message")
        if not message:
            continue
        cid = message["chat"]["id"]
        msg = message.get("text")
        mid = message.get("message_id")
        if msg:
            delete_message(mid, cid, msg)
def main():
    """Poll for updates forever and process each non-empty batch.

    Errors are printed and polling continues.
    """
    last_update_id = None
    while True:
        try:
            updates = get_updates(last_update_id)
            z = updates.get("result")
            if z:
                # Acknowledge the batch by asking for the id after the newest one.
                last_update_id = get_last_update_id(updates) + 1
                echo_all(updates)
        except Exception as e:
            print(e)
        # Sleep on both paths: the pause used to sit inside the try, so a
        # persistent failure turned into a tight retry loop.
        time.sleep(0.5)


if __name__ == '__main__':
    main()