Error while scraping messages from a Telegram group - python

I am using the following code to scrape messages from a Telegram group, but I am getting the error below.
Please guide me on how to solve this, or suggest any other efficient solution.
RuntimeError: You must use "async with" if the event loop is running (i.e. you are inside an "async def")
Code:
# Collect the messages of every chat in `chats` into a single DataFrame.
from telethon.sync import TelegramClient
import datetime
import pandas as pd
import configparser
# API credentials are read from the [telethon_credentials] section of telethon.config.
config = configparser.ConfigParser()
config.read("telethon.config")
api_id = config["telethon_credentials"]["api_id"]
api_hash = config["telethon_credentials"]["api_hash"]
chats = ['cryptodubai7']
# NOTE(review): this client is rebound by the `with ... as client` below before it is ever used.
client = TelegramClient('test', api_id, api_hash)
df = pd.DataFrame()
for chat in chats:
# A second TelegramClient on the same 'test' session is created for every chat.
# NOTE(review): this is the only `with` in the script, so presumably it is the
# statement the quoted RuntimeError ("You must use "async with" ...") refers to.
with TelegramClient('test', api_id, api_hash) as client:
for message in client.iter_messages(chat, offset_date=datetime.date.today() , reverse=True):
print(message)
data = { "group" : chat, "sender" : message.sender_id, "text" : message.text, "date" : message.date}
# One single-row frame per message, appended to the running result.
temp_df = pd.DataFrame(data, index=[1])
df = df.append(temp_df)
# Strip the timezone information from the 'date' column.
df['date'] = df['date'].dt.tz_localize(None)

You are creating two TelegramClient instances, which doesn't seem necessary.
You need to call client.start().
As the error suggests, you need to use async.
Retrieving the messages also has to happen asynchronously.
I'd recommend using an event loop, such as the one provided by asyncio.
Something like this should get you going in the right direction:
Untested!
from telethon.sync import TelegramClient
import datetime
import pandas as pd
import asyncio
import configparser

# API credentials live in the [telethon_credentials] section of telethon.config.
config = configparser.ConfigParser()
config.read("telethon.config")
api_id = config["telethon_credentials"]["api_id"]
api_hash = config["telethon_credentials"]["api_hash"]
chats = ['cryptodubai7']

# A single client is shared by every chat.
client = TelegramClient('test', api_id, api_hash)
client.start()


async def main():
    """Collect the messages of every chat in ``chats`` into one DataFrame.

    Returns:
        A DataFrame with the columns "group", "sender", "text" and "date"
        (one row per message). The "date" column is made timezone-naive.
        The frame is empty when no message was found.
    """
    rows = []
    for chat in chats:
        # iter_messages() hands back an asynchronous iterator: it has to be
        # consumed with `async for`, it cannot be awaited.
        async for message in client.iter_messages(
                chat, offset_date=datetime.date.today(), reverse=True):
            print(message)
            rows.append({
                "group": chat,
                "sender": message.sender_id,
                "text": message.text,
                "date": message.date,
            })
    # Build the frame once from all rows: DataFrame.append() was removed in
    # pandas 2.0 and copied the whole frame on every call before that.
    df = pd.DataFrame(rows, columns=["group", "sender", "text", "date"])
    if not df.empty:
        # The .dt accessor only exists once the column holds datetimes.
        df['date'] = df['date'].dt.tz_localize(None)
    return df


loop = asyncio.get_event_loop()
loop.run_until_complete(main())

Related

Get all recent users or participants from a telegram channel with telegram API and telethon

I'm using peewee to store participants in a telegram channel. How do I get only new participants, i.e. those who have not been previously added?
Maybe we can offset by time? or offset by those who are already in the database?
Not so sure how to perform offsets in GetParticipantsRequest
from telethon import TelegramClient
from telethon.tl.functions.channels import GetParticipantsRequest
from telethon.tl.types import ChannelParticipantsSearch
from time import sleep
from schema import channel_users as cudb
from datetime import datetime
import json
from dotenv import load_dotenv
load_dotenv()
import os
# Credentials and account details come from environment variables.
api_id = os.getenv('API_ID')
api_hash = os.getenv('API_HASH')
PHONE = os.getenv('PHONE')
USERNAME = os.getenv('USERNAME')
# Remember to use your own values from my.telegram.org!
client = TelegramClient('anon', api_id, api_hash)
# Fetch every participant of `my_channel` and store each one through cudb.get_or_create().
async def main():
# Getting information about yourself
# NOTE(review): `me` is not used again in the code shown.
me = await client.get_me()
my_channel = 'https://t.me/some_channel_url'
offset = 0
limit = 100
all_participants = []
# Request `limit` participants at a time, advancing `offset` by the number
# received, until a request comes back with no users.
while True:
participants = await client(GetParticipantsRequest(
my_channel, ChannelParticipantsSearch(''), offset, limit,
hash=0
))
if not participants.users:
break
all_participants.extend(participants.users)
offset += len(participants.users)
# NOTE(review): `all_user_details` is never filled or read in the code shown.
all_user_details = []
for participant in all_participants:
# date_added is the local time at which this script processed the participant.
now = datetime.now()
date_added = now.strftime("%d/%m/%Y, %H:%M:%S")
# Looked up by id; `defaults` supplies the remaining fields when a row is created.
channel_user_id, created = cudb.get_or_create(
id = participant.id,
defaults = {'first_name' : participant.first_name,
'last_name' : participant.last_name,
'username' : participant.username,
'phone' : participant.phone,
'is_bot' : participant.bot,
'date_added' : date_added}
)
if (created):
print(f'successfully created channel_usersID = {channel_user_id}')
else:
print(f'did not create anything, user information found in channel_usersID {channel_user_id}')
# Run main() on the client's own event loop while the client is open.
with client:
client.loop.run_until_complete(main())
OK, I've sort of solved it with the code below. The remaining problem is figuring out how to update the database every time a new user joins.
# Fragment: uses `await`, so it belongs inside an `async def` that is not shown here.
# NOTE(review): `offset` is not advanced anywhere in this loop — confirm that a
# single page of `limit` participants is really all that is intended.
while True:
participants = await client(GetParticipantsRequest(
my_channel, ChannelParticipantsSearch(''), offset, limit,
hash=0
))
number_of_participants = len(participants.users)
print(f'{len(participants.users)} length')
# NOTE(review): `fn` is not among the imports of the script in the question;
# presumably it has to be imported from peewee — confirm.
max_cudb = cudb.select(fn.MAX(cudb.channel_usersID)).scalar()
# The highest stored channel_usersID is compared with the size of the page just fetched.
if max_cudb == len(participants.users):
print('id is same as number of participants in group, hence nothing new')
break
if not participants.users:
break
# calculate the difference between number of participants and last user added to DB
number_to_add = number_of_participants - max_cudb
# adds missing users chronologically from oldest to most recent
print(f'number_to_add = {number_to_add}')
# Walks indices number_to_add-1 down to 0 of the fetched page.
for i in range(number_to_add-1,-1,-1):
print(f'i = {i}')
participant = participants.users[i]
now = datetime.now()
date_added = now.strftime("%d/%m/%Y, %H:%M:%S")
channel_user_id, created = cudb.get_or_create(
id = participant.id,
defaults = {'first_name' : participant.first_name,
'last_name' : participant.last_name,
'username' : participant.username,
'phone' : participant.phone,
'is_bot' : participant.bot,
'date_added' : date_added}
)
# Prints status of DB addition
if (created):
print(f'successfully created channel_usersID = {channel_user_id}')
else:
print(f'did not create anything, user information found in channel_usersID {channel_user_id}')
Here are the docs for events.ChatAction, which is exactly what you need: https://docs.telethon.dev/en/stable/quick-references/events-reference.html?highlight=chataction#chataction — just make sure to filter the event.

Retrieving data from 2 python scripts

This is the first script, which gets data from a website:
import requests
def get_prices():
    """Fetch the SeedifyFund prices from the PancakeSwap token API.

    Returns:
        A dict with the keys "PriceUSD" and "PriceBNB" copied from the
        token entry named "SeedifyFund", or None when no entry has that name.
    """
    wanted = "SeedifyFund"
    response = requests.get("https://api.pancakeswap.info/api/tokens")
    tokens = response.json()["data"]
    prices = None
    # No early exit: if several entries share the name, the last one wins.
    for token in tokens.values():
        if token['name'] != wanted:
            continue
        prices = {
            "PriceUSD": token["price"],
            "PriceBNB": token["price_BNB"],
        }
    return prices


if __name__ == "__main__":
    print(get_prices())
The code above outputs the following: {'PriceUSD': '1.022239219137518991087869433174527', 'PriceBNB': '0.002452203037583603303073246037795846'}
I'm having an issue with the second script. I want it to use the data collected above and send it from a Telegram bot when the user types /price. The code for the second script:
import telegram
from telegram.ext import Updater
from telegram.ext import CommandHandler
from tracker import get_prices
telegram_bot_token = "API TOKEN"
updater = Updater(token=telegram_bot_token, use_context=True)
dispatcher = updater.dispatcher
# Handler for the /price command: builds a price message and sends it to the chat.
def price(update, context):
chat_id = update.effective_chat.id
message = ""
crypto_data = get_prices()
# NOTE(review): per the first script, get_prices() returns one flat dict
# {"PriceUSD": ..., "PriceBNB": ...} (or None), so `i` below is a key string and
# crypto_data[i] is the price string. Indexing that string with "pricebnb" is
# what raises "string indices must be integers".
# The keys are also spelled "PriceUSD"/"PriceBNB", and the sample output shows
# string values, which the `:,.2f` / `:.3f` formats would reject without float().
for i in crypto_data:
bnbprice = crypto_data[i]["pricebnb"]
usdprice = crypto_data[i]["priceusd"]
message += f"1 SFUND = \n${usdprice:,.2f} USD\n{bnbprice:.3f} BNB\n\n"
context.bot.send_message(chat_id=chat_id, text=message)
# Register the handler and start polling for updates.
dispatcher.add_handler(CommandHandler("price", price))
updater.start_polling()
When the user types /price in the Telegram chat, it gives this error:
coin = crypto_data[i]["pricebnb"]
TypeError: string indices must be integers
Could someone tell me what I'm doing wrong and help me solve the issue. Many thanks

asyncio not working on Google Cloud Functions

I have this function, which works fine locally on my machine with Python 3.8, but it throws a runtime error on Google Cloud Functions.
# HTTP entry point: reads up to 100 messages from a channel with Pyrogram,
# prints the first rows as a DataFrame and returns a confirmation string.
def telegram_test(request):
request_json = request.get_json()
import datetime
import pandas as pd
from pyrogram import Client
session_string = "...............38Q8uTHG5gHwyWD8nW6h................."
# the rest of the authentication
api_id = 32494131641215
api_hash = "ioadsfsjnjksfgnfriuthg#qw]/zwq ]w/\lc ec,"
# the Telegram channel you want to access
channel_name = 'pyrogram'
# Unix timestamp of the moment seven days ago, passed as offset_date below
seven_days = int((datetime.datetime.now() - datetime.timedelta(days=7)).timestamp())
# call telegram with parameters such as limit and date
# save the result to dataframe
with Client(session_string,api_id,api_hash, takeout=True,workers=2) as app:
hist_iter = app.iter_history(channel_name,offset_date=seven_days, limit=100)
# One dict of attributes per message.
msglist = [msg.__dict__ for msg in hist_iter]
df = pd.DataFrame(msglist)
print(df.head(5))
return f'it works!:{request_json}'
The error message I get from GCF log:
File "/opt/python3.8/lib/python3.8/asyncio/events.py", line 639, in
get_event_loop raise RuntimeError('There is no current event loop in
thread %r.' RuntimeError: There is no current event loop in thread
'ThreadPoolExecutor-0_0'.
Update
I updated the code and the runtime error is gone, but now I am getting a timeout error.
I set the timeout to 180 seconds, but when I test the function it still times out at 60 seconds.
Here is the updated code. Is there something I am doing wrong?
# Reads up to 10 messages from the channel and returns them as a DataFrame.
# NOTE(review): declared `async def`, yet the body uses a plain `with` and a
# plain comprehension and never awaits anything.
async def foo():
# NOTE(review): datetime, timedelta and asyncio are imported but not used in this function.
from datetime import datetime, timedelta
from pandas import DataFrame
from pyrogram import Client
import asyncio
session_string = "********zNmkubA4ibjsdjhsdfjlhweruifnjkldfioY5DE*********"
api_id = 325511548224831351
api_hash = "jdffjgtrkjhfklmrtgjtrm;sesews;;wex"
channel_name = 'cnn'
with Client(session_string, api_id, api_hash, takeout=True) as app:
hist_iter = app.iter_history(
channel_name, limit=10)
msglist = [msg.__dict__ for msg in hist_iter]
df = DataFrame(msglist)
return df
# Thin wrapper that only awaits foo().
async def bar():
return await foo()
# HTTP entry point: runs bar() to completion and returns its result (the DataFrame).
def test(request):
from asyncio import run
return run(bar())
The solution in the end was to switch from Pyrogram to Telethon and to create the asyncio event loop manually before creating the client.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
Note: you need a valid session string; otherwise, when you test the function, it will wait for you to authenticate with a mobile number. So first run this code locally and authenticate, then copy the session string to the cloud function.
Here is the full code:
from telethon.sessions import StringSession
from telethon import TelegramClient
from pandas import DataFrame
import datetime
import asyncio
# Create and register the event loop before the client is constructed.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

api_id = 101010101
api_hash = "jhafcgahagfbahgdbw17171736456gerf"
session_string = "hjksdhjbdsfhgbdsabeyitrgdsbfsdbdiyfhsbddasbdjdksf="
channel_name = 'bbcuzbek'

# Unix timestamp (whole seconds) of the moment seven days ago.
seven_days = int(
    (datetime.datetime.now() - datetime.timedelta(days=7)).timestamp()
)

client = TelegramClient(
    StringSession(session_string), api_id, api_hash, loop=loop
)

time_format = "%d/%m/%Y, %H:%M:%S"
# Download time in UTC, formatted once and stamped on every row.
download_date = datetime.datetime.now(
    tz=datetime.timezone.utc
).strftime(time_format)
cols = ["id", "date", "text", "views", "download_date"]


async def foo():
    """Read up to 10 channel messages into a DataFrame and print its length.

    Returns:
        None. The DataFrame is only built and measured here.
    """
    rows = []
    history = client.iter_messages(
        entity=channel_name, offset_date=seven_days, limit=10
    )
    async for message in history:
        rows.append([
            message.id,
            message.date.strftime(time_format),
            message.text,
            message.views,
            download_date,
        ])
    df = DataFrame(data=rows, columns=cols)
    # write it to BQ
    print("it runs")
    print(len(df))
    return None


def test(request):
    """HTTP entry point: run foo() on the client's loop while the client is open."""
    with client:
        outcome = client.loop.run_until_complete(foo())
    return outcome
bar() is redundant.
You're trying to return a DataFrame. Is it a valid HTTP response?
with -> async with
hist_iter = app.iter_history() -> hist_iter = await app.iter_history()
Maybe it is waiting for input?

Telethon events.NewMessage from specific channel

I would like to get the New Messages from a specific channel.
I have tried the following until now:
from telethon import TelegramClient, events
api_id = 242...
api_hash = '8a06ca620417c9964a058e0dc...'
bot_token = '1474729480:AAEhUPmVX_m...'
channelId = '-36744...'
client = TelegramClient('bot', api_id, api_hash).start(bot_token=bot_token)
# NOTE(review): start() was already called on the line above.
client.start()
# NOTE(review): the next line is a comment, so main() is never attached to any
# event as written. Possibly it was an `@client.on(...)` decorator in the
# original post — confirm.
#client.on(events.NewMessage(channelId))
# Intended handler: prints the bot's own account and the incoming event.
async def main(event):
# NOTE(review): get_me() is called without `await` inside an `async def` — confirm.
me = client.get_me()
print(me.stringify())
print(event.stringify())
client.run_until_disconnected()
Unfortunately, it did not work.
Does anyone know why? Did I miss something?
Greetings
Please use this (pass the channel through the chats argument):
chats = [channelId]
from telethon import TelegramClient, events
api_id = 242...
api_hash = '8a06ca620417c9964a058e0dc...'
bot_token = '1474729480:AAEhUPmVX_m...'
channelId = '-36744...'
client = TelegramClient('bot', api_id, api_hash).start(bot_token=bot_token)
client.start()
#client.on(events.NewMessage(chats = [channelId]))
async def main(event):
me = client.get_me()
print(me.stringify())
print(event.stringify())
client.run_until_disconnected()

How to get all users in a telegram channel using telethon?

I'm new to Telethon and Python. I have installed Telethon for Python 3 and I want to get all members of a Telegram channel or group. I searched a lot on the internet and found the code below, and I'm trying really hard to understand it. The Telegram documentation is not enough to do this. Is there a better solution?
from telethon import TelegramClient
from telethon.tl.functions.contacts import ResolveUsernameRequest
from telethon.tl.functions.channels import GetAdminLogRequest
from telethon.tl.functions.channels import GetParticipantsRequest
from telethon.tl.types import ChannelParticipantsRecent
from telethon.tl.types import InputChannel
from telethon.tl.types import ChannelAdminLogEventsFilter
from telethon.tl.types import InputUserSelf
from telethon.tl.types import InputUser
# These example values won't work. You must get your own api_id and
# api_hash from https://my.telegram.org, under API Development.
api_id = 12345
api_hash = '8710a45f0f81d383qwertyuiop'
phone_number = '+123456789'
client = TelegramClient(phone_number, api_id, api_hash)
client.session.report_errors = False
client.connect()
# Log in interactively with a code when the session is not authorized yet.
if not client.is_user_authorized():
client.send_code_request(phone_number)
client.sign_in(phone_number, input('Enter the code: '))
channel = client(ResolveUsernameRequest('channelusername')) # Your channel username
user = client(ResolveUsernameRequest('admin')) # Your channel admin username
# NOTE(review): `admins` and `filter` are each assigned twice; only the second value survives.
admins = [InputUserSelf(), InputUser(user.users[0].id, user.users[0].access_hash)] # admins
admins = [] # No need admins for join and leave and invite filters
# NOTE(review): the names `filter` and `list` shadow Python builtins.
filter = None # All events
filter = ChannelAdminLogEventsFilter(True, False, False, False, True, True, True, True, True, True, True, True, True, True)
# NOTE(review): `cont` is not used in the code shown.
cont = 0
# Hard-coded offsets: four requests of 100 participants each.
list = [0,100,200,300]
for num in list:
# Called with four arguments; the traceback below reports that `hash` is missing.
result = client(GetParticipantsRequest(InputChannel(channel.chats[0].id, channel.chats[0].access_hash), filter, num, 100))
for _user in result.users:
print( str(_user.id) + ';' + str(_user.username) + ';' + str(_user.first_name) + ';' + str(_user.last_name) )
# Writes one file per user under users/, named after and containing the user id.
with open(''.join(['users/', str(_user.id)]), 'w') as f: f.write(str(_user.id))
But I'm getting this error. What have I missed?
Traceback (most recent call last):
File "run.py", line 51, in <module>
result = client(GetParticipantsRequest(InputChannel(channel.chats[0].id, channel.chats[0].access_hash), filter, num, 100))
TypeError: __init__() missing 1 required positional argument: 'hash'
Sean's answer won't make any difference.
Your code works for older Telethon versions. In newer versions, a new argument, hash, was added to GetParticipantsRequest, so you need to pass it too. Add hash=0 like this:
# `hash` is passed by keyword so the added argument is visible at the call site.
result = client(GetParticipantsRequest(InputChannel(channel.chats[0].id, channel.chats[0].access_hash), filter, num, 100, hash=0))
Note that the hash of the request is not the channel hash. It's a special hash calculated based on the participants you already know about, so Telegram can avoid resending the whole thing. You can just leave it to 0.
Here is an up-to-date example from official Telethon wiki.
# NOTE(review): nothing is awaited here, so both variants presumably assume the
# synchronous client (e.g. `from telethon.sync import TelegramClient`) — confirm.
channel = client(ResolveUsernameRequest('channel_name'))
# Variant 1: loop over client.iter_participants().
user_list = client.iter_participants(entity=channel)
for _user in user_list:
print(_user)
or
# Variant 2: loop over the result of client.get_participants().
user_list = client.get_participants(entity=channel)
for _user in user_list:
print(_user)
from
I think You can use this code in the new version of Telethon
from telethon import TelegramClient
from telethon.tl.functions.channels import GetParticipantsRequest
from telethon.tl.functions.channels import GetFullChannelRequest
from telethon.tl.types import ChannelParticipantsSearch
api_id = XXXXX
api_hash = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
phone_number = '+98XXXXXXXX'
################################################
channel_username = 'tehrandb'
################################################
client = TelegramClient('session_name', api_id, api_hash)
# Connect unconditionally. Wrapped in `assert`, the call would be skipped
# entirely under `python -O`, and the assertion would fail whenever
# connect() returns a falsy value.
client.connect()
if not client.is_user_authorized():
    client.send_code_request(phone_number)
    me = client.sign_in(phone_number, input('Enter code: '))
# ---------------------------------------
offset = 0
limit = 200
my_filter = ChannelParticipantsSearch('')
all_participants = []
# ---------------------------------------
channel = client(GetFullChannelRequest(channel_username))
# Page through the participants, `limit` at a time.
while True:
    participants = client(GetParticipantsRequest(
        channel=channel_username, filter=my_filter,
        offset=offset, limit=limit, hash=0))
    all_participants.extend(participants.users)
    offset += len(participants.users)
    # A page shorter than `limit` is treated as the last one.
    if len(participants.users) < limit:
        break
I used Telethon V0.19, but the previous versions are pretty much the same.
You can try the code below; it works, I tested it. But I do have a question: the Telethon library doesn't extract all the users, only about 90% of them. I think it somehow skips some — I don't know why.
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.types import InputPeerEmpty
import csv
api_id = 123456
api_hash = 'YOUR_API_HASH'
phone = '+111111111111'

client = TelegramClient(phone, api_id, api_hash)
client.connect()
# Log in interactively with a code when the session is not authorized yet.
if not client.is_user_authorized():
    client.send_code_request(phone)
    client.sign_in(phone, input('Enter the code: '))

chats = []
last_date = None
chunk_size = 200

# Fetch one chunk of up to `chunk_size` dialogs.
result = client(GetDialogsRequest(
    offset_date=last_date,
    offset_id=0,
    offset_peer=InputPeerEmpty(),
    limit=chunk_size,
    hash=0
))
chats.extend(result.chats)

# Keep only megagroups. Chats without a `megagroup` attribute are skipped by
# reading it with a default, instead of swallowing every exception.
groups = [chat for chat in chats if getattr(chat, 'megagroup', False)]

print('Choose a group to scrape members from:')
for i, g in enumerate(groups):
    print(str(i) + '- ' + g.title)
g_index = input("Enter a Number: ")
target_group = groups[int(g_index)]

print('Fetching Members...')
all_participants = client.get_participants(target_group, aggressive=True)

print('Saving In file...')
with open("members.csv", "w", encoding='UTF-8') as f:
    writer = csv.writer(f, delimiter=",", lineterminator="\n")
    writer.writerow(['username', 'user id', 'access hash', 'name', 'group', 'group id'])
    for user in all_participants:
        # Missing fields are written as empty strings.
        username = user.username or ""
        first_name = user.first_name or ""
        last_name = user.last_name or ""
        name = (first_name + ' ' + last_name).strip()
        writer.writerow([username, user.id, user.access_hash, name, target_group.title, target_group.id])
print('Members scraped successfully.')
Here is a simple way to get all users of a Telegram channel using Telethon. Make sure that you have the necessary permissions for the channel or group (GetParticipantsRequest requires them).
from telethon.tl.functions.contacts import ResolveUsernameRequest
# `await` and `async for` are only valid inside an `async def`; this snippet is
# meant to be placed in a coroutine with a connected `client` in scope.
channel = await client(ResolveUsernameRequest('channel_name'))
async for _user in client.iter_participants(entity=channel):
print(_user)
Use client.invoke() instead of client().
You can refer to official guide.

Categories