async def on_message(message):
if message.content.startswith(prefix):
msg = message.content[20:]
else:
return None
if msg == "bitcoin" or "BITCOIN" or "btc" or "BTC":
url = "https://coinmarketcap.com/currencies/bitcoin/"
hdr = {'User-Agent': 'Mozilla/5.0'}
req = Request(url, headers=hdr)
res = urlopen(req).read()
soup = BeautifulSoup(res, 'html.parser')
btc_t1 = soup.find_all("div", class_="priceValue___11gHJ")
btc_t1 = [each_line.get_text().strip() for each_line in btc_t1[:20]]
btc_t1 = " ".join(btc_t1)
time.sleep(0.1)
btc_t2 = soup.find_all("span", class_="sc-1v2ivon-0 fiaaIx")
btc_t2 = [each_line.get_text().strip() for each_line in btc_t2[:20]]
btc_t2 = " ".join(btc_t2)
time.sleep(0.1)
btc_t3 = soup.find_all("div", class_="statsValue___2iaoZ")
btc_t3 = [each_line.get_text().strip() for each_line in btc_t3[:1]]
btc_t3 = " ".join(btc_t3)
time.sleep(0.1)
btcem = discord.Embed(title="Bitcoin", description="BTC market price \nPowered by Coinmarketcap", color=0xF7931A)
btcem.set_thumbnail(url="https://s2.coinmarketcap.com/static/img/coins/64x64/1.png")
btcem.add_field(name="Market Price", value="Price: "+ str(btc_t1) +" | "+ str(btc_t2), inline=False)
btcem.add_field(name="Market Cap", value="Price: "+ str(btc_t3), inline=False)
# embed.set_footer(text="Market", icon_url="")
await message.channel.send(embed=btcem)
if msg == "ethereum" or "ETHEREUM" or "eth" or "ETH":
url = "https://coinmarketcap.com/currencies/ethereum/"
hdr = {'User-Agent': 'Mozilla/5.0'}
req = Request(url, headers=hdr)
res = urlopen(req).read()
soup = BeautifulSoup(res, 'html.parser')
eth_t1 = soup.find_all("div", class_="priceValue___11gHJ")
eth_t1 = [each_line.get_text().strip() for each_line in eth_t1[:20]]
eth_t1 = " ".join(eth_t1)
time.sleep(0.1)
eth_t2 = soup.find_all("span", class_="sc-1v2ivon-0 fiaaIx")
eth_t2 = [each_line.get_text().strip() for each_line in eth_t2[:20]]
eth_t2 = " ".join(eth_t2)
time.sleep(0.1)
eth_t3 = soup.find_all("div", class_="statsValue___2iaoZ")
eth_t3 = [each_line.get_text().strip() for each_line in eth_t3[:1]]
eth_t3 = " ".join(eth_t3)
time.sleep(0.1)
ethem = discord.Embed(title="Ethereum", description="ETH market price \nPowered by Coinmarketcap", color=0x131313)
ethem.set_thumbnail(url="https://s2.coinmarketcap.com/static/img/coins/64x64/1027.png")
ethem.add_field(name="Market Price", value="Price: "+ str(eth_t1) +" | "+ str(eth_t2), inline=False)
ethem.add_field(name="Market Cap", value="Price: "+ str(eth_t3), inline=False)
# embed.set_footer(text="Market", icon_url="")
await message.channel.send(embed=ethem)
I'm trying to make Discord Coin, a stock bot in Python. All the modules used in the code have been installed, and I want to send crawl data by embed message, but when %bitcoin (prefix = %), the Ethereum embed along with the bitcoin embed also comes out.
Your if is completely messed up.
msg == "bitcoin" or "BITCOIN" or "btc" or "BTC" is always true.
Your check should be.
if msg in ('bitcoin', 'BITCOIN', 'btc', 'BTC')
and this also wouldn't work in your case,
since you are doing msg = msg[20:],
it should be msg = msg[1:].
Now, I directly debugged your code, this isn't the way to ask a question on SO. You should be able to debug your code and questions on SO should be based on your algorithm, technique or documentation.
see debugging
wouldn't it be easier to make a commands out of this?
like that(i havent tested it but it should work and it would be prettier):
bot = commands.Bot(command_prefix="%")
#bot.command(aliases=["BITCOIN", "btc","BTC"])
async def bitcoin(self, ctx):
# your bitcoin code here
#bot.command(aliases=["ETHEREUM", "eth", "ETH"])
async def ethereum(self, ctx):
# your etherum code here
Related
I want to improve my code, changing the synchronous functions to asynchronous for a faster data extraction speed, but every time I run the program, I get the Print("Error").
async def soup(html):
soup = BeautifulSoup(html, 'html.parser')
return soup
async def title_bs4(html, tag, classes):
soup = await soup(html)
title = soup.findAll(tag, attrs={"class": classes})
title = [i.text for i in title]
return title
async def url_bs4(html, tag, classes):
soup = await soup(html)
url = soup.findAll(tag, attrs={"class": classes})
url = [i.text for i in url]
return url
async def price_xpath(html):
soup = await soup(html)
dom = etree.HTML(str(soup))
price = dom.xpath('//li[#class="ui-search-layout__item shops__layout-item"]//div[#class="ui-search-result__content-columns shops__content-columns"]/div[#class="ui-search-result__content-column ui-search-result__content-column--left shops__content-columns-left"]/div[1]/div//div[#class="ui-search-price__second-line shops__price-second-line"]//span[#class="price-tag-amount"]/span[2]')
price = [i.text.replace('.', '') for i in price]
return price
async def page_number_bs4(html, tag, classes):
soup = await soup(html)
page_number = soup.find(tag, attrs={"class": classes}).text
page_number = int(page_number)
return page_number
async def number_of_pages_bs4(html, tag, classes):
soup = await soup(html)
number_of_pages = soup.find(tag, attrs={"class": classes}).text
number_of_pages = int(number_of_pages.split(" ")[1])
return number_of_pages
async def next_xpath(html):
soup = await soup(html)
dom = etree.HTML(str(soup))
next = dom.xpath(
'//div[#class="ui-search-pagination shops__pagination-content"]/ul/li[contains(#class,"--next")]/a')[0].get('href')
return next
async def main(product):
web = "Mercado libre"
list_titles = []
list_urls = []
list_prices = []
next = 'https://listado.mercadolibre.com.co/' + str(product)
async with aiohttp.ClientSession() as session:
async with session.get(next) as response:
while True:
try:
title = await title_bs4(response, 'h2', 'ui-search-item__title shops__item-title')
list_titles.extend(title)
url = await url_bs4(response, 'a', 'ui-search-item__group__element shops__items-group-details ui-search-link')
list_titles.extend(url)
price = await price_xpath(response)
list_titles.extend(price)
page_number = await page_number_bs4(response, 'span', 'andes-pagination__link')
number_of_pages = await number_of_pages_bs4(response, 'li', 'andes-pagination__page-count')
except:
print("Error")
break
if page_number == number_of_pages:
break
next = await next_xpath(response)
df = pd.DataFrame({"shop": web, "titles": list_titles,
"links": list_urls, "prices": list_prices})
df.prices = df.prices.map(
lambda x: float(re.search(r"\d+", x).group(0)))
df.to_json("templates/product.json", orient='records')
return df
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
asyncio.run(main('samsung'))
except KeyboardInterrupt:
pass
My synchronous functions work very well but are very slow when it comes to wanting to extract data from the paginations.
I want to find the solution when running the program or if there is another better alternative to what I am looking for above.
When I run:
from bs4 import BeautifulSoup
import requests
import discord
from discord.ext import tasks
client = discord.Client()
#tasks.loop(minutes=1)
async def test():
channel = client.get_channel(973939538357522474)
await channel.send(takip)
#client.event
async def on_ready():
test.start()
async def takip():
url = ""
R = requests.get(url)
Soup = BeautifulSoup(R.text, "html5lib")
Title = Soup.find("h1", {"class": "pr-new-br"}).getText()
List = Soup.find("div", {"class": "pr-bx-nm with-org-prc"})
fiyat = List.find("span", {"class": "prc-dsc"}).getText()
degisenfiyat = float(fiyat.replace(",", ".").replace(" TL", ""))
if (degisenfiyat <= 200):
print("Fiyat düştü.")
client.run("")
I get:
A "<function takip at 0x00000244A7A440D0>" message in the discord channel
I want to use channel.send with the takip function. How do I do this?
takip is a function, takip() is what your function return
for example if you have this code
def my_sum(a, b):
return a + b
print(my_sum)
# This one print the function
# expected result : <function my_sum at 0x7f4dc82b7d30>
print(my_sum(1, 2))
# This one print what my function return (so 3 here)
# expected result : 3
In your code, you're sending your function to your discord channel, if you want to send "Fiyat düştü." if (degisenfiyat <= 200), you have to edit your code to this
from bs4 import BeautifulSoup
import requests
import discord
from discord.ext import tasks
client = discord.Client()
#tasks.loop(minutes=1)
async def test():
channel = client.get_channel(973939538357522474)
await channel.send(takip()) # Change here
#client.event
async def on_ready():
test.start()
def takip():
url = ""
R = requests.get(url)
Soup = BeautifulSoup(R.text, "html5lib")
Title = Soup.find("h1", {"class": "pr-new-br"}).getText()
List = Soup.find("div", {"class": "pr-bx-nm with-org-prc"})
fiyat = List.find("span", {"class": "prc-dsc"}).getText()
degisenfiyat = float(fiyat.replace(",", ".").replace(" TL", ""))
if (degisenfiyat <= 200):
return("Fiyat düştü.") # Change here
else:
return "degisenfiyat > 200"
client.run("")
However if (degisenfiyat > 200) this won't return anything so you will be sending None to your discord channel, I recommend you to add an else statement to return an error or more information (i.e.error : degisenfiyat > 200)
You're sending the function object. You need to call the function with parenthesis ()
This line:
await channel.send(takip)
Should be
await channel.send(takip())
takip() also needs to return a value to the caller, not print() it to the terminal. Use return instead of print:
if (degisenfiyat <= 200):
return "Fiyat düştü."
Consider this example of the function object, vs the returned value:
>>> def f():
... return "Hello"
...
>>> f()
'Hello'
>>> f
<function f at 0x103149fc0>
And finally, you need to remove the await from your function definition, as your call does not need to be asynchronous with the bot. This leaves your code at:
# ...
#tasks.loop(minutes=1)
async def test():
channel = client.get_channel(973939538357522474)
# Define x as the function's result before awaiting
x = takip()
await channel.send(x)
# ...
def takip():
url = ""
R = requests.get(url)
Soup = BeautifulSoup(R.text, "html5lib")
Title = Soup.find("h1", {"class": "pr-new-br"}).getText()
List = Soup.find("div", {"class": "pr-bx-nm with-org-prc"})
fiyat = List.find("span", {"class": "prc-dsc"}).getText()
degisenfiyat = float(fiyat.replace(",", ".").replace(" TL", ""))
if (degisenfiyat <= 200):
return "Fiyat düştü."
else:
return f"Error, degisenfiyat > 200. degisenfiyat = {degisenfiyat}"
client.run("")
I want to scrape the title and the URL of each Posting at the Forum of the URL, so that when a new Post is created with 1 of the Titles below i'd like to receive a Mail with that Link of the Post.
Please do not be so harsh with me i'm a beginner with Python and Scraping
I have multiple Problems.
1: at the While(True) Function the "soup" is red underlined with the Error: Undefined variable 'soup'
2: When commenting out the While(True) Function then the Program will not run. I get no error.
3: When there is a new Posting with one of my Criterias, how do I get the URL of that Post?
Titles
def Jeti_DC_16
def Jeti_DC_16_v2
def Jeti_DS_16
def Jeti_DS16_v2
My FullCode
from requests import get
from bs4 import BeautifulSoup
import re
import smtplib
import time
import lxml
import pprint
import json
URL = 'https://www.rc-network.de/forums/biete-rc-elektronik-zubeh%C3%B6r.135/'
def scrape_page_metadata(URL):
headers = {
"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36'}
pp = pprint.PrettyPrinter(indent=4)
response = get(URL, headers=headers)
soup = BeautifulSoup(response.content, "lxml")
metadata = {
'Jeti_DC_16': Jeti_DC_16(soup, URL),
'jeti_dc_16_2': Jeti_DC_16_v2(soup, URL),
'jeti_ds_16': Jeti_DS_16(soup, URL),
'jeti_ds_16_2': Jeti_DS_16_v2(soup, URL)
}
pp.pprint(metadata)
return metadata
def Jeti_DC_16(soup, URL):
jeti_dc_16 = None
if soup.name.string:
jeti_dc_16 = soup.title.string
elif soup.find_all("div", class_='structItem-title'):
jeti_dc_16 = soup.find_all(
"div", class_='structItem-title').get('text')
else:
jeti_dc_16 = URL.split('//')[1]
return jeti_dc_16.split('/')[0].rsplit('.')[1].capitalize()
return jeti_dc_16
def Jeti_DC_16_v2(soup, URL):
jeti_dc_16_v2 = None
if soup.name.string:
jeti_dc_16_v2 = soup.title.string
elif soup.find_all("div", class_='structItem-title'):
jeti_dc_16_v2 = soup.find_all(
"div", class_='structItem-title').get('text')
else:
jeti_dc_16_v2 = URL.split('//')[1]
return jeti_dc_16_v2.split('/')[0].rsplit('.')[1].capitalize()
return jeti_dc_16_v2
def Jeti_DS_16(soup, URL):
jeti_ds_16 = None
if soup.jeti_ds_16.string:
jeti_ds_16 = soup.jeti_ds_16.string
elif soup.find_all("div", class_='structItem-title'):
jeti_ds_16 = soup.find_all(
"div", class_='structItem-title').get('text')
else:
jeti_ds_16 = URL.split('//')[1]
return jeti_ds_16.split('/')[0].rsplit('.')[1].capitalize()
return jeti_ds_16
def Jeti_DS_16_v2(soup, URL):
jeti_ds_16_v2 = None
if soup.name.string:
jeti_ds_16_v2 = soup.title.string
elif soup.find_all("div", class_='structItem-title'):
jeti_ds_16_v2 = soup.find_all(
"div", class_='structItem-title').get('text')
else:
jeti_dc_16_v2 = URL.split('//')[1]
return jeti_dc_16_v2.split('/')[0].rsplit('.')[1].capitalize()
return jeti_ds_16_v2
# search_for_class = soup.find_all(
# 'div', class_='structItem-title')
# Jeti_DS_16 = soup.find_all(text="Jeti DS 16")
# Jeti_DS_16_v2 = soup.find_all(text="Jeti DS 16 2")
# Jeti_DC_16 = soup.find_all(text="Jeti DC 16")
# Jeti_DC_16_v2 = soup.find_all(text="Jeti DC 16 2")
if(Jeti_DC_16, Jeti_DC_16_v2, Jeti_DS_16, Jeti_DS_16_v2):
send_mail()
# # print('Die Nummer {0} {1} {2} {3} wurden gezogen'.format(
# # Jeti_DC_16, Jeti_DC_16_v2, Jeti_DS_16, Jeti_DS_16_v2))
# for i in soup.find_all('div', attrs={'class': 'structItem-title'}):
# print(i.a['href'])
# first_result = search_for_class[2]
# print(first_result.text)
# print(Jeti_DC_16, Jeti_DC_16_v2, Jeti_DS_16, Jeti_DS_16_v2)
def send_mail():
with open('/Users/blackbox/Desktop/SynologyDrive/Programmieren/rc-network/credentials.json', 'r') as myFile:
data = myFile.read()
obj = json.loads(data)
print("test: " + str(obj['passwd']))
server_ssl = smtplib.SMTP_SSL('smtp.gmail.com', 465)
server_ssl.ehlo()
# server.starttls()
# server.ehlo()
server_ssl.login('secure#gmail.com', 'secure')
subject = 'Es gibt ein neuer Post im RC-Network auf deine gespeicherte Anfragen. Sieh in dir an{Link to Post}'
body = 'Sieh es dir an Link: https://www.rc-network.de/forums/biete-rc-elektronik-zubeh%C3%B6r.135/'
msg = f"Subject: {subject}\n\n{body}"
emails = ["secure#gmx.de"]
server_ssl.sendmail(
'secure#gmail.com',
emails,
msg
)
print('e-Mail wurde versendet!')
# server_ssl.quit
while(True):
Jeti_DC_16(soup, URL)
Jeti_DC_16_v2(soup, URL)
Jeti_DS_16(soup, URL)
Jeti_DS_16_v2(soup, URL)
time.sleep(10)
# time.sleep(86400)
You create soup inside scrape_page_metadata and it is local varible which doesn't exist outside scrape_page_metadata. In while-loop you should rather use scrape_page_metadata() instead of functions Jeti_DC_16(), Jeti_DC_16_v2(), Jeti_DS_16(), Jeti_DS_16_v2()
And this functions gives you metadata which you should check instead of if(Jeti_DC_16, Jeti_DC_16_v2, Jeti_DS_16, Jeti_DS_16_v2)
More or less (you have to use correct value in place of ... because I don't know what you want to compare)
while True:
metadata = scrape_page_metadata(URL)
if metadata["Jeti_DC_16"] == ... and metadata["Jeti_DC_16_v2"] == ... and metadata["Jeti_DS_16"] == ... and metadata["Jeti_DS_16_v2"] == ...:
send_mail()
time.sleep(10)
But there are other problems
All your functions Jeti_DC_16, Jeti_DC_16_v2, Jeti_DS_16, Jeti_DS_16_v2 look the same and probably they return the same element. You could use one of them and delete others. Or you should change them and they should search different elements.
Probably you would have to use more print() to see values in variables and which part of code is executed because I think this code needs a lot changes yet.
For example find_all() gives list with results and you can't use get() which needs single element. You need for-loop to get all titles from all elements
More or less
jeti_ds_16_v2 = soup.find_all("div", class_='structItem-itle')
jeti_ds_16_v2 = [item.get('text') for item in jeti_ds_16_v2]
I get this error on the last line of my code. If anyone has encountered with the same problem, I'll be glad to share with me on how to solve it.
The source code is telethon based and is full. The execution is also successful but when wanna give response to userid it gives the UnboundLocalError.
The codes:
#client.on(events.NewMessage(incoming=True, from_users=(723428565, 677543378)))
async def _(event):
if event.fwd_from:
return
url = "http://www.google.com"
if event.reply_to_msg_id and "allow" in event.raw_text:
previous_message = await event.get_reply_message()
previous_message_text = previous_message.message
if previous_message.media:
downloaded_file_name = await client.download_media(
previous_message,
path,
)
surl = "{}/searchbyimage/upload".format(url)
multipart = {
"encoded_image": (
downloaded_file_name,
open(downloaded_file_name, "rb"),
),
"image_content": "",
}
google_rs_response = requests.post(
surl, files=multipart, allow_redirects=False
)
the_location = google_rs_response.headers.get("Location")
os.remove(downloaded_file_name)
else:
previous_message_text = previous_message.message
surl = "{}/searchbyimage?image_url={}"
request_url = surl.format(url, previous_message_text)
google_rs_response = requests.get(request_url, allow_redirects=False)
the_location = google_rs_response.headers.get("Location")
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:58.0) Gecko/20100101 Firefox/58.0"
}
response = requests.get(the_location, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")
bro = soup.find_all("div", {"class": "r5a77d"})[0]
lol = bro.find("a")
url + lol.get("href")
final = lol.text
await event.edit(
event.chat_id, final.replace("me", "")
)
Error :
Line 42: UnboundLocalError: local variable 'final' referenced before assignment
You are defining the variable text = lol.text inside the if block if "allow" in event.raw_text:
So it looks like your condition wasn't met, and the variable text was never defined. So when you tried to access it await event.edit(event.chat_id, text.replace("me", "")) you got an error
I am trying to store different data for each server I'm setting my bot for I have looked through many forums and haven't found anything that fits my need, Here is my code:
#client.command()
async def pop(ctx, arg):
global pop2
pop2 = arg
URL = 'https://www.battlemetrics.com/servers/rust/' + arg
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')
title = soup.find("h2").get_text()
title = title.replace('Connect', '')
soup = BeautifulSoup(page.content, 'html.parser')
page = soup.find('dl', class_='css-1i1egz4')
pop = page.find("dt", text="Player count")
status = page.find("dt", text="Status")
status1 = status.findNext("dd").get_text()
pop1 = pop.findNext("dd").get_text()
if status1 == "offline":
colour = 0xff0000
elif status1 == "online":
colour = 0x33ff0a
elif status1 == "dead":
colour = 0xff0000
embed = discord.Embed(title=title, description=status1, color=colour)
embed.add_field(name="Server", value=URL, inline=False)
embed.add_field(name="pop", value=pop1, inline=False)
await ctx.send("The server has been set to:")
await ctx.send(embed=embed)
What are you trying to achieve? just storing per-guild data? you could json on a small scale, or look into larger databases like MySQL or MongoDB