python stock price using BeautifulSoup

python stock price using BeautifulSoup - python

I am trying to get the price of the stock using the code below, but it returns null for the current price. Please let me know where I am making an error.
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uReq

my_url = 'https://finance.yahoo.com/quote/MMM/key-statistics?p=MMM'

# Download the raw HTML of the quote page, then release the connection.
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()

page_soup = soup(page_html, "lxml")

# I tried this option 1
# NOTE(review): `attrs` maps attribute names to values, so this searches for a
# <div> that carries an attribute literally named "span". find() returns None
# when nothing matches, and that None is what gets printed.
currentPrice = page_soup.find('div', attrs={"span": "Trsdu(0.3s) Fw(b) Fz(36px) Mb(-4px) D(ib)"})
print(currentPrice)

# I tried this option 2
# NOTE(review): find() returns a single element (or None), so this loop walks
# that element's children and prints the document's first <span> every time.
for currentPrice in page_soup.find("div", {"class": "D(ib) Mend(20px)"}):
    print(page_soup.span)

You might want to have a look at yfinance
https://pypi.org/project/yfinance/

Related

Error during a loop to extract content from an scraped link using BeautifulSoup

I've been working on this web scraper for a while, trying to get the body content of different links from an online newsletter. If I break out the code for the second loop and run it separately, it returns the correct results; however, if the same part is put inside a loop in the bigger script, it raises the error "IndexError: list index out of range".
This is the script whose second loop raises the error (UPDATED):
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import pandas as pd


def pages(myurl):
    """Scrape one newsletter listing page and print its articles as a table.

    Collects the date, title and link of every entry found on ``myurl``,
    then opens each link to pull the article body, and prints the
    resulting DataFrame. Nothing is returned.
    """
    # opening up connection, grabbing the page
    uClient = uReq(myurl)
    page_html = uClient.read()
    uClient.close()
    # html parsing
    page_soup = soup(page_html, "html.parser")
    dt = []
    ttl = []
    name = []
    body = []
    source = []
    # grabs each newsletter subjects
    titular = page_soup.findAll("div",{"class":"col-md-9 col-sm-9 col-xs-9"})
    titular[0]
    tit1 = titular[0]
    fixed = 'https://www.df.cl/noticias/site/tax/port/all'
    # First loop: one row of metadata per listing entry.
    for tit1 in titular:
        date = tit1.span.text
        dt.append(date)
        title = tit1.h2.a.text
        ttl.append(title)
        link = tit1.h2.a["href"].strip()
        source.append(fixed + link)
    df = pd.DataFrame(dt, columns =['date'])
    df['title'] = ttl
    df['link'] = source
    # Second loop: fetch every article and keep its body text.
    for link in df['link']:
        # NOTE(review): the values in df['link'] were already built as
        # `fixed + link` above, so `fixed` is prepended a second time here.
        # This is likely why the request does not reach the article page and
        # `content` ends up empty -- confirm by printing new_link.
        new_link = fixed + link
        page = uReq(new_link)
        page_html_1 = page.read()
        page.close()
        page_soup = soup(page_html_1, "html.parser")
        content = page_soup.findAll("div",{"class":"entry cuerpo-noticias CUERPO"})
        content[0].text
        cont1 = content[0].text
        body.append(cont1)
    df['content'] = body
    print(df)
    #df.to_csv(u'C:/Users/snpw9/Desktop/Scripts/sample_scrap.csv', mode='a', header=False, index=False)


pages('https://www.df.cl/noticias/site/tax/port/all/taxport_3_230__1.html') # Banca y Fintech (banking and fintech)
pages('https://www.df.cl/noticias/site/tax/port/all/taxport_3_20__1.html') # Bolsa y Monedas (stock market and currencies)
pages('https://www.df.cl/noticias/site/tax/port/all/taxport_3_226__1.html') # Pensiones (pensions)
pages('https://www.df.cl/noticias/site/tax/port/all/taxport_3_228__1.html') # Seguros (insurance)
It would be very helpful to make this part work, hopefully, with your help!
The second script, without the loop (which works properly):
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup

# The address is one URL; adjacent string literals are joined by Python, so
# splitting it here keeps the line short without breaking the literal.
myurl = ('https://www.df.cl/noticias/site/tax/port/all/noticias/mercados/banca-fintech/bancoestado-'
         'destina-90-millones-para-los-gastos-de-internet-de-sus/2020-07-07/152240.html')
#def pages(myurl):
# opening up connection, grabbing the page
uClient = uReq(myurl)
page_html = uClient.read()
uClient.close()
#html parsing
page_soup = soup(page_html, "html.parser")
#grabs each newsletter subjects
content = page_soup.findAll("div",{"class":"entry cuerpo-noticias CUERPO"})
# Text of the first matching block is the article body.
cont1 = content[0].text
print(cont1)

Access the second span in loop using Beautiful Soup

How can I access the second span element in a found div?
My code looks like this:
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup

#url website
url = 'https://www.gumtree.pl/a-samochody-osobowe/krakow/honda-civic-1-5-sport-mt-2019/1006295441720911232816609'

#opening up connection, grabbing page
uClient = uReq(url)
page_html = uClient.read()
uClient.close()
page_soup = soup(page_html, 'html.parser')

#scraping div
product = page_soup.find('div', {'class': 'vip-details'})
# Print every attribute row found inside the details container.
for li in product.findAll('div', {'class': 'attribute'}):
    print(li)
The result is something like this:
[...]
<div class="attribute"><span class="name">Rok</span><span class="value">2019</span></div>
<div class="attribute"><span class="name">Kilometry</span><span class="value">12</span></div>
[...]
How do I get access to the second span element? In my case, that means the values 2019 and 12.
I tried to use such solutions but without success:
# Attempt 1: chained attribute access looks for a <span> nested inside the
# first <span>, not for its sibling.
for li in product.findAll('div', {'class': 'attribute'}):
    print(li.span.span)
and
# Attempt 2: subscripting a tag reads one of its HTML attributes, so this does
# not select the second <span>.
for li in product.findAll('div', {'class': 'attribute'}):
    print(li.span[1])
How can I access the second span of an element in my loop?

You can use find_next()
# For each attribute row, jump to the following <span class="value"> and
# print its text.
for li in product.findAll('div', {'class': 'attribute'}):
    print(li.find_next("span" , class_='value').text)
Or Use css selector.
# The descendant selector returns the value elements directly, so no inner
# lookup is needed.
for li in product.select('.attribute .value'):
    print(li.text)
Code:
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup

#url website
url = 'https://www.gumtree.pl/a-samochody-osobowe/krakow/honda-civic-1-5-sport-mt-2019/1006295441720911232816609'

# Fetch the listing page and release the connection once the body is read.
uClient = uReq(url)
page_html = uClient.read()
uClient.close()
page_soup = soup(page_html, 'html.parser')

# Narrow the search to the details container, then print each value element.
product = page_soup.select_one('.vip-details')
for li in product.select('.attribute .value'):
    print(li.text)

I got my expected result by using two loops:
# Outer loop: every attribute row; inner loop: every value span in that row.
for li in product.findAll('div', {'class': 'attribute'}):
    for value in li.findAll('span', {'class': 'value'}):
        print(value.text)
if there is a simpler solution to the problem, I invite you to comment.

Why does my scraping script return an empty result

I am practicing here and my goal is to retrieve these data from the page in the url variable:
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup

url = "https://www.newegg.com/global/bg-en/PS4-Accessories/SubCategory/ID-3142"

# opening connection, grabbing the page
uClient = uReq(url)
page_html = uClient.read()
uClient.close()

# html parser
page_soup = soup(page_html, "html.parser")

# grabs each product
containers = page_soup.findAll("div", {"class": "item-container"})
# If `containers` is empty the loop body never runs and nothing is printed.
for container in containers:
    brand = container.select("div.item-info")[0].a.img["title"]
    name = container.findAll("a", {"class": "item-title"})[0].text.strip()
    shipping = container.findAll("li", {"class": "price-ship"})[0].text.strip()
    print("brand " + brand)
    print("name " + name)
    print("shipping " + shipping)
There is nothing more I can say about it :) It is as simple as that, but I still can't work out why no data is retrieved. I will be thankful for any advice!

You are invoking the find_all method with wrong arguments.
You should use the argument "class_" properly, according to the documentation found here:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/#searching-by-css-class

I'm using Python 3.7 and BS4 for web scraping, there is a problem I couldn't solve, hope someone knows how to fix this

I am supposed to get product information from the source page. The data I want is in the HTML <em> tag, but there is another tag (a <span>) nested inside that tag, so when I save the data to local storage, it looks very bad. I hope someone knows how to fix this problem.
Here is my code:
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup

# The query string is part of the URL, so the address is kept in one literal.
my_url = 'https://list.jd.com/list.html?cat=9987,653,655&ev=exbrand_15127&page=1'

#opening up connection, grabbing the page
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()

#html parsing
page_soup = soup(page_html, "html.parser")

filename = "params.csv"
f = open(filename,"w")

#grabs each product
li_containers = page_soup.findAll("li",{"class":"gl-item"})
for i in range(0,len(li_containers)):
    p_name_div = li_containers[i].find("div",{"class":"p-name"})
    # .text includes the text of any tag nested inside <em>; strip() only
    # trims the two ends of the combined string.
    p_name = p_name_div.a.em.text.strip()
    print(p_name)
    # NOTE(review): names are written back-to-back with no separator.
    f.write(p_name)
f.close()
Here are some screenshots.
I wanted it to be like this:
But it ended up looking like this:
Without span tag
With span tag

Try this
# The query string is part of the URL, so the address is kept in one literal.
my_url = 'https://list.jd.com/list.html?cat=9987,653,655&ev=exbrand_15127&page=1'

#opening up connection, grabbing the page
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()

#html parsing
page_soup = soup(page_html, "html.parser")

filename = "params.csv"

#grabs each product
li_containers = page_soup.findAll("li",{"class":"gl-item"})

# `with` closes the file even when a product lacks the expected markup and
# the attribute chain below raises.
with open(filename, "w", encoding="utf-8") as f:
    for container in li_containers:
        p_name_div = container.find("div",{"class":"p-name"})
        # Text coming from a nested tag can carry its own line breaks and
        # padding; split()/join collapses every whitespace run to one space.
        p_name = " ".join(p_name_div.a.em.text.split())
        print(p_name)
        # One product name per line keeps the entries apart in the file.
        f.write(p_name + "\n")

Python: BeautifulSoup not always getting all text data

I've got a strange problem with my web scraper. I am trying to get the data from a website using BeautifulSoup.
My code works on 90% of all the links I've tried, but on a few it does not read the page fully.
The text that interests me is "1152x864".
When checking the source code in my browser, I clearly see the text:
<li class="x-block-grid-item">
<h3 style="margin: 0 0 0.35em;font-size: 1em;letter-spacing: 0.05em;line-height: 1">Resolution</h3>
<p class="man">1152x864</p>
</li>
But when I try to get the source via BeautifulSoup it only shows this:
<li class="x-block-grid-item">
<h3 style="margin: 0 0 0.35em;font-size: 1em;letter-spacing: 0.05em;line-height: 1">Resolution</h3>
</li>
This is my code:
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
# Page whose settings grid is being scraped.
my_url = 'https://prosettings.net/counterstrike/fer/'
# Read the whole response body, then close the connection.
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()
page_soup = soup(page_html, "html.parser")
# Every <li class="x-block-grid-item"> tile on the page, in document order.
containers = page_soup.findAll("li",{"class":"x-block-grid-item"})
# Takes the first <p> of the tile at index 8; `.p` is None when that tile has
# no <p> child, in which case `.text` raises AttributeError.
cont_res = containers[8].p.text
print("Res: " + cont_res)
When I try a different link for example:
my_url = 'https://prosettings.net/counterstrike/fallen/'
Everything works fine.

Try this. It should not disappoint you:
from urllib.request import urlopen
from bs4 import BeautifulSoup

URL = 'https://prosettings.net/counterstrike/fer/'

# The context manager closes the HTTP response once the body has been read.
with urlopen(URL) as response:
    res = response.read()

soup = BeautifulSoup(res, "lxml")

# Select the tile by its "Resolution" heading instead of by position, then
# take the text of the element with class "man" inside it.
cont_res = ' '.join(
    item.find(class_="man").text
    for item in soup.find_all(class_="x-block-grid-item")
    if "Resolution" in item.text
)
# or using .select()
# cont_res = ' '.join([item.select_one(".man").text for item in soup.select(".x-block-grid-item") if "Resolution" in item.text])
print("Res: " + cont_res)
Output:
Res: 1152x864

I'm used to BeautifulSoup.text printing out the text of every tag and its children, but there may be something funny going on with these <p>'s in particular. At any rate, you're not getting the right soup, so maybe try requests instead of urllib, and then dig straight for the <p> tags with bs4.
# The snippet uses both names below, so import them to make it runnable alone.
import requests
from bs4 import BeautifulSoup

site = 'https://prosettings.net/counterstrike/fer/'
r = requests.get(site)
soup = BeautifulSoup(r.content, 'html.parser')

# Go straight for the <p class="man"> elements instead of the grid tiles.
list2 = soup.find_all('p', class_='man')
for item in list2:
    # NOTE(review): find('p') searches inside the element, so this condition
    # only lets through entries that contain a nested <p> -- confirm that is
    # the intended filter.
    if item.find('p'):
        print(item.text)
Gives me all the class="man" <p> tags' info:
400
2.50
1000
125
1.00
0
6
1
1152x864
4:3
stretched
240
It's literally just the Resolution tag. No idea why.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

python stock price using BeautifulSoup - python

You might want to have a look at yfinance https://pypi.org/project/yfinance/

Related

Error during a loop to extract content from an scraped link using BeautifulSoup

Access the second span in loop using Beautiful Soup

Why does my scraping script return an empty result

I'm using Python 3.7 and BS4 for web scraping, there is a problem I couldn't solve, hope someone knows how to fix this

Python: BeautifulSoup not always getting all text data

Categories

Resources