Python - Not Printing All Data

I wrote a script that is supposed to constantly update currency values from around the world.
Currently the code only displays the Euro value, when it should display USD, Euro, Rupees, etc. Could anyone please tell me why it is only displaying one value?
import time
import os
import requests
from bs4 import BeautifulSoup

def refresh():
    URL = "https://www.x-rates.com/table/?from=USD&amount=1"
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'html.parser')
    ratelist = soup.findAll("table", {"class": "ratesTable"})[0].findAll("tbody")
    for tableVal in ratelist:
        trList = tableVal.findAll('tr')
        for trVal in trList[:6]:
            print(trVal.text)
            time.sleep(5)
            os.system('cls')
            refresh()

refresh()
Have a good day,
Bipolar Sheep

I would say your last refresh() call should be outside of the for loops.
In your code, it refreshes right after the first trVal has been printed, so it starts refresh() all over again before the remaining rows are printed. Please try this version:
import time
import os
import requests
from bs4 import BeautifulSoup

def refresh():
    URL = "https://www.x-rates.com/table/?from=USD&amount=1"
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'html.parser')
    ratelist = soup.findAll("table", {"class": "ratesTable"})[0].findAll("tbody")
    for tableVal in ratelist:
        trList = tableVal.findAll('tr')
        for trVal in trList[:6]:
            print(trVal.text)
    time.sleep(5)
    os.system('cls')
    refresh()

refresh()
EDIT: I also chose to move the os.system('cls') out of the loops.
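A side note, in case the script runs for a long time: because refresh() calls itself at the end, every cycle adds a stack frame, and Python's default recursion limit (about 1000) will eventually raise a RecursionError. A plain while loop avoids that; here is a minimal sketch (the cross-platform clear is my own assumption, not part of the original):

import os
import time

import requests
from bs4 import BeautifulSoup

URL = "https://www.x-rates.com/table/?from=USD&amount=1"

def refresh():
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'html.parser')
    tbody = soup.findAll("table", {"class": "ratesTable"})[0].findAll("tbody")[0]
    for tr in tbody.findAll('tr')[:6]:
        print(tr.text)

while True:
    refresh()
    time.sleep(5)
    # 'cls' only exists on Windows; 'clear' is the Unix-like equivalent
    os.system('cls' if os.name == 'nt' else 'clear')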

Movie review crawling

I want to crawl all the movie reviews on this page (the part marked with the red circle in the screenshot).
I tried to crawl them with this code. (I used Jupyter Notebook, Anaconda3.)
import requests
from bs4 import BeautifulSoup

test_url = "https://movie.naver.com/movie/bi/mi/pointWriteFormList.nhn?code=174903&type=after&page=1"
resp = requests.get(test_url)
soup = BeautifulSoup(resp.content, 'html.parser')
soup

score_result = soup.find('div', {'class': 'score_result'})
lis = score_result.findAll('li')
lis[:3]

from urllib.request import urljoin  # When I ran this block and the next block it didn't save any reviews.
review_text = []
#review_text = lis[0].find('p').getText()
list_soup = soup.find_all('li', 'p')
for item in list_soup:
    review_text.append(item.find('p').get_text())

review_text[:5]  # Nothing was saved.
As I wrote, in the third and fourth blocks nothing was saved. What is the problem?
The list is empty because soup.find_all('li', 'p') searches for li tags whose class is 'p', and there are none on this page. This will get what you want; tested in Python within Jupyter Notebook (latest):
import requests
from bs4 import BeautifulSoup
from bs4.element import NavigableString

test_url = "https://movie.naver.com/movie/bi/mi/pointWriteFormList.nhn?code=174903&type=after&page=1"
resp = requests.get(test_url)
soup = BeautifulSoup(resp.content, 'html.parser')

movie_lst = soup.select_one('div.score_result')
ul_movie_lst = movie_lst.ul
for movie in ul_movie_lst:
    if isinstance(movie, NavigableString):
        continue
    score = movie.select_one('div.star_score em').text
    name = movie.select_one('div.score_reple p span').text
    review = movie.select_one('div.score_reple dl dt em a span').text
    print(score + "\t" + name)
    print("\t" + review)

bs4 scraping in Python: get contents until a specific class name

I want to scrape this site:
https://www.eduvision.edu.pk/institutions-detail.php?city=51I&institute=5_allama-iqbal-open-university-islamabad
I want only the bachelor data at this URL, which is under the class name academicsList, and I don't want the MS (MASTERS) data below it.
I want my scraper to stop before the MS data. My logic is that we can set a temporary counter on class=academicsHead and stop when it reaches the second academicsHead.
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

ua = UserAgent()
header = {'user-agent': ua.chrome}
response = requests.get('https://www.eduvision.edu.pk/institutions-detail.php?city=51I&institute=5_allama-iqbal-open-university-islamabad', headers=header)
soup = BeautifulSoup(response.content, 'html.parser')
disciplines = soup.findAll("ul", {"class": "academicsList"})
#temp = soup.findAll("ul",{"class":"academicsHead"})
#stop at second academicsHead
for d in disciplines:
    print(d.findAll('li')[0].text)
We can check whether the class is 'academicsHead' and, if it is, check whether the text is BACHELOR; if not, break the loop.
Something like this would work (note the added import re):
import re

disciplines = soup.findAll('ul', attrs={'class': re.compile(r'academics+(.)+')})
for i in disciplines:
    if i['class'][0] == 'academicsHead':
        if i.find('li').text.strip() != 'BACHELOR':
            break
    else:
        print(i.find('li').text.strip())
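Another way to express the asker's "stop at the second academicsHead" idea, if a comparison helps: start from the first academicsHead and walk its following ul siblings, stopping as soon as another academicsHead appears. A sketch under the same assumptions about the page's class names:

import requests
from bs4 import BeautifulSoup

url = ('https://www.eduvision.edu.pk/institutions-detail.php'
       '?city=51I&institute=5_allama-iqbal-open-university-islamabad')
response = requests.get(url, headers={'user-agent': 'Mozilla/5.0'})
soup = BeautifulSoup(response.content, 'html.parser')

head = soup.find('ul', {'class': 'academicsHead'})  # the BACHELOR heading
for sibling in head.find_next_siblings('ul'):
    if 'academicsHead' in (sibling.get('class') or []):
        break  # second academicsHead reached (the MS section): stop
    print(sibling.find('li').text.strip())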

How do I get a specific word phrase out of a word soup with Beautiful Soup?

I already sorted my code with BeautifulSoup and came out with this:
<bound method Tag.prettify of <script type="text/javascript">var LifeTimeStats = [{"Key":"Top 3","Value":"31"},{"Key":"Top 5s","Value":"36"},{"Key":"Top 3s","Value":"13"},{"Key":"Top 6s","Value":"27"},{"Key":"Top 12s","Value":"76"},{"Key":"Top 25s","Value":"58"},{"Key":"Score","Value":"99,788"},{"Key":"Matches Played","Value":"502"},{"Key":"Wins","Value":"9"},{"Key":"Win%","Value":"2%"},{"Key":"Kills","Value":"730"},{"Key":"K/d","Value":"1.48"}];</script>>
I am trying to get the specific value "730" from this:
{"Key":"Kills","Value":"730"}
As there are no HTML tags I can sort by, I have no idea how to get this specific value. Do you have any idea?
Maybe there is another solution to get there...
Here is the full code:
#----WEB INPUT BASIC----
#import bs4
from urllib.request import urlopen as uReq
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup as soup
#setting my url
url = 'https://fortnitetracker.com/profile/psn/Rehgum'
#making my https page work
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
web_byte = urlopen(req).read()
webpage = web_byte.decode('utf-8')
urlopen(req).close()
#html parsing
page_soup = soup(webpage, "html.parser")
lifetime = page_soup.findAll("script",{"type":"text/javascript"})
stats = lifetime[3]
specific = stats.prettify
value = specific.text
#from here there is just code to put that value in a .txt file
This is just an idea of what you could do:
1. Extract the JS code into a Python variable.
2. Run a regex over it to extract the value of the variable.
3. "JSONify" that value.
4. Extract the data you need.
For example (note the added import re and import json):
import json
import re

a = '''var LifeTimeStats = [{"Key":"Top 3","Value":"31"},{"Key":"Top 5s","Value":"36"},{"Key":"Top 3s","Value":"13"},{"Key":"Top 6s","Value":"27"},{"Key":"Top 12s","Value":"76"},{"Key":"Top 25s","Value":"58"},{"Key":"Score","Value":"99,788"},{"Key":"Matches Played","Value":"502"},{"Key":"Wins","Value":"9"},{"Key":"Win%","Value":"2%"},{"Key":"Kills","Value":"730"},{"Key":"K/d","Value":"1.48"}];'''
b = re.findall(r'var.*?=\s*(.*?);', a)[0]
c = json.loads(b)
kills = next(d['Value'] for d in c if d['Key'] == 'Kills')  # step 4
See the dummy full code I wrote.
UPDATE
After seeing the full code... This could be a solution for your problem.
I finally got it working!
The thing that produced my errors was the "def loop():" part.
Here is the final working code:
def loop():
    from urllib.request import Request, urlopen
    from bs4 import BeautifulSoup as soup
    import json
    import re
    import time

    #setting my url
    url = 'https://fortnitetracker.com/profile/psn/Rehgum'

    #making my https page work
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    web_byte = urlopen(req).read()
    webpage = web_byte.decode('utf-8')
    urlopen(req).close()

    #html parsing
    page_soup = soup(webpage, "html.parser")
    lifetime = page_soup.findAll("script", {"type": "text/javascript"})
    stats = lifetime[3]
    stats_var = re.findall(r'var.*?=\s*(.*?);', stats.text)[0]
    vals = json.loads(stats_var)
    for val in vals:
        if val['Key'] == 'Kills':
            num_kills = val['Value']
            break
    print('Num kills = {}'.format(num_kills))
    with open('lifetime_wins.txt', 'w') as fd:
        fd.write(str(num_kills))
    time.sleep(30)
    loop()
for i in range(1, 2):
    loop()
while i < 1:
    print("Ende")
Big "Thank you" to #kazbeel. You saved my Day! +rep

Getting rid of HTML tags in Python when scraping

So I'm trying to scrape the box score for an NBA game from ESPN. I tried to get the names first, but I'm having a difficult time getting rid of the HTML tags.
I've tried using
get_text(), .text(), .string_strip()
but they keep giving me errors.
Here's the code I'm working with right now.
from bs4 import BeautifulSoup
import requests

url = "http://scores.espn.com/nba/boxscore?gameId=400900407"
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")

name = []
for row in soup.find_all('tr')[1:]:
    player_name = row.find('td', attrs={'class': 'name'})
    name.append(player_name)
print(name)
Using player_name.text should work, but the problem is that sometimes row.find('td', attrs={'class': 'name'}) returns None. Try like this:

if player_name:
    name.append(player_name.text)
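Putting that together with the question's code, a minimal complete version of this fix might look like the following (same URL and page structure assumed):

from bs4 import BeautifulSoup
import requests

url = "http://scores.espn.com/nba/boxscore?gameId=400900407"
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")

name = []
for row in soup.find_all('tr')[1:]:
    player_name = row.find('td', attrs={'class': 'name'})
    if player_name:  # skip rows (e.g. headers, totals) without a name cell
        name.append(player_name.text)
print(name)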
I solved it like this:

from bs4 import BeautifulSoup
import requests

url = "http://scores.espn.com/nba/boxscore?gameId=400900407"
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")

name = []
for row in soup.find_all('tr')[1:]:
    try:
        player_name = row.select('td.name span')[0].text
        name.append(player_name)
    except IndexError:  # row has no name cell
        pass
print(name)
My code, for your reference (using pyquery instead of BeautifulSoup):

import requests
from pyquery import PyQuery as pyq

url = "http://scores.espn.com/nba/boxscore?gameId=400900407"
r = requests.get(url)
doc = pyq(r.content)
print([h.text() for h in doc('.abbr').items()])

Stock price data refresh

I am very new and I am getting totally stuck with a recent task. I want to auto-refresh a stock price automatically as it changes. I am scraping the nasdaq.com website for the actual intraday price.
I have this code:
import bs4 as bs
import urllib.request

tiker = input("zadaj ticker: ")
url = urllib.request.urlopen("http://www.nasdaq.com/symbol/" + tiker + "/real-time")
stranka = url.read()
soup = bs.BeautifulSoup(stranka, 'lxml')
print(tiker.upper())
for each in soup.find('div', attrs={'id': 'qwidget_lastsale'}):
    print(each.string)
I was only able to make an infinite loop with while True, but each update prints on a new line, while I want a single line that changes in place as the actual price changes.
Thank you very much for your notes.
You can achieve it by printing "\b" to remove the previously printed string and then printing on the same line:
import bs4 as bs
import urllib.request
import time
import sys

tiker = input("zadaj ticker: ")
print(tiker.upper())
written_string = ''
while True:
    url = urllib.request.urlopen("http://www.nasdaq.com/symbol/" + tiker + "/real-time")
    stranka = url.read()
    soup = bs.BeautifulSoup(stranka, 'lxml')
    for each in soup.find('div', attrs={'id': 'qwidget_lastsale'}):
        for i in range(len(written_string)):
            sys.stderr.write("\b")
        sys.stderr.write(each.string)
        written_string = each.string
    time.sleep(1)
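If backspaces prove fiddly, a common alternative is a carriage return: print with end='\r' so the next print starts at the beginning of the same line, padding the string so a shorter price fully covers a longer one. A sketch under the same nasdaq.com page assumptions as above:

import time
import urllib.request

import bs4 as bs

tiker = input("zadaj ticker: ")
print(tiker.upper())
while True:
    stranka = urllib.request.urlopen(
        "http://www.nasdaq.com/symbol/" + tiker + "/real-time").read()
    soup = bs.BeautifulSoup(stranka, 'lxml')
    price = soup.find('div', attrs={'id': 'qwidget_lastsale'}).text
    # '\r' returns the cursor to the start of the line; ljust pads so a
    # shorter price fully overwrites the previous, longer one
    print(price.ljust(20), end='\r', flush=True)
    time.sleep(1)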
