Python Web Scraping Display additional data fields on screen - python

Good day. I need help again with web scraping the URL https://bscscan.com/token/0x15a217c30b1303fbf64b8bdc3323fd3c14e9acb3#balances. There is still some data that I want to display but cannot figure out how to.
import re
import requests
from bs4 import BeautifulSoup

# Scrape summary fields (holders, transfers, name, market cap, total supply)
# from a BscScan token page and print them.
contractpage = requests.get("https://bscscan.com/token/0x722126182535a93fb70633ea09c228b43894bf84#balances")
ca = BeautifulSoup(contractpage.content, 'html.parser')


def _first_number(text):
    """Return the first comma-grouped number in *text*, or the stripped text
    itself when no number is present."""
    match = re.search(r'[\d,]+', text)
    return match.group(0) if match else text.strip()


# print holders
tokenholders = ca.find(id='ContentPlaceHolder1_tr_tokenHolders').get_text()
print("Total Holders: " + _first_number(tokenholders))

# print transfers
# BUG FIX: str.strip("Transfers:") removes any of the characters
# 'T','r','a','n','s','f','e',':' from BOTH ends (strip takes a character
# set, not a substring), so the transfer count was mangled.  Pull the
# number out with a regex instead.
tokentransfers = ca.find(id='ContentPlaceHolder1_trNoOfTxns').get_text()
print("Total Transfers: " + _first_number(tokentransfers))

# print name
website = ca.find('span', class_='text-secondary small').get_text()
print("Name: " + website)

# print marketcap
mcapa = ca.find(id='ContentPlaceHolder1_tr_valuepertoken').get_text()
mcap_match = re.search(r'Market Cap.*?([$\d,.]+)', mcapa, flags=re.S)
print("Market Cap: " + (mcap_match.group(1) if mcap_match else "n/a"))

# print totalsupply
totalbox = ca.find('div', class_='col-md-8').get_text().split('(')[0].strip()
print("Total Supply: " + totalbox)

# Lock row: guard against the icon element being absent instead of crashing
# with AttributeError on None.get_text().
lock_icon = ca.find('i', class_='far fa-file-alt text-secondary')
print(lock_icon.get_text() if lock_icon is not None else "Lock row not found")
Needs Help:
How to display the total transfers
How can I find the row with the icon(Lock) beside and print the percentage
How can I find the 0x000000000000000000000000000000000000dead and return the percentage
The code above displays:
Total Holders: 6
Name: FUCKTHEMOON
Market Cap: $0.00
Total Supply: 1,000,000,000,000,000 FUCKTHEMOON
Total Transfers: 7 <---- Does not work. Needs help
Lock: 83.5687% <---- Does not work. Needs help
Burn: 0x000000000000000000000000000000000000dead, 15.0000% <---- Does not work. Needs help

Related

Can I carry out a similar function to vlookup on a HTML table?

I'm trying to web scrape UFC fighters' stats based on user input. I'm using Beautiful Soup and pandas. The idea is that the user input is matched to a fighter's first and last name, which then returns their stats. Ideally I'd like to add the option of specifying which particular stat is required in a separate input. I've been able to pull the HTML table headers successfully, but I don't know how to assign values to them that will correspond to the matching fighter name and print the associated value. In my code currently I'm splitting the fighter-name input into first and last name, but I don't know how to then match them to the table data or how to return the corresponding data. The data being returned currently is just the first line of results (fighter 'Tom Aaron'), but no lookups or matching are being carried out. Are nested dictionaries the way to go? Any advice is greatly appreciated; this is my first Python project, so the code is probably all over the place.
("Which fighter do you want information on?"
input - Forrest Griffin
"What information do you want?:
input - Wins
"Forrest Griffin has won 8 times"
from bs4 import BeautifulSoup
import requests
import pandas as pd

website = "http://ufcstats.com/statistics/fighters?char=a&page=all"
response = requests.get(website)
response
soup = BeautifulSoup(response.content, 'html.parser')

# One <tr> per fighter in the stats table body.
results = soup.find('table', {'class': 'b-statistics__table'}).find('tbody').find_all('tr')
len(results)
# print(results)
row = soup.find('tr')
print(row.get_text())

# Attempting to split out the table headers and assign them.
table = soup.find('table', {'class': 'b-statistics__table'}).find('thead').find_all('tr')
# df = pd.read_html(str(table))[0]
# print(df.set_index(0).to_dict('dict'))

# First name: first cell of the first result row.
first_name = str(results[0].find_all('td')[0].get_text())
print(first_name)


def first_names():
    # NOTE(review): this iterates the characters of the single first_name
    # string, not a list of names.
    for names in first_name:
        print(names)
    return


# first_names()

last_name = results[1].find_all('td')[1].get_text()
print(last_name)
alias = results[1].find_all('td')[2].get_text()
if len(alias) == 0:
    print("n/a")
else:
    print(alias)
height = results[1].find_all('td')[3].get_text()
print(height)
weight = results[1].find_all('td')[4].get_text()
# print(weight)
wins = results[1].find_all('td')[7].get_text()
losses = results[1].find_all('td')[8].get_text()
draws = results[1].find_all('td')[9].get_text()

# Split the user input into first + second name.
x = input("Which fighter do you want to know about?")
y = input("What do you want to know about?")

# Only checks result row 1 (Tom Aaron's row) — still needs to search all rows.
if (x.split()[0] in str(results[1].find_all('td')[0].get_text())
        and x.split()[1] in str(results[1].find_all('td')[1].get_text())
        and y == "wins"):
    print(first_name + " " + last_name + " has won " + wins + " times.")

if x.split()[1] in str(results[1].find_all('td')[1].get_text()):
    print("ok")
else:
    print('fail')

# Tom test
print(x.split()[0])

if x.split()[1] == first_name:
    print(x.split()[1])

if x.split()[0] in results[1] and x.split()[1] in results[1]:
    print('wins')
else:
    print("Who")

print(str(results[1].find_all('td')[0].get_text()))

Creating an API request with specific parameters

Currently I am using the following code to scrape https://www.nike.com/w/mens-shoes-nik1zy7ok for all shoes on the page:
import requests
import json

# The {} placeholder is the anchor (paging offset) parameter.
uri = 'https://api.nike.com/cic/browse/v1?queryid=products&country=us&endpoint=product_feed/rollup_threads/v2?filter=marketplace(US)%26filter=language(en)%26filter=employeePrice(true)%26filter=attributeIds(0f64ecc7-d624-4e91-b171-b83a03dd8550%2C16633190-45e5-4830-a068-232ac7aea82c)%26anchor={}%26consumerChannelId=d9a5bc42-4b9c-4976-858a-f159cf99c647%26count=60'

# Collect all products by advancing the anchor until a short page comes back.
store = []
with requests.Session() as session:
    anchor = 0
    while True:
        result = session.get(uri.format(anchor)).json()
        products = result['data']['products']['products']
        store += products
        # A page with fewer than 60 items is the last one.
        if len(products) < 60:
            break
        anchor += 24

# De-duplicate by cloudProductId, keeping first occurrence order.
cloudProductIds = set()
unique_products = []
for product in store:
    if product['cloudProductId'] not in cloudProductIds:
        cloudProductIds.add(product['cloudProductId'])
        unique_products.append(product)
How do I write this same API request to retrieve either the men's shoes from this site or the women's shoes on the women's shoes page: https://www.nike.com/w/womens-shoes-5e1x6zy7ok ? Which parameter do I need to change?
#Greg I ran your provided API link in Postman and getting different results for men and women. All I have changed in the query string parameters is UUIDs which is unique in both the cases for men it is uuids: 0f64ecc7-d624-4e91-b171-b83a03dd8550,16633190-45e5-4830-a068-232ac7aea82c and for women uuids: 16633190-45e5-4830-a068-232ac7aea82c,193af413-39b0-4d7e-ae34-558821381d3f,7baf216c-acc6-4452-9e07-39c2ca77ba32.
If you pass these 2 unique set of uuids in the query string then you will get men and women result separately as there is no other parameter which will define their identity.
Below is the code:
import json
import requests

# Common query-string parameters shared by the men's and women's requests.
queryid = 'filteredProductsWithContext'
anonymousId = '25AFE5BE9BB9BC03DE89DBE170D80669'
language = 'en-GB'
country = 'IN'
channel = 'NIKE'
localizedRangeStr = '%7BlowestPrice%7D%E2%80%94%7BhighestPrice%7D'

# Only the uuids differ between the men's and women's browse feeds.
uuids_men = '0f64ecc7-d624-4e91-b171-b83a03dd8550,16633190-45e5-4830-a068-232ac7aea82c'
uuids_women = '16633190-45e5-4830-a068-232ac7aea82c,193af413-39b0-4d7e-ae34-558821381d3f,7baf216c-acc6-4452-9e07-39c2ca77ba32'


def _get_result(uuids):
    """Fetch and print the browse feed for the given comma-separated uuids.

    The two public wrappers below were duplicated except for the uuids
    value, so the shared request logic lives here.
    """
    url = ('https://api.nike.com/cic/browse/v1?queryid=' + queryid
           + '&anonymousId=' + anonymousId + '&uuids=' + uuids
           + '&language=' + language + '&country=' + country
           + '&channel=' + channel
           + '&localizedRangeStr=' + localizedRangeStr)
    # NOTE: verify=False disables TLS certificate checking (kept from the
    # original); avoid it in production code.
    data = requests.get(url, verify=False).json()
    print(data)


def get_men_result():
    """Print the browse feed for men's shoes."""
    _get_result(uuids_men)


def get_women_result():
    """Print the browse feed for women's shoes."""
    _get_result(uuids_women)


get_men_result()
print('-' * 100)
get_women_result()
If you look at the query string which i have created for men and women you will notice that there are 6 common parameters and only uuid is unique. Also if you want you can change country, language etc for more data fetching. Please refer screenshots as well.
Men
Women

Insert table data from website into table on my own website using Python and Beautiful Soup

I wrote some code that grabs the numbers I need from this website, but I don't know what to do next.
It grabs the numbers from the table at the bottom. The ones under calving ease, birth weight, weaning weight, yearling weight, milk and total maternal.
#!/usr/bin/python
import urllib2
from bs4 import BeautifulSoup
import pyperclip
def getPageData(url):
    """Fetch *url* and return its EBV table cells as a list of <td> tags.

    Returns -1 when the url is not an abri.une.edu.au page.
    """
    if 'abri.une.edu.au' not in url:
        return -1
    webpage = urllib2.urlopen(url).read()
    soup = BeautifulSoup(webpage, "html.parser")
    # The EPD tree lives in the table with class TablesEBVBox; grab every
    # cell in document order.
    pedTreeTable = soup.find('table', {'class': 'TablesEBVBox'})
    pageData = pedTreeTable.findAll('td')
    # Drop the 8th cell — presumably not part of the EPD data; TODO confirm.
    pageData.pop(7)
    return pageData
def createPedigree(animalPageData):
    """Turn the raw table cells into a dict of the six EPD fields.

    *animalPageData* is the list of <td> tags from getPageData; the values
    of interest sit at fixed indices 18-23.
    """
    texts = [cell.text for cell in animalPageData]
    prettyPedigree = {
        'calving_ease': texts[18],
        'birth_weight': texts[19],
        'wean_weight': texts[20],
        'year_weight': texts[21],
        'milk': texts[22],
        'total_mat': texts[23],
    }
    # Strip registration numbers from every field except year_weight
    # ('dam' is never a key here; it survives from an earlier revision).
    for key in prettyPedigree:
        if key not in ('year_weight', 'dam'):
            prettyPedigree[key] = stripRegNumber(prettyPedigree[key])
    return prettyPedigree
def stripRegNumber(animal):
    """Return *animal* with every all-digit word removed.

    The result keeps a single trailing space after the last kept word,
    matching the original concatenation behaviour.
    """
    kept = [word for word in animal.split() if not word.isdigit()]
    return "".join(word + " " for word in kept)
def prettify(pedigree):
    """Render the pedigree dict as one right-justified value per line.

    The field width equals len(value), so each value is effectively printed
    as-is followed by a newline — kept identical to the original format.
    """
    row_order = ('calving_ease', 'birth_weight', 'wean_weight',
                 'year_weight', 'milk', 'total_mat')
    lines = []
    for key in row_order:
        value = pedigree[key]
        # Build the format string with a variable width, then apply it.
        lines.append('{{:>{}}}'.format(len(value)).format(value) + '\n')
    return ''.join(lines)
if __name__ == '__main__':
while True:
url = raw_input('Input a url you want to use to make life easier: \n')
pageData = getPageData(url)
s = prettify(createPedigree(pageData))
pyperclip.copy(s)
if len(s) > 0:
print 'the easy string has been copied to your clipboard'
I've just been using this code for easy copying and pasting. All I have to do is insert the URL, and it saves the numbers to my clipboard.
Now I want to use this code on my website; I want to be able to insert a URL in my HTML code, and it displays these numbers on my page in a table.
My questions are as follows:
How do I use the python code on the website?
How do I insert collected data into a table with HTML?
It sounds like you would want to use something like Django. Although the learning curve is a bit steep, it is worth it and it (of course) supports python.

Get more than 50 members of a group from Soundcloud

I'm trying to get all the members of a SoundCloud group using the Python API.
So far I can get the first 50, but providing the mentioned "linked_partitioning=1" argument doesn't seem to move on to the next set of members.
My code is:
# Login and authenticate
client = soundcloud.Client(client_id = clientId,
client_secret = clientSecret,
username = username,
password = password)
# print authenticated user's username
print client.get('/me').username
# Get members
count = 1
total = 0;
while count > 0 and total < max:
members = client.get('/resolve', url=group_url, linked_partitioning=1)
count = len(members)
total += count
# Debug Output
print "Total: " + str(total) + ". Retrieved another " + str(count) + " members."
print members[0].username
I've been looking at: https://developers.soundcloud.com/docs/api/guide#pagination but still haven't managed to find a solution.
Snippet of working PHP code using linked_partioning and limit (max value can be 200). The default result set size is 50.
I use the limit with all the endpoints, but have not as yet touched groups, so I can't verify that it works there.
// Build the artist-tracks endpoint URL with the client id appended.
$qryString = self::API_ENDPOINT . self::SEARCH_API_ARTISTS
. "/" . $userId ."/tracks?" . $this->getClientId(true);
// Request the maximum page size (up to 200; the API default is 50).
$qryString .= "&limit=" . self::MAX_API_PAGE_SIZE;
// Ask for paged (linked_partitioning) results.
$qryString .= "&linked_partitioning=1";

Need to handle keyerror exception python

I'm getting a KeyError exception when I input a player name here that is not in the records list. I can search and get back any valid name, but if I input anything else, I get a KeyError. I'm not really sure how to go about handling this, since it's kind of confusing already dealing with something like 3 sets of data created from parsing my file.
I know this code is bad — I'm new to Python, so please excuse the mess. Also note that this is a sort of test file to get this functionality working, which I will then write into functions in my real main file. Kind of a testbed here, if that makes any sense.
This is what my data file, stats4.txt, has in it:
[00000] Cho'Gath - 12/16/3 - Loss - 2012-11-22
[00001] Fizz - 12/5/16 - Win - 2012-11-22
[00002] Caitlyn - 13/4/6 - Win - 2012-11-22
[00003] Sona - 4/5/9 - Loss - 2012-11-23
[00004] Sona - 2/1/20 - Win - 2012-11-23
[00005] Sona - 6/3/17 - Loss - 2012-11-23
[00006] Caitlyn - 14/2/16 - Win - 2012-11-24
[00007] Lux - 10/2/14 - Win - 2012-11-24
[00008] Sona - 8/1/22 - Win - 2012-11-27
Here's my code:
import re
info = {}
records = []
search = []
with open('stats4.txt') as data:
for line in data:
gameid = [item.strip('[') for item in line.split(']')]
del gameid[-1]
gameidstr = ''.join(gameid)
gameid = gameidstr
line = line[7:]
player, stats, outcome, date = [item.strip() for item in line.split('-', 3)]
stats = dict(zip(('kills', 'deaths', 'assists'), map(int, stats.split('/'))))
date = tuple(map(int, date.split('-')))
info[player] = dict(zip(('gameid', 'player', 'stats', 'outcome', 'date'), (gameid, player, stats, outcome, date)))
records.append(tuple((gameid, info[player])))
print "\n\n", info, "\n\n" #print the info dictionary just to see
champ = raw_input() #get champion name
#print info[champ].get('stats').get('kills'), "\n\n"
#print "[%s] %s - %s/%s/%s - %s-%s-%s" % (info[champ].get('gameid'), champ, info[champ].get('stats').get('kills'), info[champ].get('stats').get('deaths'), info[champ].get('stats').get('assists'), info[champ].get('date')[0], info[champ].get('date')[1], info[champ].get('date')[2])
#print "\n\n"
#print info[champ].values()
i = 0
for item in records: #this prints out all records
print "\n", "[%s] %s - %s/%s/%s - %s - %s-%s-%s" % (records[i][0], records[i][1]['player'], records[i][1]['stats']['kills'], records[i][1]['stats']['deaths'], records[i][1]['stats']['assists'], records[i][1]['outcome'], records[i][1]['date'][0], records[i][1]['date'][1], records[i][1]['date'][2])
i = i + 1
print "\n" + "*" * 50
i = 0
for item in records:
if champ in records[i][1]['player']:
search.append(records[i][1])
else:
pass
i = i + 1
s = 0
if not search:
print "no availble records" #how can I get this to print even if nothing is inputted in raw_input above for champ?
print "****"
for item in search:
print "\n[%s] %s - %s/%s/%s - %s - %s-%s-%s" % (search[s]['gameid'], search[s]['player'], search[s]['stats']['kills'], search[s]['stats']['deaths'], search[s]['stats']['assists'], search[s]['outcome'], search[s]['date'][0], search[s]['date'][1], search[s]['date'][2])
s = s + 1
I tried setting up a Try; Except sort of thing but I couldn't get any different result when entering an invalid player name. I think I could probably set something up with a function and returning different things if the name is present or not but I think I've just gotten myself a bit confused. Also notice that no match does indeed print for the 8 records that aren't matches, though thats not quite how I want it to work. Basically I need to get something like that for any invalid input name, not just a valid input that happens to not be in a record in the loop.
Valid input names for this data are:
Cho'Gath, Fizz, Caitlyn, Sona, or Lux - anything else gives a keyerror, thats what I need to handle so it doesn't raise an error and instead just prints something like "no records available for that champion" (and prints that only once, rather then 8 times)
Thanks for any help!
[edit] I was finally able to update this code in the post (thank you martineau for getting it added in, for some reason backticks aren't working to block code and it was showing up as bold normal text when i pasted. Anyways, look at if not search, how can I get that to print even if nothing is entered at all? just pressing return on raw_input, currently it prints all records after **** even though i didn't give it any search champ
where is your exact error occurring?
i'm just assuming it is when champ = raw_input() #get champion name
and then info[champ]
you can either check if the key exists first
if champ not in info:
print 'no records avaialble'
or use get
if info.get(champ)
or you can just try and access the key
try:
info[champ]
# do stuff
except KeyError:
print 'no records available'
The more specific you can be in your question, the better; although you explained your problem, you really didn't include any specifics. Please always include a traceback if available, and post the relevant code IN your post, not behind a link.
Here's some modifications that I think address your problem. I also reformatted the code to make it a little more readable. In Python it's possible to continue long lines onto the next either by ending with a \ or just going to the next line if there's an unpaired '(' or '[' on the previous line.
Also, the way I put code in my questions or answer here is by cutting it out of my text editor and then pasting it into the edit window, after that I make sure it's all selected and then just use the {} tool at the top of edit window to format it all.
import re
from pprint import pprint
info = {}
records = []
with open('stats4.txt') as data:
for line in data:
gameid = [item.strip('[') for item in line.split(']')]
del gameid[-1]
gameidstr = ''.join(gameid)
gameid = gameidstr
line = line[7:]
player, stats, outcome, date = [item.strip() for item in line.split('-', 3)]
stats = dict(zip(('kills', 'deaths', 'assists'), map(int, stats.split('/'))))
date = tuple(map(int, date.split('-')))
info[player] = dict(zip(('gameid', 'player', 'stats', 'outcome', 'date'),
(gameid, player, stats, outcome, date)))
records.append(tuple((gameid, info[player])))
#print "\n\n", info, "\n\n" #print the info dictionary just to see
pprint(info)
champ = raw_input("Champ's name: ") #get champion name
#print info[champ].get('stats').get('kills'), "\n\n"
#print "[%s] %s - %s/%s/%s - %s-%s-%s" % (
# info[champ].get('gameid'), champ, info[champ].get('stats').get('kills'),
# info[champ].get('stats').get('deaths'), info[champ].get('stats').get('assists'),
# info[champ].get('date')[0], info[champ].get('date')[1],
# info[champ].get('date')[2])
#print "\n\n"
#print info[champ].values()
i = 0
for item in records: #this prints out all records
print "\n", "[%s] %s - %s/%s/%s - %s - %s-%s-%s" % (
records[i][0], records[i][1]['player'], records[i][1]['stats']['kills'],
records[i][1]['stats']['deaths'], records[i][1]['stats']['assists'],
records[i][1]['outcome'], records[i][1]['date'][0],
records[i][1]['date'][1], records[i][1]['date'][2])
i = i + 1
print "\n" + "*" * 50
i = 0
search = []
for item in records:
if champ in records[i][1]['player']:
search.append(records[i][1])
i = i + 1
if not search:
print "no match"
exit()
s = 0
for item in search:
print "\n[%s] %s - %s/%s/%s - %s - %s-%s-%s" % (search[s]['gameid'],
search[s]['player'], search[s]['stats']['kills'],
search[s]['stats']['deaths'], search[s]['stats']['assists'],
search[s]['outcome'], search[s]['date'][0], search[s]['date'][1],
search[s]['date'][2])
s = s + 1

Categories