AttributeError when extracting data from a URL in Python - python
I am using the code below to try to extract the data from the table at this URL. However, I get the following error message:
Error: `AttributeError: 'NoneType' object has no attribute 'find'` in
the line `data = iter(soup.find("table", {"class":
"tablestats"}).find("th", {"class": "header"}).find_all_next("tr"))`
My code is as follows:
# Question's original script: download the page, locate the data table and
# print two values from each row.
from bs4 import BeautifulSoup
import requests
r = requests.get(
"http://www.federalreserve.gov/econresdata/researchdata/feds200628_1.html")
soup = BeautifulSoup(r.content)
# This is the line reported to raise
# "AttributeError: 'NoneType' object has no attribute 'find'": the chained
# .find(...) calls assume every lookup matched, so a lookup that yields None
# breaks the next call in the chain.
data = iter(soup.find("table", {"class": "tablestats"}).find("th", {"class": "header"}).find_all_next("tr"))
# Consume the first two rows from the iterator and keep their text as headers.
headers = (next(data).text, next(data).text)
# Unpacks the <td> cells of each remaining row into exactly two names (a, b),
# so this only works for rows that have exactly two cells.
table_items = [(a.text, b.text) for ele in data for a, b in [ele.find_all("td")]]
# Print each pair, showing "null" when the second value is blank.
for a, b in table_items:
print(u"Date={}, Maturity={}".format(a, b if b.strip() else "null"))
Thank You
from bs4 import BeautifulSoup
import requests

# Download the page and parse it.
r = requests.get(
    "http://www.federalreserve.gov/econresdata/researchdata/feds200628_1.html")
soup = BeautifulSoup(r.content)
# column headers: search the parsed document (``soup``); no ``data`` variable
# exists in this script, so ``data.find_all`` would raise NameError.
h = soup.find_all("th", scope="col")
# get all the tr tags after the headers; each row becomes
# [text of the row's <th>] + [text of each of its <td> cells]
final = [
    [t.th.text] + [ele.text for ele in t.find_all("td")]
    for t in h[-1].find_all_next("tr")
]
headers = [th.text for th in h]
The final output is a list in which each row is its own list:
[['2015-06-05', '4.82039691', '-4.66420959', '-4.18904598',
'-3.94541434', '1.1477', '2.9361', '3.3588', '0.6943', '1.5881',
'2.3034', '2.7677', '3.0363', '3.1801', '3.2537', '3.2930', '3.3190',
'3.3431', '3.3707', '3.4038', '3.4428', '3.4871', '3.5357', '3.5876',
'3.6419', '3.6975', '3.7538', '3.8100', '3.8656', '3.9202', '3.9734',
'4.0250', '4.0748', '4.1225', '4.1682', '4.2117', '4.2530', '4.2921',
'0.3489', '0.7464', '1.1502', '1.4949', '1.7700', '1.9841', '2.1500',
'2.2800', '2.3837', '2.4685', '2.5396', '2.6006', '2.6544', '2.7027',
'2.7469', '2.7878', '2.8260', '2.8621', '2.8964', '2.9291', '2.9603',
'2.9901', '3.0187', '3.0461', '3.0724', '3.0976', '3.1217', '3.1448',
'3.1669', '3.1881', '0.3487', '0.7469', '1.1536', '1.5039', '1.7862',
'2.0078', '2.1811', '2.3179', '2.4277', '2.5181', '2.5943', '2.6603',
'2.7190', '2.7722', '2.8215', '2.8677', '2.9117', '2.9538', '2.9944',
'3.0338', '3.0721', '3.1094', '3.1458', '3.1814', '3.2161', '3.2501',
'3.2832', '3.3156', '3.3472', '3.3781', '1.40431658', '9.48795888'],
['2015-06-04', '4.64953424', '-4.52780982', '-3.98051369',
......................................
The headers:
['BETA0', 'BETA1', 'BETA2', 'BETA3', 'SVEN1F01', 'SVEN1F04', 'SVEN1F09', 'SVENF01', 'SVENF02', 'SVENF03', 'SVENF04', 'SVENF05', 'SVENF06', 'SVENF07', 'SVENF08', 'SVENF09', 'SVENF10', 'SVENF11', 'SVENF12', 'SVENF13', 'SVENF14', 'SVENF15', 'SVENF16', 'SVENF17', 'SVENF18', 'SVENF19', 'SVENF20', 'SVENF21', 'SVENF22', 'SVENF23', 'SVENF24', 'SVENF25', 'SVENF26', 'SVENF27', 'SVENF28', 'SVENF29', 'SVENF30', 'SVENPY01', 'SVENPY02', 'SVENPY03', 'SVENPY04', 'SVENPY05', 'SVENPY06', 'SVENPY07', 'SVENPY08', 'SVENPY09', 'SVENPY10', 'SVENPY11', 'SVENPY12', 'SVENPY13', 'SVENPY14', 'SVENPY15', 'SVENPY16', 'SVENPY17', 'SVENPY18', 'SVENPY19', 'SVENPY20', 'SVENPY21', 'SVENPY22', 'SVENPY23', 'SVENPY24', 'SVENPY25', 'SVENPY26', 'SVENPY27', 'SVENPY28', 'SVENPY29', 'SVENPY30', 'SVENY01', 'SVENY02', 'SVENY03', 'SVENY04', 'SVENY05', 'SVENY06', 'SVENY07', 'SVENY08', 'SVENY09', 'SVENY10', 'SVENY11', 'SVENY12', 'SVENY13', 'SVENY14', 'SVENY15', 'SVENY16', 'SVENY17', 'SVENY18', 'SVENY19', 'SVENY20', 'SVENY21', 'SVENY22', 'SVENY23', 'SVENY24', 'SVENY25', 'SVENY26', 'SVENY27', 'SVENY28', 'SVENY29', 'SVENY30', 'TAU1', 'TAU2']
There are a lot of issues in your code.
There is no table with class 'tablestats'.
There are no 'th' fields with class 'header'.
The following line:
table_items = [(a.text, b.text) for ele in data for a, b in [ele.find_all("td")]]
doesn't return just 2 values per row, so they can't be unpacked into a, b.
Related
Insert a dict into a sql table
i wanted to insert some data into my sql but having trouble because there is alot of columns so i would have to write alot of parameters after VALUE. I have a table with all attributes from the json file and a player_id which i add myself #Gamelogs for players and Teams import requests import json import psycopg2 # Connect to your postgres DB conn = psycopg2.connect("dbname=NBA user=postgres password=********") # Open a cursor to perform database operations cur = conn.cursor() cur.execute('CREATE TABLE player_logs("player_id" int,"GameId" int,"Date" int,"Team" VARCHAR(10),"Opponent" VARCHAR(10),"Minutes" int,"Arc3Assists" int,"Arc3FGA" int,"Arc3Frequency" int,"AssistPoints" int,"Assists" int,"AtRimAssists" int,"AtRimFG3AFrequency" int,"Avg2ptShotDistance" int,"Avg3ptShotDistance" int,"BadPassOutOfBoundsTurnovers" int,"BadPassSteals" int,"BadPassTurnovers" int,"Corner3FGA" int,"Corner3Frequency" int,"DeadBallTurnovers" int,"DefArc3ReboundPct" int,"DefFGReboundPct" int,"DefPoss" int,"DefRebounds" int,"DefThreePtReboundPct" int,"DefThreePtRebounds" int,"EfgPct" int,"FG2A" int,"FG2M" int,"FG3A" int,"FG3APct" int,"FTA" int,"Fg2Pct" int,"FirstChancePoints" int,"Fouls" int,"FoulsDrawn" int,"FtPoints" int,"LiveBallTurnoverPct" int,"LiveBallTurnovers" int,"LongMidRangeAccuracy" int,"LongMidRangeAssists" int,"LongMidRangeFGA" int,"LongMidRangeFGM" int,"LongMidRangeFrequency" int,"Loose Ball Fouls" int,"LostBallTurnovers" int,"NonHeaveArc3FGA" int,"OffFGReboundPct" int,"OffPoss" int,"OffRebounds" int,"OffShortMidRangeReboundPct" int,"OffTwoPtReboundPct" int,"OffTwoPtRebounds" int,"OnDefRtg" int,"OnOffRtg" int,"PenaltyArc3FGA" int,"PenaltyArc3Frequency" int,"PenaltyDefPoss" int,"PenaltyEfgPct" int,"PenaltyFG2A" int,"PenaltyFG2M" int,"PenaltyFG3A" int,"PenaltyFg2Pct" int,"PenaltyOffPoss" int,"PenaltyOffPossExcludingTakeFouls" int,"PenaltyOffPossPct" int,"PenaltyPoints" int,"PenaltyPointsExcludingTakeFouls" int,"PenaltyPointsPct" int,"PenaltyShotQualityAvg" int,"PenaltyTsPct" 
int,"PenaltyTurnovers" int,"Period2Fouls2Minutes" int,"Period3Fouls3Minutes" int,"PlusMinus" int,"Points" int,"PtsUnassisted2s" int,"Rebounds" int,"SecondChanceOffPoss" int,"SelfOReb" int,"SelfORebPct" int,"ShootingFouls" int,"ShootingFoulsDrawnPct" int,"ShortMidRangeAccuracy" int,"ShortMidRangeAssists" int,"ShortMidRangeFGA" int,"ShortMidRangeFGM" int,"ShortMidRangeFrequency" int,"ShortMidRangeOffReboundedPct" int,"ShotQualityAvg" int,"Steals" int,"ThreePtAssists" int,"TotalPoss" int,"TsPct" int,"Turnovers" int,"TwoPtAssists" int,"TwoPtShootingFoulsDrawn" int,"TwoPtShootingFoulsDrawnPct" int,"UnblockedLongMidRangeAccuracy" int,"UnblockedShortMidRangeAccuracy" int,"Usage" int,"Arc3Accuracy" int,"Arc3FGM" int,"Arc3PctAssisted" int,"Assisted2sPct" int,"Assisted3sPct" int,"AtRimAccuracy" int,"AtRimFGA" int,"AtRimFGM" int,"AtRimFrequency" int,"AtRimOffReboundedPct" int,"AtRimPctBlocked" int,"Blocked2s" int,"BlockedShortMidRange" int,"Blocks" int,"BlocksRecoveredPct" int,"Corner3Assists" int,"DefAtRimReboundPct" int,"DefLongMidRangeReboundPct" int,"DefShortMidRangeReboundPct" int,"DefTwoPtReboundPct" int,"DefTwoPtRebounds" int,"FG2APctBlocked" int,"FG3M" int,"Fg2aBlocked" int,"Fg3Pct" int,"LongMidRangeOffReboundedPct" int,"LostBallSteals" int,"NonHeaveArc3Accuracy" int,"NonHeaveArc3FGM" int,"NonHeaveFg3Pct" int,"NonPutbacksAssisted2sPct" int,"NonShootingFoulsDrawn" int,"NonShootingPenaltyNonTakeFoulsDrawn" int,"OffLongMidRangeReboundPct" int,"Offensive Fouls Drawn" int,"PenaltyArc3Accuracy" int,"PenaltyArc3FGM" int,"PenaltyAtRimAccuracy" int,"PenaltyAtRimFGA" int,"PenaltyAtRimFGM" int,"PenaltyAtRimFrequency" int,"PenaltyFG3M" int,"PenaltyFg3Pct" int,"PenaltyFtPoints" int,"PtsAssisted2s" int,"PtsAssisted3s" int,"PtsPutbacks" int,"PtsUnassisted3s" int,"RecoveredBlocks" int,"SecondChanceArc3FGA" int,"SecondChanceArc3Frequency" int,"SecondChanceEfgPct" int,"SecondChanceFG2A" int,"SecondChanceFG2M" int,"SecondChanceFG3A" int,"SecondChanceFg2Pct" int,"SecondChancePoints" 
int,"SecondChancePointsPct" int,"SecondChanceShotQualityAvg" int,"SecondChanceTsPct" int,"ShortMidRangePctAssisted" int,"ShortMidRangePctBlocked" int,"ThreePtShootingFoulsDrawn" int,"ThreePtShootingFoulsDrawnPct" int,"UnblockedArc3Accuracy" int,"UnblockedAtRimAccuracy" int,"OffArc3ReboundPct" int,"OffThreePtReboundPct" int,"OffThreePtRebounds" int,"Offensive Fouls" int,"Corner3Accuracy" int,"Corner3FGM" int,"ThreePtOffReboundedPct" int,"UnblockedCorner3Accuracy" int,"DefFTReboundPct" int,"FTDefRebounds" int,"Technical Free Throw Trips" int,"BlockedAtRim" int,"LostBallOutOfBoundsTurnovers" int,"OffAtRimReboundPct" int,"BlockedLongMidRange" int,"Charge Fouls Drawn" int,"LongMidRangePctAssisted" int,"NonShootingPenaltyNonTakeFouls" int,"SecondChanceTurnovers" int,"Travels" int,"SecondChanceAtRimFGA" int,"SecondChanceAtRimFrequency" int,"Clear Path Fouls" int,"DefCorner3ReboundPct" int,"HeaveAttempts" int,"LongMidRangePctBlocked" int,"2pt And 1 Free Throw Trips" int,"AtRimPctAssisted" int,"Period3Fouls4Minutes" int,"Period4Fouls4Minutes" int,"Charge Fouls" int,"Loose Ball Fouls Drawn" int,"PeriodOTFouls4Minutes" int,"SecondChanceAtRimAccuracy" int,"SecondChanceAtRimFGM" int,"PenaltyCorner3FGA" int,"PenaltyCorner3Frequency" int,"Corner3PctAssisted" int,"SecondChanceFtPoints" int,"OffCorner3ReboundPct" int,"SecondChanceArc3Accuracy" int,"SecondChanceArc3FGM" int,"SecondChanceFG3M" int,"SecondChanceFg3Pct" int,"3pt And 1 Free Throw Trips" int,"Defensive 3 Seconds Violations" int,"Period4Fouls5Minutes" int,"StepOutOfBoundsTurnovers" int,"Period1Fouls2Minutes"int)') x = 'https://api.pbpstats.com/get-all-players-for-league/nba' headers = {'user-agent': 'Chrome/88.0.4324.190'} jsonData1 = requests.get(x, headers=headers).json() # Player id and name EntityId = json.loads(json.dumps(jsonData1)[12:-1]) SeasonType = {'R':'Regular+Season','P':'Playoff+Season','A':'All'} EntityType = {'P':'Player','T':'Team'} Season = { '2008-09', '2009-10', '2010-11', '2011-12', '2012-13', 
'2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21' } def log (S:Season,ST:SeasonType,EI:EntityId,ET:EntityType): url = 'https://api.pbpstats.com/get-game-logs/nba' payload = { 'Season': S, 'SeasonType': ST, 'EntityId': EI, 'EntityType': ET } r = requests.get(url, headers=headers, params=payload).json() if r == {'error': 'no results'} : return() else : for c in r['multi_row_table_data']: j = {'Player_id':EI} c.update(j) cur.execute('INSERT INTO player_log (Player_id,GameId,Date,Team,Opponent,Minutes,Arc3Assists,Arc3FGA,Arc3Frequency,AssistPoints,Assists,AtRimAssists,AtRimFG3AFrequency,Avg2ptShotDistance,Avg3ptShotDistance,BadPassOutOfBoundsTurnovers,BadPassSteals,BadPassTurnovers,Corner3FGA,Corner3Frequency,DeadBallTurnovers,DefArc3ReboundPct,DefFGReboundPct,DefPoss,DefRebounds,DefThreePtReboundPct,DefThreePtRebounds,EfgPct,FG2A,FG2M,FG3A,FG3APct,FTA,Fg2Pct,FirstChancePoints,Fouls,FoulsDrawn,FtPoints,LiveBallTurnoverPct,LiveBallTurnovers,LongMidRangeAccuracy,LongMidRangeAssists,LongMidRangeFGA,LongMidRangeFGM,LongMidRangeFrequency,Loose_Ball_Fouls,LostBallTurnovers,NonHeaveArc3FGA,OffFGReboundPct,OffPoss,OffRebounds,OffShortMidRangeReboundPct,OffTwoPtReboundPct,OffTwoPtRebounds,OnDefRtg,OnOffRtg,PenaltyArc3FGA,PenaltyArc3Frequency,PenaltyDefPoss,PenaltyEfgPct,PenaltyFG2A,PenaltyFG2M,PenaltyFG3A,PenaltyFg2Pct,PenaltyOffPoss,PenaltyOffPossExcludingTakeFouls,PenaltyOffPossPct,PenaltyPoints,PenaltyPointsExcludingTakeFouls,PenaltyPointsPct,PenaltyShotQualityAvg,PenaltyTsPct,PenaltyTurnovers,Period2Fouls2Minutes,Period3Fouls3Minutes,PlusMinus,Points,PtsUnassisted2s,Rebounds,SecondChanceOffPoss,SelfOReb,SelfORebPct,ShootingFouls,ShootingFoulsDrawnPct,ShortMidRangeAccuracy,ShortMidRangeAssists,ShortMidRangeFGA,ShortMidRangeFGM,ShortMidRangeFrequency,ShortMidRangeOffReboundedPct,ShotQualityAvg,Steals,ThreePtAssists,TotalPoss,TsPct,Turnovers,TwoPtAssists,TwoPtShootingFoulsDrawn,TwoPtShootingFoulsDrawnPct,UnblockedLongMidRangeAccuracy,Unblock
edShortMidRangeAccuracy,Usage,Arc3Accuracy,Arc3FGM,Arc3PctAssisted,Assisted2sPct,Assisted3sPct,AtRimAccuracy,AtRimFGA,AtRimFGM,AtRimFrequency,AtRimOffReboundedPct,AtRimPctBlocked,Blocked2s,BlockedShortMidRange,Blocks,BlocksRecoveredPct,Corner3Assists,DefAtRimReboundPct,DefLongMidRangeReboundPct,DefShortMidRangeReboundPct,DefTwoPtReboundPct,DefTwoPtRebounds,FG2APctBlocked,FG3M,Fg2aBlocked,Fg3Pct,LongMidRangeOffReboundedPct,LostBallSteals,NonHeaveArc3Accuracy,NonHeaveArc3FGM,NonHeaveFg3Pct,NonPutbacksAssisted2sPct,NonShootingFoulsDrawn,NonShootingPenaltyNonTakeFoulsDrawn,OffLongMidRangeReboundPct,Offensive_Fouls_Drawn,PenaltyArc3Accuracy,PenaltyArc3FGM,PenaltyAtRimAccuracy,PenaltyAtRimFGA,PenaltyAtRimFGM,PenaltyAtRimFrequency,PenaltyFG3M,PenaltyFg3Pct,PenaltyFtPoints,PtsAssisted2s,PtsAssisted3s,PtsPutbacks,PtsUnassisted3s,RecoveredBlocks,SecondChanceArc3FGA,SecondChanceArc3Frequency,SecondChanceEfgPct,SecondChanceFG2A,SecondChanceFG2M,SecondChanceFG3A,SecondChanceFg2Pct,SecondChancePoints,SecondChancePointsPct,SecondChanceShotQualityAvg,SecondChanceTsPct,ShortMidRangePctAssisted,ShortMidRangePctBlocked,ThreePtShootingFoulsDrawn,ThreePtShootingFoulsDrawnPct,UnblockedArc3Accuracy,UnblockedAtRimAccuracy,OffArc3ReboundPct,OffThreePtReboundPct,OffThreePtRebounds,Offensive_Fouls,Corner3Accuracy,Corner3FGM,ThreePtOffReboundedPct,UnblockedCorner3Accuracy,DefFTReboundPct,FTDefRebounds,Technical_Free_Throw_Trips,BlockedAtRim,LostBallOutOfBoundsTurnovers,OffAtRimReboundPct,BlockedLongMidRange,Charge_Fouls_Drawn,LongMidRangePctAssisted,NonShootingPenaltyNonTakeFouls,SecondChanceTurnovers,Travels,SecondChanceAtRimFGA,SecondChanceAtRimFrequency,Clear_Path_Fouls,DefCorner3ReboundPct,HeaveAttempts,LongMidRangePctBlocked,"2pt_And_1_Free_Throw_Trips",AtRimPctAssisted,Period3Fouls4Minutes,Period4Fouls4Minutes,Charge_Fouls,Loose_Ball_Fouls_Drawn,PeriodOTFouls4Minutes,SecondChanceAtRimAccuracy,SecondChanceAtRimFGM,PenaltyCorner3FGA,PenaltyCorner3Frequency,Corner3PctAssisted,SecondChanceFt
Points,OffCorner3ReboundPct,SecondChanceArc3Accuracy,SecondChanceArc3FGM,SecondChanceFG3M,SecondChanceFg3Pct,"3pt_And_1_Free_Throw_Trips",Defensive_3_Seconds_Violations,Period4Fouls5Minutes,StepOutOfBoundsTurnovers,Period1Fouls2Minutes) VALUES', c) return() y=log('2020-21','Regular+Season','101108','Player') conn.commit() conn.close() cur.close() So was wondering if i could insert the data so it matched with the key and the column name. So the table and dict isnt order the same way either if it makes a difference.
This is fairly simple to do by adopting two helper libraries: pandas and preql. You can use pandas to load the JSON into a single dataframe, and then use preql to import it into the database. Here is runnable code demonstrating how to do it: import requests import pandas as pd from preql import Preql headers = {'user-agent': 'Chrome/88.0.4324.190'} def log(S,ST,EI,ET): url = 'https://api.pbpstats.com/get-game-logs/nba' payload = { 'Season': S, 'SeasonType': ST, 'EntityId': EI, 'EntityType': ET } r = requests.get(url, headers=headers, params=payload).json() if r == {'error': 'no results'} : return else: return [{'Player_id':EI, **d} for d in r['multi_row_table_data']] rows=log('2020-21','Regular+Season','101108','Player') df = pd.DataFrame.from_dict(rows) print("Dataframe shape:", df.shape) # (50, 218) p = Preql() # For postgres use: p = Preql("postgres://user:pass@server") p.import_pandas(my_table=df) print('SQL columns:', p('count(columns(my_table))')) # 219 - includes id print('SQL rows:', p('count(my_table)')) # 50 Note that this code example is currently using Python's built-in SQLite, but you can easily make it work with postgres by providing Preql with the postgres URL, as the comment shows. Install them with pip install pandas preql-lang
# Might not be the best solution, but I wrote a small function that just converts to SQL statement.
def insert_into_table_query(table, data):
    """Build a parameterized ``INSERT`` statement for a single row.

    Args:
        table: Name of the target table. It is placed into the statement
            verbatim, so schema-qualified names keep working; it must come
            from trusted code, not user input.
        data: Mapping of column name to value for the row to insert.

    Returns:
        A ``(sql, values)`` tuple. ``sql`` contains one ``%s`` placeholder
        per column and ``values`` is a tuple of the row values in the same
        order as the columns, so the pair is meant to be passed to a
        ``%s``-paramstyle driver as ``cursor.execute(sql, values)``.
    """
    # Double-quote every column name (doubling any embedded quote) so that
    # names containing spaces, starting with a digit, or using mixed case
    # -- e.g. "Loose Ball Fouls", "2pt And 1 Free Throw Trips", "GameId" --
    # produce valid SQL and match columns created with quoted identifiers.
    columns = ', '.join(
        '"{}"'.format(str(name).replace('"', '""')) for name in data
    )
    values = tuple(data.values())
    # Values are never interpolated into the SQL text; only placeholders are.
    placeholders = ', '.join('%s' for _ in values)
    sql = 'INSERT INTO {tn} ({cols}) VALUES ({vals});'.format(
        tn=table, cols=columns, vals=placeholders
    )
    return (sql, values)
Scraping an ASPX page with Beautiful Soup
(enter image description here) I am trying to scrape this page using Beautiful Soup. I first tried to find an API/JSON endpoint behind the page, but couldn't find one. I then tried BeautifulSoup with an HTML parser, but I can't get anywhere with it, because the page loads its data by calling a function from an onclick handler: GetFiiStatistics('F-INDEX FUTURES'). How can I go about scraping pages like this? Webpage: https://www.motilaloswal.com/markets/derivative-market/FII-Statistics.aspx
The data is loaded via Javascript from their API. Unfortunately, I don't know, how INPUT, CATAGORY and FLAG values are computed: import json import requests import pandas as pd from bs4 import BeautifulSoup api_url = 'https://www.motilaloswal.com/ControllerBeta/APIRequest.aspx' data = { 'INPUT':'3yDBtksOiDjLLYaySd5NYgCcFnUOx8Jh2c8SRJvEhAs=', 'CATAGORY':'Q668CVoAKYOr7whA+PW25A==', 'FLAG':'VrTQQDRa72uSLVOyZO5Nqg==' } json_data = json.loads(requests.post(api_url, data=data).json()) # uncomment this to print all data: # print(json.dumps(json_data, indent=4)) df = pd.DataFrame(json_data) print(df) Prints: IndexType Date BuyContracts BuyValue SellContracts SellValue Net_BuySellValue OIContracts_eod OIValue_eod 0 INDEX FUTURES 2020-07-14T00:00:00 67037 4403.320 88350 6048.750 -1645.430 123985 8799.15 1 INDEX FUTURES 2020-07-13T00:00:00 53612 3644.000 61174 4142.300 -498.300 105126 7803.56 2 INDEX FUTURES 2020-07-10T00:00:00 62735 4313.250 83916 5923.270 -1610.020 113346 8369.47 3 INDEX FUTURES 2020-07-09T00:00:00 67222 4773.340 51094 3442.520 1330.820 119977 8971.28 4 INDEX FUTURES 2020-07-08T00:00:00 83367 5661.640 69042 4659.280 1002.360 106027 7686.32 .. ... ... ... ... ... ... ... ... ... 
495 INDEX FUTURES 2018-07-05T00:00:00 28566 2585.923 21393 1881.601 704.322 243417 19847.72 496 INDEX FUTURES 2018-07-04T00:00:00 21339 1875.184 26507 2425.003 -549.819 245786 20201.28 497 INDEX FUTURES 2018-07-03T00:00:00 30019 2610.728 28837 2563.647 47.081 237564 19322.63 498 INDEX FUTURES 2018-07-02T00:00:00 24976 2191.751 29501 2541.589 -349.838 226100 18203.48 499 INDEX FUTURES 2018-06-29T00:00:00 45399 3814.480 27297 2371.817 1442.663 227041 18387.33 [500 rows x 9 columns] EDIT: To scrape other tabs, change the data= parameter in request: import json import requests import pandas as pd from bs4 import BeautifulSoup api_url = 'https://www.motilaloswal.com/ControllerBeta/APIRequest.aspx' data_future = { 'INPUT':'3yDBtksOiDjLLYaySd5NYgCcFnUOx8Jh2c8SRJvEhAs=', 'CATAGORY':'Q668CVoAKYOr7whA+PW25A==', 'FLAG':'VrTQQDRa72uSLVOyZO5Nqg==' } data_option = { 'INPUT': '3yDBtksOiDjLLYaySd5NYmcoKC8x7z5PFO880mjcQ2U=', 'CATAGORY': 'Q668CVoAKYOr7whA+PW25A==', 'FLAG': "VrTQQDRa72uSLVOyZO5Nqg==" } data_stock_future = { 'INPUT': '7F5jZM46TTOICwT1N6AfqkP7gWI2CpTGCWmll4bhYow=', 'CATAGORY': 'Q668CVoAKYOr7whA+PW25A==', 'FLAG': 'VrTQQDRa72uSLVOyZO5Nqg==' } data_stock_option = { 'INPUT': '7F5jZM46TTOICwT1N6Afqv1N6pCh+1OrTfhSwG6Azes=', 'CATAGORY': 'Q668CVoAKYOr7whA+PW25A==', 'FLAG': 'VrTQQDRa72uSLVOyZO5Nqg==' } json_data = json.loads(requests.post(api_url, data=data_stock_option).json()) # <-- change data= to data_stock_future or data_option ... 
# uncomment this to print all data: # print(json.dumps(json_data, indent=4)) df = pd.DataFrame(json_data) print(df) Prints: IndexType Date BuyContracts BuyValue SellContracts SellValue Net_BuySellValue OIContracts_eod OIValue_eod 0 STOCK OPTIONS 2020-07-14T00:00:00 52546 4349.020 54139 4525.850 -176.830 115998 7661.010 1 STOCK OPTIONS 2020-07-13T00:00:00 50604 4242.330 52221 4413.040 -170.710 110663 7329.990 2 STOCK OPTIONS 2020-07-10T00:00:00 82502 6218.200 82608 6219.420 -1.220 109680 7232.900 3 STOCK OPTIONS 2020-07-09T00:00:00 64743 4725.430 64613 4714.460 10.970 104780 6945.740 4 STOCK OPTIONS 2020-07-08T00:00:00 75481 5201.770 75713 5220.580 -18.810 100200 6584.390 .. ... ... ... ... ... ... ... ... ... 495 STOCK OPTIONS 2018-07-05T00:00:00 94696 6728.086 93256 6617.059 111.027 66773 4471.483 496 STOCK OPTIONS 2018-07-04T00:00:00 68719 4762.333 69376 4794.350 -32.017 59195 4005.045 497 STOCK OPTIONS 2018-07-03T00:00:00 64283 4351.226 64982 4347.153 4.073 53028 3581.946 498 STOCK OPTIONS 2018-07-02T00:00:00 74479 4913.606 74730 4897.239 16.367 44627 3024.131 499 STOCK OPTIONS 2018-06-29T00:00:00 69730 4694.675 68447 4645.350 49.325 35486 2421.744 [500 rows x 9 columns]
requests and bs4 cannot read the whole html
I am trying to get all the hrefs in the list on this website: https://nihongonosensei.net/?page_id=10246. The website is very simple and clean. After reviewing the source, I found nothing dynamic. However, if I do import requests url = 'https://nihongonosensei.net/?page_id=10246' r = requests.get(url) r.text r.text only contains around 20000 characters of information. More than half of the HTML is missing. I tried to copy the whole HTML from "view page source" and load it directly into BeautifulSoup: from bs4 import BeautifulSoup html = '' # too long to copy. Here is the link: view-source:https://nihongonosensei.net/?page_id=10246 soup = BeautifulSoup(html, 'html.parser') Still, only around 20000 characters are retained and the top half of the HTML is missing. Here is my question: Are there any character restrictions in requests and BeautifulSoup? If so, how can I remove the limitation? If not, why can't I get the full HTML? Thanks a lot! Rachel
import requests from bs4 import BeautifulSoup r = requests.get("https://nihongonosensei.net/?page_id=10246") soup = BeautifulSoup(r.text, 'html.parser') for item in soup.findAll("a", href=True): item = item.get("href") if item.startswith("http"): print(item) output: https://nihongonosensei.net/ http://nihongonosensei.net/?p=3547 http://nihongonosensei.net/?p=3563 http://nihongonosensei.net/?p=3568 http://nihongonosensei.net/?p=3600 http://nihongonosensei.net/?p=3614 http://nihongonosensei.net/?p=3618 http://nihongonosensei.net/?p=3622 http://nihongonosensei.net/?p=3626 http://nihongonosensei.net/?p=3633 http://nihongonosensei.net/?p=3695 http://nihongonosensei.net/?p=3697 http://nihongonosensei.net/?p=3702 http://nihongonosensei.net/?p=3707 http://nihongonosensei.net/?p=3710 http://nihongonosensei.net/?p=3712 http://nihongonosensei.net/?p=3714 http://nihongonosensei.net/?p=3719 http://nihongonosensei.net/?p=3722 http://nihongonosensei.net/?p=3726 http://nihongonosensei.net/?p=3730 http://nihongonosensei.net/?p=3733 http://nihongonosensei.net/?p=3735 http://nihongonosensei.net/?p=5236 http://nihongonosensei.net/?p=5238 http://nihongonosensei.net/?p=5240 http://nihongonosensei.net/?p=5244 http://nihongonosensei.net/?p=5618 http://nihongonosensei.net/?p=5620 http://nihongonosensei.net/?p=5961 http://nihongonosensei.net/?p=5965 http://nihongonosensei.net/?p=5967 http://nihongonosensei.net/?p=5970 http://nihongonosensei.net/?p=5972 http://nihongonosensei.net/?p=6772 http://nihongonosensei.net/?p=7977 http://nihongonosensei.net/?p=7979 http://nihongonosensei.net/?p=7983 http://nihongonosensei.net/?p=7985 http://nihongonosensei.net/?p=7987 http://nihongonosensei.net/?p=8869 http://nihongonosensei.net/?p=8891 http://nihongonosensei.net/?p=9192 http://nihongonosensei.net/?p=9197 http://nihongonosensei.net/?p=9198 http://nihongonosensei.net/?p=9199 http://nihongonosensei.net/?p=9219 http://nihongonosensei.net/?p=9221 http://nihongonosensei.net/?p=9223 
http://nihongonosensei.net/?p=9249 http://nihongonosensei.net/?p=9280 http://nihongonosensei.net/?p=9320 http://nihongonosensei.net/?p=9322 http://nihongonosensei.net/?p=9324 http://nihongonosensei.net/?p=9327 http://nihongonosensei.net/?p=9329 http://nihongonosensei.net/?p=9353 http://nihongonosensei.net/?p=9359 http://nihongonosensei.net/?p=9360 http://nihongonosensei.net/?p=13973 http://nihongonosensei.net/?p=13972 http://nihongonosensei.net/?p=13974 http://nihongonosensei.net/?p=11851 http://nihongonosensei.net/?p=11858 http://nihongonosensei.net/?p=12202 http://nihongonosensei.net/?p=12999 http://nihongonosensei.net/?p=13112 http://nihongonosensei.net/?p=13364 http://nihongonosensei.net/?p=13494 http://nihongonosensei.net/?p=14887 http://nihongonosensei.net/?p=14889 http://nihongonosensei.net/?p=14915 http://nihongonosensei.net/?p=14918 http://nihongonosensei.net/?p=17745 http://nihongonosensei.net/?p=18155 http://nihongonosensei.net/?p=18159 http://nihongonosensei.net/?p=18188 http://nihongonosensei.net/?p=18206 http://nihongonosensei.net/?p=18204 http://nihongonosensei.net/?p=18223 http://nihongonosensei.net/?p=18407 http://nihongonosensei.net/?p=18460 http://nihongonosensei.net/?p=18461 http://nihongonosensei.net/?p=18578 http://nihongonosensei.net/?p=18611 http://nihongonosensei.net/?p=18696 http://nihongonosensei.net/?p=18705 http://nihongonosensei.net/?p=18707 http://nihongonosensei.net/?p=18763 http://nihongonosensei.net/?p=3738 http://nihongonosensei.net/?p=3745 http://nihongonosensei.net/?p=3759 http://nihongonosensei.net/?p=3776 http://nihongonosensei.net/?p=3778 http://nihongonosensei.net/?p=3781 http://nihongonosensei.net/?p=3783 http://nihongonosensei.net/?p=3785 http://nihongonosensei.net/?p=3797 http://nihongonosensei.net/?p=3799 http://nihongonosensei.net/?p=3801 http://nihongonosensei.net/?p=3804 http://nihongonosensei.net/?p=3809 http://nihongonosensei.net/?p=3824 http://nihongonosensei.net/?p=3826 http://nihongonosensei.net/?p=13941 
http://nihongonosensei.net/?p=3833 http://nihongonosensei.net/?p=4097 http://nihongonosensei.net/?p=5058 http://nihongonosensei.net/?p=5246 http://nihongonosensei.net/?p=5248 http://nihongonosensei.net/?p=5251 http://nihongonosensei.net/?p=5253 http://nihongonosensei.net/?p=5255 http://nihongonosensei.net/?p=5616 http://nihongonosensei.net/?p=5614 http://nihongonosensei.net/?p=5978 http://nihongonosensei.net/?p=5982 http://nihongonosensei.net/?p=5974 http://nihongonosensei.net/?p=6203 http://nihongonosensei.net/?p=6205 http://nihongonosensei.net/?p=11829 http://nihongonosensei.net/?p=11830 http://nihongonosensei.net/?p=6209 http://nihongonosensei.net/?p=6211 http://nihongonosensei.net/?p=7909 http://nihongonosensei.net/?p=7970 http://nihongonosensei.net/?p=7972 http://nihongonosensei.net/?p=7974 http://nihongonosensei.net/?p=7990 http://nihongonosensei.net/?p=7992 http://nihongonosensei.net/?p=8008 http://nihongonosensei.net/?p=8010 http://nihongonosensei.net/?p=8012 http://nihongonosensei.net/?p=9447 http://nihongonosensei.net/?p=9452 http://nihongonosensei.net/?p=9876 http://nihongonosensei.net/?p=9884 http://nihongonosensei.net/?p=9890 http://nihongonosensei.net/?p=9891 http://nihongonosensei.net/?p=9945 http://nihongonosensei.net/?p=14072 http://nihongonosensei.net/?p=14073 http://nihongonosensei.net/?p=10533 http://nihongonosensei.net/?p=10532 http://nihongonosensei.net/?p=11855 http://nihongonosensei.net/?p=11521 http://nihongonosensei.net/?p=18734 http://nihongonosensei.net/?p=18726 http://nihongonosensei.net/?p=11862 http://nihongonosensei.net/?p=11864 http://nihongonosensei.net/?p=11866 http://nihongonosensei.net/?p=12025 http://nihongonosensei.net/?p=12027 http://nihongonosensei.net/?p=12115 http://nihongonosensei.net/?p=13076 http://nihongonosensei.net/?p=13142 http://nihongonosensei.net/?p=13145 http://nihongonosensei.net/?p=13453 http://nihongonosensei.net/?p=13456 http://nihongonosensei.net/?p=13459 http://nihongonosensei.net/?p=13479 
http://nihongonosensei.net/?p=13483 http://nihongonosensei.net/?p=3535 http://nihongonosensei.net/?p=14896 http://nihongonosensei.net/?p=18263 http://nihongonosensei.net/?p=18324 http://nihongonosensei.net/?p=18366 http://nihongonosensei.net/?p=18373 http://nihongonosensei.net/?p=18381 http://nihongonosensei.net/?p=18398 http://nihongonosensei.net/?p=18680 http://nihongonosensei.net/?p=18682 http://nihongonosensei.net/?p=18684 http://nihongonosensei.net/?p=1700 http://nihongonosensei.net/?p=1708 http://nihongonosensei.net/?p=1713 http://nihongonosensei.net/?p=1718 http://nihongonosensei.net/?p=1735 http://nihongonosensei.net/?p=1742 http://nihongonosensei.net/?p=1745 http://nihongonosensei.net/?p=1748 http://nihongonosensei.net/?p=1752 http://nihongonosensei.net/?p=1755 http://nihongonosensei.net/?p=1758 http://nihongonosensei.net/?p=1761 http://nihongonosensei.net/?p=1764 http://nihongonosensei.net/?p=1767 http://nihongonosensei.net/?p=1770 http://nihongonosensei.net/?p=1773 http://nihongonosensei.net/?p=1777 http://nihongonosensei.net/?p=1782 http://nihongonosensei.net/?p=1785 http://nihongonosensei.net/?p=1788 http://nihongonosensei.net/?p=1791 http://nihongonosensei.net/?p=1794 http://nihongonosensei.net/?p=1797 http://nihongonosensei.net/?p=1801 http://nihongonosensei.net/?p=1804 http://nihongonosensei.net/?p=1807 http://nihongonosensei.net/?p=1810 http://nihongonosensei.net/?p=1813 http://nihongonosensei.net/?p=1816 http://nihongonosensei.net/?p=1819 http://nihongonosensei.net/?p=1823 http://nihongonosensei.net/?p=1828 http://nihongonosensei.net/?p=1835 http://nihongonosensei.net/?p=1838 http://nihongonosensei.net/?p=12082 http://nihongonosensei.net/?p=3470 http://nihongonosensei.net/?p=3477 http://nihongonosensei.net/?p=3484 http://nihongonosensei.net/?p=3492 http://nihongonosensei.net/?p=3553 http://nihongonosensei.net/?p=3559 http://nihongonosensei.net/?p=13970 http://nihongonosensei.net/?p=6331 http://nihongonosensei.net/?p=6335 
http://nihongonosensei.net/?p=6339 http://nihongonosensei.net/?p=6341 http://nihongonosensei.net/?p=6769 http://nihongonosensei.net/?p=8506 http://nihongonosensei.net/?p=8857 http://nihongonosensei.net/?p=9283 http://nihongonosensei.net/?p=9306 http://nihongonosensei.net/?p=9308 http://nihongonosensei.net/?p=9312 http://nihongonosensei.net/?p=9314 http://nihongonosensei.net/?p=9422 http://nihongonosensei.net/?p=9462 http://nihongonosensei.net/?p=9860 http://nihongonosensei.net/?p=11635 http://nihongonosensei.net/?p=12073 http://nihongonosensei.net/?p=12784 http://nihongonosensei.net/?p=12795 http://nihongonosensei.net/?p=12821 http://nihongonosensei.net/?p=12824 http://nihongonosensei.net/?p=12830 http://nihongonosensei.net/?p=12832 http://nihongonosensei.net/?p=12834 http://nihongonosensei.net/?p=12987 http://nihongonosensei.net/?p=12995 http://nihongonosensei.net/?p=13018 http://nihongonosensei.net/?p=3761 http://nihongonosensei.net/?p=13326 http://nihongonosensei.net/?p=13327 http://nihongonosensei.net/?p=13340 http://nihongonosensei.net/?p=13344 http://nihongonosensei.net/?p=17748 http://nihongonosensei.net/?p=17758 http://nihongonosensei.net/?p=17767 http://nihongonosensei.net/?p=17771 http://nihongonosensei.net/?p=18162 http://nihongonosensei.net/?p=18165 http://nihongonosensei.net/?p=18171 http://nihongonosensei.net/?p=18202 http://nihongonosensei.net/?p=18199 http://nihongonosensei.net/?p=18314 http://nihongonosensei.net/?p=18312 http://nihongonosensei.net/?p=18399 http://nihongonosensei.net/?p=18400 http://nihongonosensei.net/?p=18585 http://nihongonosensei.net/?p=18589 http://nihongonosensei.net/?p=18591 http://nihongonosensei.net/?p=18301 http://nihongonosensei.net/?p=18701 http://nihongonosensei.net/?p=18773 http://nihongonosensei.net/?p=18775 http://nihongonosensei.net/?p=18788 http://nihongonosensei.net/?p=18790 http://nihongonosensei.net/?p=18792 http://nihongonosensei.net/?p=18821 http://nihongonosensei.net/?p=3571 http://nihongonosensei.net/?p=9936 
http://nihongonosensei.net/?p=3578 http://nihongonosensei.net/?p=5980 http://nihongonosensei.net/?p=3609 http://nihongonosensei.net/?p=3680 http://nihongonosensei.net/?p=3828 http://nihongonosensei.net/?p=6345 http://nihongonosensei.net/?p=6347 http://nihongonosensei.net/?p=6351 http://nihongonosensei.net/?p=7905 http://nihongonosensei.net/?p=7907 http://nihongonosensei.net/?p=8063 http://nihongonosensei.net/?p=18470 http://nihongonosensei.net/?p=18471 http://nihongonosensei.net/?p=9425 http://nihongonosensei.net/?p=9426 http://nihongonosensei.net/?p=9465 http://nihongonosensei.net/?p=9466 http://nihongonosensei.net/?p=9872 http://nihongonosensei.net/?p=10058 http://nihongonosensei.net/?p=11304 http://nihongonosensei.net/?p=11948 http://nihongonosensei.net/?p=18497 http://nihongonosensei.net/?p=18499 http://nihongonosensei.net/?p=18501 http://nihongonosensei.net/?p=12143 http://nihongonosensei.net/?p=12789 http://nihongonosensei.net/?p=12882 http://nihongonosensei.net/?p=12885 http://nihongonosensei.net/?p=12886 http://nihongonosensei.net/?p=13074 http://nihongonosensei.net/?p=13087 http://nihongonosensei.net/?p=13092 http://nihongonosensei.net/?p=13136 http://nihongonosensei.net/?p=13151 http://nihongonosensei.net/?p=13371 http://nihongonosensei.net/?p=18157 http://nihongonosensei.net/?p=18219 http://nihongonosensei.net/?p=18221 http://nihongonosensei.net/?p=18266 http://nihongonosensei.net/?p=18292 http://nihongonosensei.net/?p=18293 http://nihongonosensei.net/?p=18392 http://nihongonosensei.net/?p=18488 http://nihongonosensei.net/?p=18489 http://nihongonosensei.net/?p=18593 http://nihongonosensei.net/?p=18595 http://nihongonosensei.net/?p=18612 http://nihongonosensei.net/?p=18613 http://nihongonosensei.net/?p=18657 http://nihongonosensei.net/?p=18659 http://nihongonosensei.net/?p=18662 http://nihongonosensei.net/?p=18664 http://nihongonosensei.net/?p=12827 http://nihongonosensei.net/?p=4094 http://nihongonosensei.net/?p=18732 http://nihongonosensei.net/?p=18728 
http://nihongonosensei.net/?p=18720 http://nihongonosensei.net/?p=18722 http://nihongonosensei.net/?p=18730 http://nihongonosensei.net/?p=18724 http://nihongonosensei.net/?p=4094 http://nihongonosensei.net/?p=3500 http://nihongonosensei.net/?p=3526 http://nihongonosensei.net/?p=3529 http://nihongonosensei.net/?p=3474 http://nihongonosensei.net/?p=3585 http://nihongonosensei.net/?p=3606 http://nihongonosensei.net/?p=3643 http://nihongonosensei.net/?p=3650 http://nihongonosensei.net/?p=3656 http://nihongonosensei.net/?p=5062 http://nihongonosensei.net/?p=5941 http://nihongonosensei.net/?p=5943 http://nihongonosensei.net/?p=5945 http://nihongonosensei.net/?p=5947 http://nihongonosensei.net/?p=5949 http://nihongonosensei.net/?p=5984 http://nihongonosensei.net/?p=7024 http://nihongonosensei.net/?p=7026 http://nihongonosensei.net/?p=7096 http://nihongonosensei.net/?p=7098 http://nihongonosensei.net/?p=7100 http://nihongonosensei.net/?p=7102 http://nihongonosensei.net/?p=7104 http://nihongonosensei.net/?p=7152 http://nihongonosensei.net/?p=10116 http://nihongonosensei.net/?p=3550 http://nihongonosensei.net/?p=8048 http://nihongonosensei.net/?p=6349 http://nihongonosensei.net/?p=8051 http://nihongonosensei.net/?p=8058 http://nihongonosensei.net/?p=8061 http://nihongonosensei.net/?p=8070 http://nihongonosensei.net/?p=8080 http://nihongonosensei.net/?p=8082 http://nihongonosensei.net/?p=4085 http://nihongonosensei.net/?p=4088 http://nihongonosensei.net/?p=8540 http://nihongonosensei.net/?p=8542 http://nihongonosensei.net/?p=8558 http://nihongonosensei.net/?p=8564 http://nihongonosensei.net/?p=8665 http://nihongonosensei.net/?p=8669 http://nihongonosensei.net/?p=8672 http://nihongonosensei.net/?p=8675 http://nihongonosensei.net/?p=8710 http://nihongonosensei.net/?p=8705 http://nihongonosensei.net/?p=7981 http://nihongonosensei.net/?p=8724 http://nihongonosensei.net/?p=8730 http://nihongonosensei.net/?p=8733 http://nihongonosensei.net/?p=8856 http://nihongonosensei.net/?p=9310 
http://nihongonosensei.net/?p=9352 http://nihongonosensei.net/?p=5242 http://nihongonosensei.net/?p=9385 http://nihongonosensei.net/?p=9386 http://nihongonosensei.net/?p=9488 http://nihongonosensei.net/?p=9487 http://nihongonosensei.net/?p=12075 http://nihongonosensei.net/?p=18193 http://nihongonosensei.net/?p=18350 http://nihongonosensei.net/?p=18351 http://nihongonosensei.net/?p=18406 http://nihongonosensei.net/?p=18428 http://nihongonosensei.net/?p=18447 http://nihongonosensei.net/?p=18587 http://nihongonosensei.net/?p=18698 http://nihongonosensei.net/?p=18695 http://nihongonosensei.net/?p=18703 http://nihongonosensei.net/?p=3659 http://nihongonosensei.net/?p=3673 http://nihongonosensei.net/?p=3676 http://nihongonosensei.net/?p=3683 http://nihongonosensei.net/?p=3686 http://nihongonosensei.net/?p=18190 http://nihongonosensei.net/?p=3747 http://nihongonosensei.net/?p=3749 http://nihongonosensei.net/?p=3753 http://nihongonosensei.net/?p=5951 http://nihongonosensei.net/?p=5953 http://nihongonosensei.net/?p=5955 http://nihongonosensei.net/?p=5957 http://nihongonosensei.net/?p=7068 http://nihongonosensei.net/?p=7071 http://nihongonosensei.net/?p=7075 http://nihongonosensei.net/?p=7121 http://nihongonosensei.net/?p=3541 http://nihongonosensei.net/?p=8004 http://nihongonosensei.net/?p=6343 http://nihongonosensei.net/?p=8144 http://nihongonosensei.net/?p=8143 http://nihongonosensei.net/?p=8150 http://nihongonosensei.net/?p=8152 http://nihongonosensei.net/?p=8161 http://nihongonosensei.net/?p=8164 http://nihongonosensei.net/?p=8257 http://nihongonosensei.net/?p=9482 http://nihongonosensei.net/?p=8261 http://nihongonosensei.net/?p=8159 http://nihongonosensei.net/?p=8272 http://nihongonosensei.net/?p=8274 http://nihongonosensei.net/?p=8277 http://nihongonosensei.net/?p=8279 http://nihongonosensei.net/?p=9215 http://nihongonosensei.net/?p=9217 http://nihongonosensei.net/?p=9859 http://nihongonosensei.net/?p=10102 http://nihongonosensei.net/?p=18631 
http://nihongonosensei.net/?p=18632 http://nihongonosensei.net/?p=11303 http://nihongonosensei.net/?p=12781 http://nihongonosensei.net/?p=12812 http://nihongonosensei.net/?p=12799 http://nihongonosensei.net/?p=12802 http://nihongonosensei.net/?p=12809 http://nihongonosensei.net/?p=13150 http://nihongonosensei.net/?p=11946 http://nihongonosensei.net/?p=13152 http://nihongonosensei.net/?p=18503 http://nihongonosensei.net/?p=3582 http://nihongonosensei.net/?p=17664 http://nihongonosensei.net/?p=17751 http://nihongonosensei.net/?p=18264 http://nihongonosensei.net/?p=18267 http://nihongonosensei.net/?p=18265 http://nihongonosensei.net/?p=18303 http://nihongonosensei.net/?p=18393 http://nihongonosensei.net/?p=8281 http://nihongonosensei.net/?p=18614 http://nihongonosensei.net/?p=18676 http://nihongonosensei.net/?p=18678 http://nihongonosensei.net/?p=18816 http://nihongonosensei.net/?p=18818 http://nihongonosensei.net/?p=18812 http://nihongonosensei.net/?p=18809 http://nihongonosensei.net/?p=18807 http://nihongonosensei.net/?p=18805 http://nihongonosensei.net/?p=18803 http://nihongonosensei.net/?p=18330 http://nihongonosensei.net/?p=3446 http://nihongonosensei.net/?p=3662 http://nihongonosensei.net/?p=5182 http://nihongonosensei.net/?p=9262 http://nihongonosensei.net/?p=9264 http://nihongonosensei.net/?p=3647 http://nihongonosensei.net/?p=8567 http://nihongonosensei.net/?p=9343 http://nihongonosensei.net/?p=8045 http://nihongonosensei.net/?p=18305 http://nihongonosensei.net/?p=18307 http://nihongonosensei.net/?p=18427 http://nihongonosensei.net/?p=18615 http://nihongonosensei.net/?p=18713 http://nihongonosensei.net/?p=18715 http://nihongonosensei.net/?p=18717 http://nihongonosensei.net/?p=18736 http://nihongonosensei.net/?p=3668 http://nihongonosensei.net/?p=5180 http://nihongonosensei.net/?p=4090 http://nihongonosensei.net/?p=11943 http://nihongonosensei.net/?p=11950 http://nihongonosensei.net/?p=11941 http://nihongonosensei.net/?p=12816 
http://nihongonosensei.net/?p=18323 http://nihongonosensei.net/?p=18349 http://nihongonosensei.net/?p=18784 http://nihongonosensei.net/?p=18786 http://nihongonosensei.net/?p=18814 http://nihongonosensei.net/?p=18405 http://nihongonosensei.net/?p=5233 http://nihongonosensei.net/?p=7154 http://nihongonosensei.net/?p=7938 http://nihongonosensei.net/?p=7943 http://nihongonosensei.net/?p=8509 http://nihongonosensei.net/?p=8541 http://nihongonosensei.net/?p=8886 http://nihongonosensei.net/?p=8889 http://nihongonosensei.net/?p=9440 http://nihongonosensei.net/?p=9441 http://nihongonosensei.net/?p=3639 http://nihongonosensei.net/?p=3575 http://nihongonosensei.net/?p=3603 http://nihongonosensei.net/?p=11627 http://nihongonosensei.net/?p=11953 http://nihongonosensei.net/?p=11955 http://nihongonosensei.net/?p=17914 http://nihongonosensei.net/?p=18195 http://nihongonosensei.net/?p=18217 http://nihongonosensei.net/?p=18348 http://nihongonosensei.net/?p=18371 http://nihongonosensei.net/?p=18375 http://nihongonosensei.net/?p=18377 http://nihongonosensei.net/?p=18379 http://nihongonosensei.net/?p=18653 http://nihongonosensei.net/?p=18655 http://nihongonosensei.net/?p=13346 http://nihongonosensei.net/?p=13347 http://nihongonosensei.net/?p=13348 http://nihongonosensei.net/?p=13358 http://nihongonosensei.net/?p=13362 http://nihongonosensei.net/?p=13373 http://nihongonosensei.net/?p=13369 http://nihongonosensei.net/?p=13379 http://nihongonosensei.net/?p=13385 http://nihongonosensei.net/?p=13462 http://nihongonosensei.net/?p=13466 http://nihongonosensei.net/?p=14905 http://nihongonosensei.net/?p=17576 http://nihongonosensei.net/?p=17593 http://nihongonosensei.net/?p=17597 http://nihongonosensei.net/?p=17600 http://nihongonosensei.net/?p=17917 http://nihongonosensei.net/?p=18268 http://nihongonosensei.net/?p=18363 http://nihongonosensei.net/?p=19118 http://nihongonosensei.net/ http://nihongonosensei.net/?cat=7 http://nihongonosensei.net/?cat=3 http://nihongonosensei.net/?page_id=10246 
http://nihongonosensei.net/?page_id=10246#linkn1 http://nihongonosensei.net/?page_id=10246#linkn2 http://nihongonosensei.net/?page_id=10246#linkn3 http://nihongonosensei.net/?page_id=10246#linkn4n5 http://nihongonosensei.net/?page_id=10246#linkn0 http://nihongonosensei.net/?page_id=13879 http://nihongonosensei.net/?page_id=8874 http://nihongonosensei.net/?p=17729 http://nihongonosensei.net/?page_id=8874#link2019 http://nihongonosensei.net/?page_id=8874#link30 http://nihongonosensei.net/?page_id=8874#link29 http://nihongonosensei.net/?page_id=8874#link28 http://nihongonosensei.net/?page_id=8874#link27 http://nihongonosensei.net/?page_id=8874#link26 http://nihongonosensei.net/?page_id=8874#link25 http://nihongonosensei.net/?page_id=8874#link24 http://nihongonosensei.net/?page_id=8874#link23 http://nihongonosensei.net/?page_id=4945 http://nihongonosensei.net/?page_id=5094 http://nihongonosensei.net/?page_id=13794 http://nihongonosensei.net/?page_id=13794#link1 http://nihongonosensei.net/?page_id=13794#link2 http://nihongonosensei.net/?page_id=13825 http://nihongonosensei.net/?page_id=13827 http://nihongonosensei.net/?page_id=1904 https://thk.kanzae.net/
Trying to grab just the first href in each table row
I'm trying to grab just the first href in each row of an HTML table. Using find_all on the soup object doesn't work because there are multiple tables, so I used soup.select() to isolate just that table and work from there, but it doesn't seem to be working. I tried using find_all on the soup object alone, and I tried looping through the table rows with find(), but it said that it returns 'NoneType'. I would like to be able to store a list that starts ['/players/a/abrinal01.html', '/players/a/acyqu01.html', ...]. url = 'https://www.basketball-reference.com/leagues/NBA_2019_per_game.html' res = requests.get(url) res.raise_for_status() soup = bs4.BeautifulSoup(res.text, 'html.parser') table = soup.find("table", { "id" : "per_game_stats" })
You can access the desired data by anchoring the parsing from the outer div wrapper with the id of all_per_game_stats: import requests from bs4 import BeautifulSoup as soup d = soup(requests.get('https://www.basketball-reference.com/leagues/NBA_2019_per_game.html').text, 'html.parser') data = [b.td.a['href'] for b in d.find('div', {'id':'all_per_game_stats'}).table.find_all('tr') if b.td] Output: ['/players/a/abrinal01.html', '/players/a/acyqu01.html', '/players/a/adamsja01.html', '/players/a/adamsst01.html', '/players/a/adebaba01.html', '/players/a/adelde01.html', '/players/a/akoonde01.html', '/players/a/aldrila01.html', '/players/a/alkinra01.html', '/players/a/allengr01.html', '/players/a/allenja01.html', '/players/a/allenka01.html', '/players/a/aminual01.html', '/players/a/anderju01.html', '/players/a/anderky01.html', '/players/a/anderry01.html', '/players/a/anderry01.html', '/players/a/anderry01.html', '/players/a/anigbik01.html', '/players/a/antetgi01.html', '/players/a/antetko01.html', '/players/a/anthoca01.html', '/players/a/anunoog01.html', '/players/a/arcidry01.html', '/players/a/arizatr01.html', '/players/a/arizatr01.html', '/players/a/arizatr01.html', '/players/a/augusdj01.html', '/players/a/aytonde01.html', '/players/b/bacondw01.html', '/players/b/baglema01.html', '/players/b/bakerro01.html', '/players/b/bakerro01.html', '/players/b/bakerro01.html', '/players/b/baldwwa01.html', '/players/b/balllo01.html', '/players/b/bambamo01.html', '/players/b/bareajo01.html', '/players/b/barneha02.html', '/players/b/barneha02.html', '/players/b/barneha02.html', '/players/b/bartowi01.html', '/players/b/bateske01.html', '/players/b/batumni01.html', '/players/b/bayleje01.html', '/players/b/baynear01.html', '/players/b/bazemke01.html', '/players/b/bealbr01.html', '/players/b/beaslma01.html', '/players/b/beaslmi01.html', '/players/b/belinma01.html', '/players/b/belljo01.html', '/players/b/bembrde01.html', '/players/b/bendedr01.html', '/players/b/bertada02.html', 
'/players/b/bertada01.html', '/players/b/beverpa01.html', '/players/b/birchkh01.html', '/players/b/biyombi01.html', '/players/b/bjeline01.html', '/players/b/blakean01.html', '/players/b/bledser01.html', '/players/b/blossja01.html', '/players/b/bogdabo01.html', '/players/b/bogdabo02.html', '/players/b/bogutan01.html', '/players/b/boldejo01.html', '/players/b/bongais01.html', '/players/b/bookede01.html', '/players/b/bouchch01.html', '/players/b/bradlav01.html', '/players/b/bradlav01.html', '/players/b/bradlav01.html', '/players/b/bradlto01.html', '/players/b/breweco01.html', '/players/b/breweco01.html', '/players/b/breweco01.html', '/players/b/bridgmi01.html', '/players/b/bridgmi02.html', '/players/b/briscis01.html', '/players/b/broekry01.html', '/players/b/brogdma01.html', '/players/b/brookdi01.html', '/players/b/brookma01.html', '/players/b/brownbr01.html', '/players/b/brownja02.html', '/players/b/brownlo01.html', '/players/b/brownst02.html', '/players/b/browntr01.html', '/players/b/brunsja01.html', '/players/b/bryanth01.html', '/players/b/bullore01.html', '/players/b/bullore01.html', '/players/b/bullore01.html', '/players/b/burketr01.html', '/players/b/burketr01.html', '/players/b/burketr01.html', '/players/b/burksal01.html', '/players/b/burksal01.html', '/players/b/burksal01.html', '/players/b/burksal01.html', '/players/b/burtode02.html', '/players/b/butleji01.html', '/players/b/butleji01.html', '/players/b/butleji01.html', '/players/c/cabocbr01.html', '/players/c/caldejo01.html', '/players/c/caldwke01.html', '/players/c/canaais01.html', '/players/c/canaais01.html', '/players/c/canaais01.html', '/players/c/canaais01.html', '/players/c/capelca01.html', '/players/c/carrode01.html', '/players/c/carteje01.html', '/players/c/cartevi01.html', '/players/c/cartewe01.html', '/players/c/cartemi01.html', '/players/c/cartemi01.html', '/players/c/cartemi01.html', '/players/c/carusal01.html', '/players/c/casspom01.html', '/players/c/caulewi01.html', 
'/players/c/caupatr01.html', '/players/c/cavanty01.html', '/players/c/chandty01.html', '/players/c/chandty01.html', '/players/c/chandty01.html', '/players/c/chandwi01.html', '/players/c/chandwi01.html', '/players/c/chandwi01.html', '/players/c/chealjo01.html', '/players/c/chiozch01.html', '/players/c/chrisma01.html', '/players/c/chrisma01.html', '/players/c/chrisma01.html', '/players/c/clarkga01.html', '/players/c/clarkia01.html', '/players/c/clarkjo01.html', '/players/c/collijo01.html', '/players/c/colliza01.html', '/players/c/collida01.html', '/players/c/colsobo01.html', '/players/c/conlemi01.html', '/players/c/connapa01.html', '/players/c/cookqu01.html', '/players/c/couside01.html', '/players/c/covinro01.html', '/players/c/covinro01.html', '/players/c/covinro01.html', '/players/c/crabbal01.html', '/players/c/craigto01.html', '/players/c/crawfja01.html', '/players/c/creekmi01.html', '/players/c/creekmi01.html', '/players/c/creekmi01.html', '/players/c/crowdja01.html', '/players/c/cunnida01.html', '/players/c/curryse01.html', '/players/c/curryst01.html', '/players/d/danietr01.html', '/players/d/davisan02.html', '/players/d/davisde01.html', '/players/d/davised01.html', '/players/d/davisty01.html', '/players/d/dedmode01.html', '/players/d/dekkesa01.html', '/players/d/dekkesa01.html', '/players/d/dekkesa01.html', '/players/d/delgaan01.html', '/players/d/dellama01.html', '/players/d/dellama01.html', '/players/d/dellama01.html', '/players/d/denglu01.html', '/players/d/derozde01.html', '/players/d/derrima01.html', '/players/d/diallch01.html', '/players/d/diallha01.html', '/players/d/dienggo01.html', '/players/d/dinwisp01.html', '/players/d/divindo01.html', '/players/d/doncilu01.html', '/players/d/dorsety01.html', '/players/d/dorsety01.html', '/players/d/dorsety01.html', '/players/d/dotsoda01.html', '/players/d/doziepj01.html', '/players/d/dragigo01.html', '/players/d/drumman01.html', '/players/d/dudleja01.html', '/players/d/dunnkr01.html', '/players/d/duranke01.html', 
'/players/d/duvaltr01.html', '/players/e/edwarvi01.html', '/players/e/ellenhe01.html', '/players/e/ellenhe01.html', '/players/e/ellenhe01.html', '/players/e/ellinwa01.html', '/players/e/ellinwa01.html', '/players/e/ellinwa01.html', '/players/e/embiijo01.html', '/players/e/ennisja01.html', '/players/e/ennisja01.html', '/players/e/ennisja01.html', '/players/e/eubandr01.html', '/players/e/evansja02.html', '/players/e/evansja01.html', '/players/e/evansja01.html', '/players/e/evansja01.html', '/players/e/evansty01.html', '/players/e/exumda01.html', '/players/f/farieke01.html', '/players/f/farieke01.html', '/players/f/farieke01.html', '/players/f/favorde01.html', '/players/f/feliccr01.html', '/players/f/feltora01.html', '/players/f/fergute01.html', '/players/f/ferreyo01.html', '/players/f/finnedo01.html', '/players/f/forbebr01.html', '/players/f/fournev01.html', '/players/f/foxde01.html', '/players/f/frazime01.html', '/players/f/fraziti01.html', '/players/f/fraziti01.html', '/players/f/fraziti01.html', '/players/f/fredeji01.html', '/players/f/fryech01.html', '/players/f/fultzma01.html', '/players/g/gallida01.html', '/players/g/gallola01.html', '/players/g/garrebi01.html', '/players/g/gasolma01.html', '/players/g/gasolma01.html', '/players/g/gasolma01.html', '/players/g/gasolpa01.html', '/players/g/gasolpa01.html', '/players/g/gasolpa01.html', '/players/g/gayru01.html', '/players/g/georgpa01.html', '/players/g/gibsota01.html', '/players/g/gilesha01.html', '/players/g/gilgesh01.html', '/players/g/goberru01.html', '/players/g/goodwbr01.html', '/players/g/gordoaa01.html', '/players/g/gordoer01.html', '/players/g/gortama01.html', '/players/g/grahade01.html', '/players/g/grahatr01.html', '/players/g/grantje01.html', '/players/g/grantje02.html', '/players/g/grantdo01.html', '/players/g/greenda02.html', '/players/g/greendr01.html', '/players/g/greenge01.html', '/players/g/greenja01.html', '/players/g/greenja01.html', '/players/g/greenja01.html', '/players/g/greenje02.html', 
'/players/g/griffbl01.html', '/players/h/hamilda02.html', '/players/h/hannadu01.html', '/players/h/hardati02.html', '/players/h/hardati02.html', '/players/h/hardati02.html', '/players/h/hardeja01.html', '/players/h/harklma01.html', '/players/h/harremo01.html', '/players/h/harride01.html', '/players/h/harriga01.html', '/players/h/harrijo01.html', '/players/h/harrito02.html', '/players/h/harrito02.html', '/players/h/harrito02.html', '/players/h/harrian01.html', '/players/h/harrian01.html', '/players/h/harrian01.html', '/players/h/harrian01.html', '/players/h/harrish01.html', '/players/h/hartjo01.html', '/players/h/harteis01.html', '/players/h/hasleud01.html', '/players/h/haywago01.html', '/players/h/hensojo01.html', '/players/h/hernaju01.html', '/players/h/hernawi01.html', '/players/h/hezonma01.html', '/players/h/hicksis01.html', '/players/h/hieldbu01.html', '/players/h/highsha01.html', '/players/h/hilarne01.html', '/players/h/hillge01.html', '/players/h/hillge01.html', '/players/h/hillge01.html', '/players/h/hillso01.html', '/players/h/holidaa01.html', '/players/h/holidjr01.html', '/players/h/holidju01.html', '/players/h/holidju01.html', '/players/h/holidju01.html', '/players/h/hollajo02.html', '/players/h/holliro01.html', '/players/h/holmeri01.html', '/players/h/hoodro01.html', '/players/h/hoodro01.html', '/players/h/hoodro01.html', '/players/h/horfoal01.html', '/players/h/houseda01.html', '/players/h/howardw01.html', '/players/h/huertke01.html', '/players/h/humphis01.html', '/players/h/hunterj01.html', '/players/h/hutchch01.html', '/players/i/ibakase01.html', '/players/i/iguodan01.html', '/players/i/ilyaser01.html', '/players/i/inglejo01.html', '/players/i/ingraan01.html', '/players/i/ingrabr01.html', '/players/i/irvinky01.html', '/players/i/isaacjo01.html', '/players/i/iwundwe01.html', '/players/j/jacksde01.html', '/players/j/jacksfr01.html', '/players/j/jacksja02.html', '/players/j/jacksjo02.html', '/players/j/jacksju01.html', '/players/j/jacksju01.html', 
'/players/j/jacksju01.html', '/players/j/jacksre01.html', '/players/j/jamesle01.html', '/players/j/jeffeam01.html', '/players/j/jenkijo01.html', '/players/j/jenkijo01.html', '/players/j/jenkijo01.html', '/players/j/jerebjo01.html', '/players/j/johnsal02.html', '/players/j/johnsam01.html', '/players/j/johnsbj01.html', '/players/j/johnsbj01.html', '/players/j/johnsbj01.html', '/players/j/johnsja01.html', '/players/j/johnsst04.html', '/players/j/johnsst04.html', '/players/j/johnsst04.html', '/players/j/johnsty01.html', '/players/j/johnsty01.html', '/players/j/johnsty01.html', '/players/j/johnswe01.html', '/players/j/johnswe01.html', '/players/j/johnswe01.html', '/players/j/jokicni01.html', '/players/j/jonesda03.html', '/players/j/jonesde02.html', '/players/j/jonesja04.html', '/players/j/jonesje01.html', '/players/j/joneste01.html', '/players/j/jonesty01.html', '/players/j/jordade01.html', '/players/j/jordade01.html', '/players/j/jordade01.html', '/players/j/josepco01.html', '/players/k/kaminfr01.html', '/players/k/kanteen01.html', '/players/k/kanteen01.html', '/players/k/kanteen01.html', '/players/k/kennalu01.html', '/players/k/kiddgmi01.html', '/players/k/kingge03.html', '/players/k/klebima01.html', '/players/k/knighbr03.html', '/players/k/knighbr03.html', '/players/k/knighbr03.html', '/players/k/knoxke01.html', '/players/k/korkmfu01.html', '/players/k/kornelu01.html', '/players/k/korveky01.html', '/players/k/korveky01.html', '/players/k/korveky01.html', '/players/k/koufoko01.html', '/players/k/kurucro01.html', '/players/k/kuzmaky01.html', '/players/l/labissk01.html', '/players/l/labissk01.html', '/players/l/labissk01.html', '/players/l/lambje01.html', '/players/l/lavinza01.html', '/players/l/laymaja01.html', '/players/l/leaftj01.html', '/players/l/leeco01.html', '/players/l/leeco01.html', '/players/l/leeco01.html', '/players/l/leeda03.html', '/players/l/lemonwa01.html', '/players/l/lenal01.html', '/players/l/leonaka01.html', '/players/l/leoname01.html', 
'/players/l/leuerjo01.html', '/players/l/leverca01.html', '/players/l/lillada01.html', '/players/l/linje01.html', '/players/l/linje01.html', '/players/l/linje01.html', '/players/l/livinsh01.html', '/players/l/loftoza01.html', '/players/l/looneke01.html', '/players/l/lopezbr01.html', '/players/l/lopezro01.html', '/players/l/loveke01.html', '/players/l/lowryky01.html', '/players/l/loydjo01.html', '/players/l/lucaska01.html', '/players/l/luwawti01.html', '/players/l/luwawti01.html', '/players/l/luwawti01.html', '/players/l/lydonty01.html', '/players/l/lylestr01.html', '/players/m/machasc01.html', '/players/m/macksh01.html', '/players/m/macksh01.html', '/players/m/macksh01.html', '/players/m/maconda01.html', '/players/m/macurjp01.html', '/players/m/mahinia01.html', '/players/m/makerth01.html', '/players/m/makerth01.html', '/players/m/makerth01.html', '/players/m/marjabo01.html', '/players/m/marjabo01.html', '/players/m/marjabo01.html', '/players/m/markkla01.html', '/players/m/martija01.html', '/players/m/masonfr01.html', '/players/m/matenya01.html', '/players/m/matthwe02.html', '/players/m/matthwe02.html', '/players/m/matthwe02.html', '/players/m/matthwe02.html', '/players/m/mbahalu01.html', '/players/m/mccalta01.html', '/players/m/mccawpa01.html', '/players/m/mccawpa01.html', '/players/m/mccawpa01.html', '/players/m/mccolcj01.html', '/players/m/mccontj01.html', '/players/m/mcderdo01.html', '/players/m/mcgeeja01.html', '/players/m/mcgruro01.html', '/players/m/mckinal01.html', '/players/m/mclembe01.html', '/players/m/mcraejo01.html', '/players/m/meeksjo01.html', '/players/m/mejrisa01.html', '/players/m/meltode01.html', '/players/m/metuch01.html', '/players/m/middlkh01.html', '/players/m/milescj01.html', '/players/m/milescj01.html', '/players/m/milescj01.html', '/players/m/milleda01.html', '/players/m/millema01.html', '/players/m/millspa02.html', '/players/m/millspa01.html', '/players/m/miltosh01.html', '/players/m/mirotni01.html', '/players/m/mirotni01.html', 
'/players/m/mirotni01.html', '/players/m/mitchdo01.html', '/players/m/mitrona01.html', '/players/m/monkma01.html', '/players/m/monrogr01.html', '/players/m/monrogr01.html', '/players/m/monrogr01.html', '/players/m/monrogr01.html', '/players/m/mooreet01.html', '/players/m/moreler01.html', '/players/m/moreler01.html', '/players/m/moreler01.html', '/players/m/morrija01.html', '/players/m/morrima03.html', '/players/m/morrima02.html', '/players/m/morrima02.html', '/players/m/morrima02.html', '/players/m/morrimo01.html', '/players/m/motiedo01.html', '/players/m/motlejo01.html', '/players/m/mudiaem01.html', '/players/m/murraja01.html', '/players/m/musadz01.html', '/players/m/muscami01.html', '/players/m/muscami01.html', '/players/m/muscami01.html', '/players/m/mykhasv01.html', '/players/m/mykhasv01.html', '/players/m/mykhasv01.html', '/players/n/naderab01.html', '/players/n/nancela02.html', '/players/n/napiesh01.html', '/players/n/netora01.html', '/players/n/niangge01.html', '/players/n/noahjo01.html', '/players/n/noelne01.html', '/players/n/nowitdi01.html', '/players/n/ntilila01.html', '/players/n/nunnaja01.html', '/players/n/nunnaja01.html', '/players/n/nunnaja01.html', '/players/n/nurkiju01.html', '/players/n/nwabada01.html', '/players/o/onealro01.html', '/players/o/oquinky01.html', '/players/o/ojelese01.html', '/players/o/okafoja01.html', '/players/o/okoboel01.html', '/players/o/okogijo01.html', '/players/o/oladivi01.html', '/players/o/olynyke01.html', '/players/o/osmande01.html', '/players/o/oubreke01.html', '/players/o/oubreke01.html', '/players/o/oubreke01.html', '/players/p/pachuza01.html', '/players/p/parkeja01.html', '/players/p/parkeja01.html', '/players/p/parkeja01.html', '/players/p/parketo01.html', '/players/p/parsoch01.html', '/players/p/pattepa01.html', '/players/p/pattoju01.html', '/players/p/paulch01.html', '/players/p/payneca01.html', '/players/p/payneca01.html', '/players/p/payneca01.html', '/players/p/paytoel01.html', '/players/p/paytoga02.html', 
'/players/p/pinsoth01.html', '/players/p/plumlma01.html', '/players/p/plumlmi01.html', '/players/p/poeltja01.html', '/players/p/pondequ01.html', '/players/p/porteot01.html', '/players/p/porteot01.html', '/players/p/porteot01.html', '/players/p/portibo01.html', '/players/p/portibo01.html', '/players/p/portibo01.html', '/players/p/poweldw01.html', '/players/p/powelno01.html', '/players/p/poythal01.html', '/players/q/qizh01.html', '/players/r/rabbiv01.html', '/players/r/randlch01.html', '/players/r/randlju01.html', '/players/r/redicjj01.html', '/players/r/reedda01.html', '/players/r/reynoca01.html', '/players/r/richajo01.html', '/players/r/richama01.html', '/players/r/riverau01.html', '/players/r/riverau01.html', '/players/r/riverau01.html', '/players/r/robinde01.html', '/players/r/robindu01.html', '/players/r/robingl02.html', '/players/r/robinje01.html', '/players/r/robinmi01.html', '/players/r/rondora01.html', '/players/r/rosede01.html', '/players/r/rosste01.html', '/players/r/roziete01.html', '/players/r/rubiori01.html', '/players/r/russeda01.html', '/players/s/sabondo01.html', '/players/s/sampsbr01.html', '/players/s/sampsja02.html', '/players/s/saricda01.html', '/players/s/saricda01.html', '/players/s/saricda01.html', '/players/s/satorto01.html', '/players/s/schrode01.html', '/players/s/scottmi01.html', '/players/s/scottmi01.html', '/players/s/scottmi01.html', '/players/s/sefolth01.html', '/players/s/seldewa01.html', '/players/s/seldewa01.html', '/players/s/seldewa01.html', '/players/s/sextoco01.html', '/players/s/shamela01.html', '/players/s/shamela01.html', '/players/s/shamela01.html', '/players/s/shumpim01.html', '/players/s/shumpim01.html', '/players/s/shumpim01.html', '/players/s/siakapa01.html', '/players/s/siberjo01.html', '/players/s/simmobe01.html', '/players/s/simmojo02.html', '/players/s/simmojo02.html', '/players/s/simmojo02.html', '/players/s/simmoko01.html', '/players/s/simonan01.html', '/players/s/smartma01.html', '/players/s/smithde03.html', 
'/players/s/smithde03.html', '/players/s/smithde03.html', '/players/s/smithis01.html', '/players/s/smithjr01.html', '/players/s/smithja02.html', '/players/s/smithja02.html', '/players/s/smithja02.html', '/players/s/smithja02.html', '/players/s/smithzh01.html', '/players/s/snellto01.html', '/players/s/spaldra01.html', '/players/s/spaldra01.html', '/players/s/spaldra01.html', '/players/s/spellom01.html', '/players/s/stausni01.html', '/players/s/stausni01.html', '/players/s/stausni01.html', '/players/s/stephdj01.html', '/players/s/stephla01.html', '/players/s/sumneed01.html', '/players/s/swanica01.html', '/players/s/swanica01.html', '/players/s/swanica01.html', '/players/t/tatumja01.html', '/players/t/teaguje01.html', '/players/t/templga01.html', '/players/t/templga01.html', '/players/t/templga01.html', '/players/t/teodomi01.html', '/players/t/terreja01.html', '/players/t/terryem01.html', '/players/t/terryem01.html', '/players/t/terryem01.html', '/players/t/theisda01.html', '/players/t/thomais02.html', '/players/t/thomakh01.html', '/players/t/thomala01.html', '/players/t/thompkl01.html', '/players/t/thomptr01.html', '/players/t/thornsi01.html', '/players/t/tollian01.html', '/players/t/townska01.html', '/players/t/trentga02.html', '/players/t/trieral01.html', '/players/t/tuckepj01.html', '/players/t/turneev01.html', '/players/t/turnemy01.html', '/players/u/udohek01.html', '/players/u/ulisty01.html', '/players/v/valanjo01.html', '/players/v/valanjo01.html', '/players/v/valanjo01.html', '/players/v/vandeja01.html', '/players/v/vanvlfr01.html', '/players/v/vonleno01.html', '/players/v/vucevni01.html', '/players/w/wadedw01.html', '/players/w/wagnemo01.html', '/players/w/waitedi01.html', '/players/w/walkeke02.html', '/players/w/walkelo01.html', '/players/w/walljo01.html', '/players/w/wallaty01.html', '/players/p/princta02.html', '/players/w/wanambr01.html', '/players/w/warretj01.html', '/players/w/washbju01.html', '/players/w/watanyu01.html', '/players/w/welshth01.html', 
'/players/w/westbru01.html', '/players/w/whitede01.html', '/players/w/whiteok01.html', '/players/w/whiteha01.html', '/players/w/wiggian01.html', '/players/w/willial03.html', '/players/w/willicj01.html', '/players/w/willijo04.html', '/players/w/willike04.html', '/players/w/willilo02.html', '/players/w/willima02.html', '/players/w/williro04.html', '/players/w/willitr02.html', '/players/w/wilsodj01.html', '/players/w/winslju01.html', '/players/w/woodch01.html', '/players/w/woodch01.html', '/players/w/woodch01.html', '/players/w/wrighde01.html', '/players/w/wrighde01.html', '/players/w/wrighde01.html', '/players/y/yabusgu01.html', '/players/y/youngni01.html', '/players/y/youngth01.html', '/players/y/youngtr01.html', '/players/z/zelleco01.html', '/players/z/zellety01.html', '/players/z/zellety01.html', '/players/z/zellety01.html', '/players/z/zizican01.html', '/players/z/zubaciv01.html', '/players/z/zubaciv01.html', '/players/z/zubaciv01.html']
I would use a set comprehension to remove duplicates, and I think using nth-of-type to select the appropriate column reads more cleanly. Using bs4 4.7.1: import requests from bs4 import BeautifulSoup as bs soup = bs(requests.get('https://www.basketball-reference.com/leagues/NBA_2019_per_game.html').text, 'html.parser') links = {i['href'] for i in soup.select('#per_game_stats td:nth-of-type(1) a')} print(links) You could also use the following CSS selector: [csk] > a
Extracting data with BeautifulSoup and output to CSV
As mentioned in my previous questions, I am using Beautiful Soup with Python to retrieve weather data from a website. Here's what the website looks like: <channel> <title>2 Hour Forecast</title> <source>Meteorological Services Singapore</source> <description>2 Hour Forecast</description> <item> <title>Nowcast Table</title> <category>Singapore Weather Conditions</category> <forecastIssue date="18-07-2016" time="03:30 PM"/> <validTime>3.30 pm to 5.30 pm</validTime> <weatherForecast> <area forecast="TL" lat="1.37500000" lon="103.83900000" name="Ang Mo Kio"/> <area forecast="SH" lat="1.32100000" lon="103.92400000" name="Bedok"/> <area forecast="TL" lat="1.35077200" lon="103.83900000" name="Bishan"/> <area forecast="CL" lat="1.30400000" lon="103.70100000" name="Boon Lay"/> <area forecast="CL" lat="1.35300000" lon="103.75400000" name="Bukit Batok"/> <area forecast="CL" lat="1.27700000" lon="103.81900000" name="Bukit Merah"/> <channel> I managed to retrieve the information I need using this code:
import requests from bs4 import BeautifulSoup import urllib3 #getting the ValidTime r = requests.get('http://www.nea.gov.sg/api/WebAPI/?dataset=2hr_nowcast&keyref=781CF461BB6606AD907750DFD1D07667C6E7C5141804F45D') soup = BeautifulSoup(r.content, "xml") time = soup.find('validTime').string print "validTime: " + time #getting the date for currentdate in soup.find_all('item'): element = currentdate.find('forecastIssue') print "date: " + element['date'] #getting the time for currentdate in soup.find_all('item'): element = currentdate.find('forecastIssue') print "time: " + element['time'] for area in soup.find('weatherForecast').find_all('area'): area_attrs_li = [area.attrs for area in soup.find('weatherForecast').find_all('area')] print area_attrs_li Here are my results: {'lat': u'1.34039000', 'lon': u'103.70500000', 'name': u'Jurong West', 'forecast': u'LR'}, {'lat': u'1.31200000', 'lon': u'103.86200000', 'name': u'Kallang', 'forecast': u'LR'}, How do I remove the u' from the result? I tried using a method I found while googling, but it doesn't seem to work. I'm not strong in Python and have been stuck on this for quite a while. EDIT: I tried doing this: f = open("C:\\scripts\\nea.csv" , 'wt') try: for area in area_attrs_li: writer = csv.writer(f) writer.writerow( (time, element['date'], element['time'], area_attrs_li)) finally: f.close() print open("C:/scripts/nea.csv", 'rt').read() It worked; however, I would like to split the areas apart, as the records are duplicated in the CSV. Thank you.
EDIT 1 (on topic): You're missing escape characters: C:\scripts>python neaweather.py File "neaweather.py", line 30 writer.writerow( ('time', 'element['date']', 'element['time']', 'area_attrs_li') ) writer.writerow( ('time', 'element[\'date\']', 'element[\'time\']', 'area_attrs_li') ^ SyntaxError: invalid syntax EDIT 2: If you want to insert the values: writer.writerow( (time, element['date'], element['time'], area_attrs_li) ) EDIT 3: To split the result across different lines: for area in area_attrs_li: writer.writerow( (time, element['date'], element['time'], area) EDIT 4: The splitting below is not entirely correct, but it should give you a better understanding of how to parse and split the data so that you can adapt it to your needs. To split the area element again, as you show in your image, you can parse it: for area in area_attrs_li: # cut off the characters you don't need area = area.replace('[','') area = area.replace(']','') area = area.replace('{','') area = area.replace('}','') # remove other characters area = area.replace("u'","\"").replace("'","\"") # split the string into a list areaList = area.split(",") # create your own csv-seperator ownRowElement = ';'.join(areaList) writer.writerow( (time, element['date'], element['time'], ownRowElement) Off topic: This works for me: import csv import json x="""[ {'lat': u'1.34039000', 'lon': u'103.70500000', 'name': u'Jurong West','forecast': u'LR'} ]""" jsontxt = json.loads(x.replace("u'","\"").replace("'","\"")) f = csv.writer(open("test.csv", "w+")) # Write CSV Header, If you dont need that, remove this line f.writerow(['lat', 'lon', 'name', 'forecast']) for jsontext in jsontxt: f.writerow([jsontext["lat"], jsontext["lon"], jsontext["name"], jsontext["forecast"], ])