I am learning how to use Selenium with Python and have been toying around with a few different things. I keep having an issue where I cannot locate any classes. I am able to locate and print data by xpath, but I cannot locate the classes.
The goal of this script is to gather a number from the table on the website and the current time, then append the items to a CSV file.
Site: https://bitinfocharts.com/top-100-richest-dogecoin-addresses.html
Any advice or guidance would be greatly appreciated as I am new to python. Thank you.
Code:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
import time
import pandas as pd
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
import csv
from datetime import datetime
from selenium.webdriver.common.by import By
#Open ChromeDriver
PATH = ('/Users/brandon/Desktop/chromedriver')
driver = webdriver.Chrome(PATH)
driver.get("https://bitinfocharts.com/top-100-richest-dogecoin-addresses.html")
driver.implicitly_wait(10)
driver.maximize_window()
#Creates the Time
now = datetime.now()
current_time = now.strftime("%m/%d/%Y, %H:%M:%S")
#####
#Identify the section of page
page = driver.find_element(By.CLASS_NAME,'table table-condensed bb')
time.sleep(3)
#Gather the data
for page in pages():
num_of_wallets = page.find_element(By.XPATH,
"//html/body/div[5]/table[1]/tbody/tr[10]/td[2]").text
table_dict = {"Time":current_time,
"Wallets":num_of_wallets}
file = open('dogedata.csv', 'a')
try:
file.write(f'{current_time},{num_of_wallets}')
finally:
file.close()
table table-condensed bb actually contains 3 class names.
So the best way to locate element based on multiple class names is to use css selector or xpath like:
page = driver.find_element(By.CSS_SELECTOR,'.table.table-condensed.bb')
or
page = driver.find_element(By.XPATH,"//*[#class='table table-condensed bb']")
Related
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time
from twilio.rest import Client
from datetime import datetime
import datefinder
import os
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
url = 'https://www.recreation.gov/permits/233262/registration/detailed-availability?type=overnight-permit'
driver = webdriver.Chrome()
driver.get(url)
title = ""
text = ""
campsites = ""
waiter = webdriver.support.wait.WebDriverWait(driver, 30)
waiter.until(EC.visibility_of_element_located((By.XPATH, '//*[#id="per-availability-main"]/div[1]/div[1]/div/div/div/div/div[3]/div/div[2]/button[2]')))
element = driver.find_element(By.XPATH, '//*[#id="per-availability-main"]/div[1]/div[1]/div/div/div/div/div[3]/div/div[2]/button[2]')
element.click()
time.sleep(0.3)
element.click()
time.sleep(0.4)
waiter.until(EC.visibility_of_element_located((By.XPATH, '//*[#id="per-availability-main"]/div[1]/div[3]/div/fieldset/div/div[2]/label/span')))
element = driver.find_element(By.XPATH, '//*[#id="per-availability-main"]/div[1]/div[3]/div/fieldset/div/div[2]/label/span')
element.click()
time.sleep(0.5)
waiter.until(EC.visibility_of_element_located((By.CLASS_NAME, 'rec-grid-grid-cell available')))
elements = driver.find_elements(By.CLASS_NAME, 'rec-grid-grid-cell available')
time.sleep(4)
So this code is to eventually compile a list of available permits for a given date for me to quickly find out which I want to do. It clicks 2 users and selects "no" for the guided trip. This reveals a grid, which shows the available sites. The first 2 steps work completely fine. It stops working when it tries to work with the grid.
I'm trying to locate available sites with the class name "rec-grid-grid-cell available"
I have also tried locating anything on that grid by XPATH and it can't seem to find anything. Is there a special way to deal with grids that appear after a few clicks?
If you need more information, please ask.
Unfortunately you cannot pass multiple css class names to By.CLASS_NAME.
So you can do either:
available_cells_css = ".rec-grid-grid-cell.available"
available_cells = waiter.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, available_cells_css)))
or
available_cells_xpath = "//div[#class='rec-grid-grid-cell available']"
available_cells = waiter.until(EC.visibility_of_all_elements_located((By.XPATH, available_cells_xpath)))
So i am trying to scrape Data in Shopeee using Pycharm/Python and Selenium. here is the code:
from selenium import webdriver
import time
import csv
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
PATH = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://shopee.ph/search?keyword=nacific&noCorrection=true&page=0&withDiscount=true")
time.sleep(2)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(3)
Categories = []
Categories.append(["NAME"])
driver.implicitly_wait(10)
products = driver.find_elements_by_xpath("//div[#class='row shopee-search-item-result__items']/div")
for product in products:
name_p = product.find_element_by_xpath("//div[#class='yQmmFK _1POlWt _36CEnF']")
rowData = [name_p]
Categories.append(rowData)
with open('Shopee.csv', 'a', encoding='utf-8') as file:
Import = csv.writer(file,lineterminator='\n')
Import.writerows(Categories)
so after i ran it...i "succcessfully" run it but the problem is this:
rather than showing the name of product it show selenium.webdriver and so on
i tried to change to other code by not using xpath and do the regular way(find_element_by_class_name etc) but it still cause error. i wonder why it is not working? can someone help me?
Website im trying to scrape: Shopee.ph
Software: Pycharm and Selenium
You can use .text to get the text from Selenium WebElements.
Example
product.find_element_by_xpath("//div[#class='yQmmFK _1POlWt _36CEnF']").text
There're many ways to locate web elements.
element = driver.find_element_by_id("passwd-id")
element = driver.find_element_by_name("passwd")
element = driver.find_element_by_xpath("//input[#id='passwd-id']")
element = driver.find_element_by_css_selector("input#passwd-id")
You can find the document here.
https://selenium-python.readthedocs.io/navigating.html
I've also fixed some of your code. Here
products = driver.find_elements_by_class_name('yQmmFK')
for product in products:
name_p = product.text
rowData = [name_p]
Categories.append(rowData)
driver.close()
I am very new to Python and I am trying to get all store locations from website of Muthoot. the following is a code i wrote but i am not getting any output. Please let me know what is wrong and what i need to correct.
As i understand, the code is not getting the search button clicked and hence nothing is moving. But how to do that??
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import pandas as pd
driver= webdriver.Chrome(executable_path="D:\Chromedriverpath\chromedriver_win32\chromedriver.exe")
driver.get("https://www.muthootfinance.com/branch-locator")
#Saving this element in a variable
drp=Select(driver.find_element_by_id("statelist"))
slist=drp.options
for ele in slist:
table=driver.select("table.table")
columns=table.find("thead").find_all("th")
column_names=[c.string for c in columns]
table_rows=table.find("tbody").find_all("tr")
l=[]
for tr in table_rows:
td=tr.find_all('td')
row=[str(tr.get_text()).strip() for tr in td]
l.append(row)
df=pd.DataFrame(l,columns=column_names)
df.head()
I think this will work for you now, I copied your code and it seems to work!
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import pandas as pd
driver = webdriver.Chrome("C:\Program Files (x86)\chromedriver.exe")
driver.get("https://www.muthootfinance.com/branch-locator")
# Saving this element in a variable
html_list = driver.find_element_by_id("state_branch_list")
items = html_list.find_elements_by_tag_name("li")
for item in items:
places = item.text
print(places)
df = pd.DataFrame([places])
I've been trying to scrape data from a table using selenium, but when I run the code, it only gets the header of the table.
from selenium import webdriver
driver = webdriver.Chrome()
driver.get('http://www.panamacompra.gob.pa/Inicio/#!/busquedaAvanzada?BusquedaRubros=true&IdRubro=41')
driver.implicitly_wait(100)
table = driver.find_element_by_xpath('/html/body/div[1]/div[2]/div/div[2]/div/div/div[2]/div[2]/div[3]/table/tbody')
print(t.text)
I also tried finding element by tag name using table, without luck.
you should try this:
from selenium import webdriver
driver = webdriver.Chrome()
driver.get('http://www.panamacompra.gob.pa/Inicio/#!/busquedaAvanzada?BusquedaRubros=true&IdRubro=41')
driver.implicitly_wait(100)
table = driver.find_element_by_xpath('/html/body/div[1]/div[2]/div/div[2]/div/div/div[2]/div[2]/div[3]/table/tbody')
number=2
while(number<12):
content = driver.find_element_by_xpath('//*[#id="body"]/div/div[2]/div/div/div[2]/div[2]/div[3]/table/tbody/tr['+str(number)+']')
print(content.text)
number+=1
The XPATH in 'table' is just the header, the actual content is this : '//*[#id="body"]/div/div[2]/div/div/div[2]/div[2]/div[3]/table/tbody/tr['+str(number)+']' , that's why you are not getting any content different than the header. Since the XPATH in the rows are like ...../tr[2],...../tr[3],...../tr[4], etc, Im using the str(number) < 12 , to get all the raws, you can also try with 50 rows a the time, is up to you.
I would use requests and mimic the POST request by the page as much faster
import requests
data = {'METHOD': '0','VALUE': '{"BusquedaRubros":"true","IdRubro":"41","Inicio":0}'}
r = s.post('http://www.panamacompra.gob.pa/Security/AmbientePublico.asmx/cargarActosOportunidadesDeNegocio', data=data).json()
print(r['listActos'])
You need wait until loader disappear, you can use invisibility_of_element_located, utilize WebDriverWait and expected_conditions. For the table you can use css_selector instead your xpath.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
driver = webdriver.Chrome()
driver.get('http://www.panamacompra.gob.pa/Inicio/#!/busquedaAvanzada?BusquedaRubros=true&IdRubro=41')
time.sleep(2)
WebDriverWait(driver, 50).until(EC.invisibility_of_element_located((By.XPATH, '//img[#src="images/loading.gif"]')))
table = driver.find_element_by_css_selector('.table_asearch.table.table-bordered.table-striped.table-hover.table-condensed')
print(table.text)
driver.quit()
Selenium is loading the table (happens fairly quickly) and then assuming it is done, since it's never given a chance to load the table rows (happens more slowly). One way around this is to repeatedly try to find an element that won't appear until the table is finished loading.
This is FAR from the most elegant solution (and there's probably Selenium libraries that do it better), but you can wait for the table by checking to see if a new table row can be found, and if not, sleep for 1 second before trying again.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import time
driver = webdriver.Chrome()
driver.get('http://www.panamacompra.gob.pa/Inicio/#!/busquedaAvanzada?BusquedaRubros=true&IdRubro=41')
wvar = 0
while(wvar == 0):
try:
#try loading one of the elements we want to read
el = driver.find_element_by_xpath('/html/body/div[1]/div[2]/div/div[2]/div/div/div[2]/div[2]/div[3]/table/tbody/tr[3]')
wvar = 1
except NoSuchElementException:
#not loaded yet
print('table body empty, waiting...')
time.sleep(1)
print('table loaded!')
#element got loaded; reload the table
table = driver.find_element_by_xpath('/html/body/div[1]/div[2]/div/div[2]/div/div/div[2]/div[2]/div[3]/table/tbody')
print(table.text)
I am trying to write a script that uses selenium to download many files which consist of different NHL players information; game-log. I want to download a file for each players in the following table: https://www.naturalstattrick.com/playerteams.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&score=all&stdoi=std&rate=y&team=ALL&pos=S&loc=B&toi=0.1&gpfilt=none&fd=&td=&tgp=410&lines=single
Once on that website, I wanted to click on all the players' name in the table. When a player's name is clicked through the href link, a new window opens. There are few drop-down menus at the top. I want to select "Rate" instead of "Counts" and also select " Game Log" instead of "Player Summary", and then click "Submit". Finally, I want to click on CSV(All) at the bottom to download a CSV file.
Here is my current code:
from selenium import webdriver
import csv
from selenium.webdriver.support.ui import Select
from datetime import date, timedelta
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chromedriver =("C:/Users/Michel/Desktop/python/package/chromedriver_win32/chromedriver.exe")
driver = webdriver.Chrome(chromedriver)
driver.get("https://www.naturalstattrick.com/playerteams.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&score=all&stdoi=std&rate=y&team=ALL&pos=S&loc=B&toi=0.1&gpfilt=none&fd=&td=&tgp=410&lines=single")
table = driver.find_element_by_xpath("//table[#class='indreg dataTable no-footer DTFC_Cloned']")
for row in table.find_elements_by_xpath("//tr[#role='row']")
links = driver.find_element_by_xpath('//a[#href]')
links.click()
select = Select(driver.find_element_by_name('rate'))
select.select_by_value("y")
select1 = Select(driver.find_element_by_name('v'))
select1.select_by_value("g")
select2 = Select(driver.find_element_by_type('submit'))
select2.select_by_value("submit")
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH , '//div[#class="dt-button button-csv button-htm15"]')))
CSVall = driver.find_element_by_xpath('//div[#class="dt-button button-csv button-htm15"]')
CSVall.click()
driver.close()
I try to change different things, but I always get an error. Where is the problem ?
Moreover, I think I should probably add a line to wait for the website to load because it takes a few seconds; after "driver.get". I do not know what should be the expected conditions to end the wait in this case.
Thanks
Rather than keep clicking through selections you could grab the playerIds from the first page and concantenate those, along with the strings representing the selections for Rate and Game Log into the queryString part of the new URL. Sure you can tidy up the following.
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def getPlayerId(url):
id = url.split('playerid=')[1]
id = id.split('&')[0]
return id
def makeNewURL(playerId):
return 'https://www.naturalstattrick.com/playerreport.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&stdoi=oi&rate=y&v=g&playerid=' + playerId
#chromedriver =("C:/Users/Michel/Desktop/python/package/chromedriver_win32/chromedriver.exe")
driver = webdriver.Chrome()
driver.get("https://www.naturalstattrick.com/playerteams.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&score=all&stdoi=std&rate=y&team=ALL&pos=S&loc=B&toi=0.1&gpfilt=none&fd=&td=&tgp=410&lines=single")
links = driver.find_elements_by_css_selector('table.indreg.dataTable.no-footer.DTFC_Cloned [href*=playerid]')
newLinks = []
for link in links:
newLinks.append(link.get_attribute('href'))
for link in newLinks:
playerId = getPlayerId(link)
link = makeNewURL(playerId)
driver.get(link)
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH , '//a[#class="dt-button buttons-csv buttons-html5"][2]')))
CSVall = driver.find_element_by_xpath('//a[#class="dt-button buttons-csv buttons-html5"][2]')
CSVall.click()
you don't need to click each player link but save the URLs as list, also there are several error, you can see working code below
from selenium import webdriver
import csv
from selenium.webdriver.support.ui import Select
from datetime import date, timedelta
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chromedriver =("C:/Users/Michel/Desktop/python/package/chromedriver_win32/chromedriver.exe")
driver = webdriver.Chrome(chromedriver)
driver.get("https://www.naturalstattrick.com/playerteams.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&score=all&stdoi=std&rate=y&team=ALL&pos=S&loc=B&toi=0.1&gpfilt=none&fd=&td=&tgp=410&lines=single")
playerLinks = driver.find_elements_by_xpath("//table[#class='indreg dataTable no-footer DTFC_Cloned']//a")
playerLinks = [p.get_attribute('href') for p in playerLinks]
print(len(playerLinks))
for url in playerLinks:
driver.get(url)
select = Select(driver.find_element_by_name('rate'))
select.select_by_value("y")
select1 = Select(driver.find_element_by_name('v'))
select1.select_by_value("g")
driver.find_element_by_css_selector('input[type="submit"]').click()
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH , '//a[#class="dt-button buttons-csv buttons-html5"][2]')))
CSVall = driver.find_element_by_xpath('//a[#class="dt-button buttons-csv buttons-html5"][2]')
CSVall.click()
driver.close()