HTML table is missing the last value when extracted via Selenium - python

This code scrapes everything but the last value in the table (0 Measure). I've looked everywhere but can't understand why it's missing it. Any idea?
import pandas as pd
import csv
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
# Scratch list declared by the original script; nothing below fills it.
t1 = []

# Run Chrome without a visible window.
options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(executable_path='mypath/chromedriver.exe', options=options)
driver.get("https://ai.fmcsa.dot.gov/SMS")

# One explicit wait (20 s timeout) is shared by every lookup below.
wait = WebDriverWait(driver, 20)

# XPath attribute tests are written with '@' (e.g. @title); '#' is not valid
# XPath syntax and makes the locator fail.
# Click the link titled "Close", type the number into the second MCSearch
# input and press the second search button.
wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@title='Close']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "(//input[@name='MCSearch'])[2]"))).send_keys('1605061')
wait.until(EC.element_to_be_clickable((By.XPATH, "(//input[@name='search'])[2]"))).click()

# Follow the link inside the second paragraph of the element with id "BASICs".
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='BASICs']/p[2]/a"))).click()

# Read the text of the row whose class is exactly "valueRow sumData".
# NOTE(review): WebElement.text only returns rendered (visible) text, which may
# be why the last cell is missing in headless mode - confirm against the page.
tbl = wait.until(EC.presence_of_element_located((By.XPATH, "//tr[@class='valueRow sumData']")))
tab = tbl.text
print(tab)

Related

Why can't I retrieve the price from a webpage?

I am trying to retrieve a price from a product on a webshop but can't find the right code to get it.
Price of product I want to extract: https://www.berger-camping.nl/zoeken/?q=3138522088064
This is the line of code I have to retrieve the price:
# Read the text of the price <div>; XPath attribute tests use '@class', not '#class'.
Prijs_BergerCamping = driver.find_element(by=By.XPATH, value='//div[@class="prod_price__prod_price"]').text
print(Prijs_BergerCamping)
Any tips on what I seem to be missing?
Your code is correct.
I guess all you are missing is a wait for the element to become visible.
This code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes in a Windows path must not be read as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.berger-camping.nl/zoeken/?q=3138522088064"
driver.get(url)
# Wait (up to 20 s) until the price element is visible before reading its text.
# XPath attribute tests use '@class', not '#class'.
price = wait.until(EC.visibility_of_element_located((By.XPATH, '//div[@class="prod_price__prod_price"]'))).text
print(price)
The output is:
79,99 €
But you also need to close the cookie banner and the "Select store" dialog (at least I see them). So my code has the following additions:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes in a Windows path must not be read as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 20)
url = "https://www.berger-camping.nl/zoeken/?q=3138522088064"
driver.get(url)
# Accept the cookie banner first.
wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()
# Close the dialog: click the 'uk-close' element inside a <div> that carries a
# style attribute. XPath attribute tests use '@', not '#'.
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@style] //*[contains(@class,'uk-close')]"))).click()
# With the overlays gone, wait for the price to be visible and read it.
price = wait.until(EC.visibility_of_element_located((By.XPATH, '//div[@class="prod_price__prod_price"]'))).text
print(price)

website search bar doesn't work for python selenium

I would like to automate this website: type a key into the search bar and run the search. However, the website never completes the search and keeps loading, or the browser closes right after the key is typed into the search bar.
This is what I tried:
import selenium
import pandas as pd
import numpy as np
import platform
import time
import random
from os import getcwd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-notification")
options.add_argument("--disable-infobars")
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_argument("--remote-debugging-port=9230")
#options.add_argument("--headless")
url = 'https://vip.stock.finance.sina.com.cn/mkt/#hs_z'
driver.get(url)
w = WebDriverWait(driver, 10)
w.until(EC.presence_of_element_located((By.XPATH, '//*[#id="inputSuggest"]')))
driver.find_element('xpath', '//*[#id="inputSuggest"]').clear()
driver.find_element('xpath', '//*[#id="inputSuggest"]').send_keys('sz111973'))
driver.find_element('xpath', '//*[#id="SSForm"]/input[3]').click()
But somehow the website cannot complete the search and keeps loading, or the browser closes right after it types the key into the search bar.
Any help will be appreciated! Thanks.
There are several issues here:
To prevent the site from loading for a very long time, you can use the eager pageLoadStrategy.
There is a redundant ) at the end of this line: driver.find_element('xpath', '//*[@id="inputSuggest"]').send_keys('sz111973'))
The following code works perfectly:
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# "eager" lets driver.get() return as soon as the DOM is ready instead of
# waiting for every resource, which avoids the very long page load.
# Setting it on the options object replaces the `desired_capabilities`
# keyword, which recent Selenium 4 releases no longer accept.
options.page_load_strategy = "eager"
# Raw string: the backslashes in a Windows path must not be read as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://vip.stock.finance.sina.com.cn/mkt/#hs_z'
driver.get(url)
wait = WebDriverWait(driver, 20)
# Named `search_box` rather than `input` so the builtin is not shadowed.
search_box = wait.until(EC.element_to_be_clickable((By.ID, 'inputSuggest')))
search_box.clear()
search_box.send_keys('sz111973')
# The submit control is the <input> immediately following the search box.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#inputSuggest +input'))).click()

Selenium problem with ElementClickInterceptedException

I am trying to scrape this URL, which involves clicking a Download button, and I am having a problem: the last line raises an ElementClickInterceptedException.
My actual goal is to download the CSV file.
The code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from ipykernel import kernelapp as app
import time
options = webdriver.ChromeOptions()
driver_path = 'C:\\Users\\Idener\\Downloads\\chromedriver_win32\\chromedriver.exe'
driver = webdriver.Chrome(driver_path, options=options)
url = "https://pubchem.ncbi.nlm.nih.gov/compound/2078"
driver.get(url)
wait = WebDriverWait(driver, 5)
# XPath attribute tests use '@id', not '#id'.
# Click the link in the header of the "NIOSH-Toxicity-Data" section.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="NIOSH-Toxicity-Data"]/div[1]/div/div/a'))).click()
wait = WebDriverWait(driver, 10)
# This final click is the line reported to raise ElementClickInterceptedException.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="Download"]'))).click()
enter image description here
The element you are trying to click is initially outside the visible viewport. So you first need to scroll the page, and only then click that element.
Clicking the first element opens a new tab, and the second element you want to click is there, in that new tab. So you need to switch to the new tab to access that element.
There is no need to define wait = WebDriverWait(driver, 10) a second time.
The following code is working:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: the backslashes in a Windows path must not be read as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://pubchem.ncbi.nlm.nih.gov/compound/2078#section=Toxicity"
driver.get(url)
element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#NIOSH-Toxicity-Data a[title*='Open']")))
# Reading this property scrolls the element into view as a side effect; the
# returned coordinates themselves are not needed.
element.location_once_scrolled_into_view
# Short pause so the scroll can settle before clicking.
time.sleep(1)
element.click()
# The click opens a new tab. Wait until the second window handle exists so
# that indexing window_handles[1] cannot fail with an IndexError.
wait.until(EC.number_of_windows_to_be(2))
driver.switch_to.window(driver.window_handles[1])
wait.until(EC.element_to_be_clickable((By.ID, "Download"))).click()
It does not download the file; it only opens the download dialog.

Delete specific index using pop

I want to exclude index 1 and index 16 by using the pop method, but they are not removed. Kindly suggest a solution, if there is one. This is the page link: https://www.barreaunantes.fr/annuaire/
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
import pandas as pd
options = Options()
options.add_argument("--no-sandbox")
options.add_argument("start-maximized")
#options.add_experimental_option("detach", True)
# Raw string: the backslashes in a Windows path must not be read as escapes.
webdriver_service = Service(r"C:\Program Files (x86)\chromedriver.exe")  # Your chromedriver path
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.barreaunantes.fr/annuaire/'
driver.get(url)
# Result list declared for the scrape; nothing in this snippet appends to it.
data = []
def main(skip_indices=(1, 16)):
    """Select each option of the '#ville-select' dropdown and click search.

    Selenium's ``Select`` wrapper has no ``pop`` method, so options cannot be
    removed from it. Calling ``select.pop(16)`` raised ``AttributeError``,
    which the bare ``except`` swallowed, so no option was ever selected.
    Unwanted options are now skipped by index instead.

    Args:
        skip_indices: Option indices that must not be selected. Defaults to
            ``(1, 16)``, the two indices the script wanted to drop.
    """
    # Imported locally so the block does not depend on the file-level imports.
    from selenium.common.exceptions import NoSuchElementException, TimeoutException

    skipped = frozenset(skip_indices)
    for x in range(28):
        if x in skipped:
            continue
        try:
            dropdown = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, '#ville-select')))
            Select(dropdown).select_by_index(x)
        except (TimeoutException, NoSuchElementException):
            # Dropdown not available or index out of range: try the next index.
            continue
        time.sleep(2)
        try:
            # XPath attribute tests use '@value', not '#value'.
            click_on_search_button = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.XPATH, '(//*[@value="Rechercher"])[1]')))
        except TimeoutException:
            # No search button to click for this option; move on.
            continue
        # Click through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", click_on_search_button)

Use Selenium to download data, but the input textbox shows a suggestion list to choose from instead (Python)

I am trying to open a page by entering the stock code in the textbox and pressing search. But typing the stock code automatically generates a list of other possible items to choose from. How can I choose only the first item of that list?
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
url = 'https://www1.hkexnews.hk/search/titlesearch.xhtml'
# Raw string: the backslashes in a Windows path must not be read as escapes.
browser = webdriver.Firefox(executable_path=r'C:\Program Files\Mozilla Firefox\geckodriver.exe')
StockList = ['02192']
browser.get(url)
# find_element(By.ID, ...) replaces find_element_by_id, which newer Selenium
# releases no longer provide.
txtBox = browser.find_element(By.ID, 'searchStockCode')
txtBox.send_keys(StockList[0])
txtBox.submit()
You can use the XPath below to click the first option that gets populated:
//div[@class='slimScrollDiv']/descendant::tbody/tr[1]
and click it like this :
# Wait up to 20 s for the first suggestion row to become clickable, then click it.
# XPath attribute tests use '@class', not '#class'.
wait = WebDriverWait(driver, 20)
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='slimScrollDiv']/descendant::tbody/tr[1]"))).click()
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
In case you are looking for full working code :
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
# Raw string: the backslashes in a Windows path must not be read as escapes.
driver = webdriver.Firefox(executable_path=r"C:\Program Files\Mozilla Firefox\geckodriver.exe")
driver.maximize_window()
driver.get("https://www1.hkexnews.hk/search/titlesearch.xhtml")
# Only explicit waits are used; the previous implicit wait was dropped because
# mixing implicit and explicit waits can produce unpredictable wait times.
wait = WebDriverWait(driver, 20)
StockList = ['02192']
# Type the stock code; the page then fills a suggestion list.
wait.until(EC.element_to_be_clickable((By.ID, "searchStockCode"))).send_keys(StockList[0])
# First row of the suggestion list. XPath attribute tests use '@class', not '#class'.
ele = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='slimScrollDiv']/descendant::tbody/tr[1]")))
# Move the pointer onto the row before clicking it.
ActionChains(driver).move_to_element(ele).click().perform()
# Click the link whose class contains "applyFilters".
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a[class*='applyFilters']"))).click()

Categories