How can I scrape Tax information from zillow with selenium - python

zillow picture
I have the above image at https://www.zillow.com/homes/for_sale/7132668_zpid/globalrelevanceex_sort/60.780619,-65.522461,4.521666,-125.551758_rect/3_zm/
I can't seem to find the selector for the tax history.
I tried to use an explicit driver wait, but the table that is output is the price history, not the tax history.
# Question snippet: open a Zillow listing in Chrome and try to grab the tax-history container.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
options = webdriver.ChromeOptions()
# NOTE(review): `useragent` and `ips` are not defined in this snippet — presumably lists built elsewhere.
options.add_argument(f"user-agent={useragent[0]}")
options.add_argument('--proxy-server=%s' % ips[0])
options.add_argument('--incognito')
chromedriver = '~/Downloads/chromedriver'
# NOTE(review): `os` is used here but is not imported in this snippet.
chromedriver = os.path.expanduser(chromedriver)
driver = webdriver.Chrome(chromedriver, chrome_options=options)
driver.get('https://www.zillow.com/homes/for_sale/7132668_zpid/globalrelevanceex_sort/60.673178,-74.663086,4.653079,-116.323243_rect/3_zm/2_p/')
# NOTE(review): `wait`, `EC` and `By` are not imported above; `wait` presumably aliases WebDriverWait — confirm.
# Click the first element with class "hdp-collapse" once it is reported clickable.
wait(driver, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "hdp-collapse"))).click()
# Keep a handle to div#hdp-tax-history as soon as it is present in the DOM.
table = wait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div#hdp-tax-history")))

Looks like you need some waits (and maybe some clicks) to get that tab visible. You can then write out the table. The code below just shows how you can access it:
# Demo: open the listing, expand the price/tax history section, switch tabs and print the tax table.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
url = 'https://www.zillow.com/homes/for_sale/7132668_zpid/globalrelevanceex_sort/53.566414,-73.081055,17.434511,-118.081055_rect/3_zm/'
d = webdriver.Chrome()
d.get(url)
# Expand the section that hosts both the price-history and tax-history tabs.
WebDriverWait(d,20).until(EC.presence_of_element_located((By.ID , 'price-and-tax-history'))).click()
tabs = WebDriverWait(d,5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".zsg-tab-link")))
# Second tab link; presumably the tax-history tab, since the table printed below lives under #hdp-tax-history.
tabs[1].click()
# The find_element_by_* helpers were removed in Selenium 4, so locate via By and
# wait for the table to be attached to the DOM before reading its text.
tax_table = WebDriverWait(d,5).until(EC.presence_of_element_located((By.CSS_SELECTOR, '#hdp-tax-history table')))
print(tax_table.text) # just to show present

Related

Getting the HTML element using Selenium WebDriver

I'm trying to get price of a product on amazon using Selenium:
# Question snippet: load an Amazon product page and print the text of the "a-offscreen" price element.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
url = \
"https://www.amazon.in/Celevida-Kesar-Elaichi-Flavor-Metal/dp/B081WJ6536/ref=sr_1_5?crid=3NRZERQ8H4T8L&keywords=dr+reddys+celevida&qid=1672124472&sprefix=%2Caps%2C5801&sr=8-5"
# Raw string keeps the backslashes in the Windows driver path literal.
services = Service(r"C:\Users\Deepak Shetter\chromedriver_win32\chromedriver.exe")
driver = webdriver.Chrome(service=services)
driver.get(url)
# find_element returns the first element on the page carrying this class name.
price = driver.find_element(By.CLASS_NAME, "a-offscreen")
# NOTE(review): `.text` presumably yields only rendered text, so an off-screen element may print as empty — confirm.
print("price is "+price.text)
As you can see in this image, the HTML for the price has class="a-offscreen". But when I run my code in PyCharm it returns None. How can I get the price string? (By the way, I checked it using Beautiful Soup and it worked fine.)
Edit :
This time I used another url : https://www.amazon.in/Avvatar-Alpha-Choco-Latte-Shaker/dp/B08S3TNGYK/?_encoding=UTF8&pd_rd_w=ofFKu&content-id=amzn1.sym.1f592895-6b7a-4b03-9d72-1a40ea8fbeca&pf_rd_p=1f592895-6b7a-4b03-9d72-1a40ea8fbeca&pf_rd_r=PT3Y6GWJ7YHADW09VKNK&pd_rd_wg=lBWZa&pd_rd_r=0a44c278-bcfa-49c2-806b-cf8eb292038a&ref_=pd_gw_ci_mcx_mr_hp_atf_m
In this case it has 2 price elements: one with class="a-offscreen" and another one with class="a-price-whole".
my code :
# Same kind of lookup as before, but by the class name "a-price-whole".
price = driver.find_element(By.CLASS_NAME, "a-price-whole")
This time the return value is 1,580.
The following code works:
# Answer snippet: wait for the visible "price to pay" element on the product page and print its text.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: "\w" and "\c" are invalid escape sequences in a normal string literal.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.amazon.in/Celevida-Kesar-Elaichi-Flavor-Metal/dp/B081WJ6536/ref=sr_1_5?crid=3NRZERQ8H4T8L&keywords=dr+reddys+celevida&qid=1672124472&sprefix=%2Caps%2C5801&sr=8-5"
driver.get(url)
# Wait (up to 10 s) for the price element to be visible, then read its text.
price = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#corePrice_desktop .a-span12 .apexPriceToPay"))).text
print(price)
The output is:
₹566
Using Safari, the following code works:
# Answer snippet: same lookup as the question, driven through Safari.
from selenium import webdriver
from selenium.webdriver.common.by import By
# The context manager quits the browser when the block exits; its body must be indented.
with webdriver.Safari() as driver:
    driver.get('https://www.amazon.in/Celevida-Kesar-Elaichi-Flavor-Metal/dp/B081WJ6536/ref=sr_1_5?crid=3NRZERQ8H4T8L&keywords=dr+reddys+celevida&qid=1672124472&sprefix=%2Caps%2C5801&sr=8-5')
    price = driver.find_element(By.CLASS_NAME, "a-offscreen")
    print(price.text)
Which gives this output:
₹566.00
Therefore it appears that your use of the ChromeDriver may be flawed

How to use inspect element for selection tab in selenium

I have written code that logs in to a webpage and tries to download a file. However, after logging in and before downloading, it needs to select a particular field from a drop-down and then select a particular tab to download the file. Can anyone please advise what could be done here?
# Question snippet: open the login page, fill in the credentials and submit the form.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: "\w" and "\c" are invalid escape sequences in a normal string literal.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
wait = WebDriverWait(driver, 20)
# driver.get() needs an absolute URL; without a scheme WebDriver rejects the argument.
url = "https://products.markit.com/home/login.jsp"
driver.get(url)
# Each field is addressed by its `name` attribute once it is clickable.
wait.until(EC.element_to_be_clickable((By.NAME, "username"))).send_keys("Admin")
wait.until(EC.element_to_be_clickable((By.NAME, "password"))).send_keys("admin123")
# Clicks the first clickable <button>; presumably the login/submit button — confirm.
wait.until(EC.element_to_be_clickable((By.TAG_NAME, "button"))).click()
Inspect element for first selection (Pricing Data - Loan)
Inspect element for Past Loan Market
Inspect element for Download tab

Cannot find xpath element to click with webdriver

I'm using the https://www.perlego.com/ website to do some scraping for books, my current problem is that i'm trying to search using the isbn Number of the book so the webdriver gets this following page for example https://www.perlego.com/search?query=9780717183241
However, using page inspection on the website, i cannot find the element that i can click on using webdriver
the idea is to click on the book and load the page that comes with it
The following code closes the cookies banner and clicks on the book link and opens it
# Answer snippet: dismiss the cookies banner on the search results page, then click the book link.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: "\w" and "\c" are invalid escape sequences in a normal string literal.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.perlego.com/search?query=9780717183241&tab=book&page=1&language=All%20languages&publicationDate=&topic=&publisher=&author=&format="
driver.get(url)
# `*=` is a substring match on the attribute value, so the exact test-id need not be known.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-testid*='Cookies']"))).click()
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a[data-test-locator*='new-book']"))).click()

Selenium problem with ElementClickInterceptedException

I am trying to scrape this URL, which involves a Download button, and I am having a problem: the last line raises an ElementClickInterceptedException.
My actual goal is to download the CSV file.
The code:
# Question snippet: open a PubChem compound page, follow the NIOSH toxicity link, then click Download.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from ipykernel import kernelapp as app
import time
options = webdriver.ChromeOptions()
driver_path = 'C:\\Users\\Idener\\Downloads\\chromedriver_win32\\chromedriver.exe'
driver = webdriver.Chrome(driver_path, options=options)
url = "https://pubchem.ncbi.nlm.nih.gov/compound/2078"
driver.get(url)
wait = WebDriverWait(driver, 5)
# XPath attribute tests use "@": [@id="..."]. A "#" in that position is not valid XPath.
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="NIOSH-Toxicity-Data"]/div[1]/div/div/a'))).click()
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="Download"]'))).click()
enter image description here
The element you are trying to click is initially outside the visible viewport. So, you first need to scroll the page and only then click on that element.
By clicking the first element new tab is opened and the second element you want to click is there, on the second tab. So, you need to switch to the new tab to access that element.
No need to define wait = WebDriverWait(driver, 10) second time.
The following code is working:
# Answer snippet: scroll the NIOSH link into view, click it, switch to the new tab and click Download.
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Raw string: "\w" and "\c" are invalid escape sequences in a normal string literal.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://pubchem.ncbi.nlm.nih.gov/compound/2078#section=Toxicity"
driver.get(url)
element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#NIOSH-Toxicity-Data a[title*='Open']")))
# Accessing this property has the side effect of scrolling the element into view.
element.location_once_scrolled_into_view
# Short pause so the scroll can settle before the click.
time.sleep(1)
element.click()
# The click opens a second tab; the Download button lives there.
driver.switch_to.window(driver.window_handles[1])
wait.until(EC.element_to_be_clickable((By.ID, "Download"))).click()
It does not download the file, only opens the downloading dialog

Dropdown menu not clicking in python selenium

# Question snippet: open the product page and try to pick size "8" from the size dropdown.
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.remote import webelement
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
url = "https://www.mrporter.com/en-gb/mens/product/nike/shoes/low-top-sneakers/space-hippie-04-recycled-stretch-knit-sneakers/19971654707345242"
# Raw string: "\P" and "\c" are invalid escape sequences in a normal string literal.
PATH = r'C:\Program Files (x86)\chromedriver.exe'
browser = webdriver.Chrome(PATH)
browser.get(url)
# NOTE(review): a class-name locator presumably accepts a single class only; this
# space-separated list is likely why the dropdown is never found — confirm.
element_dropdown = browser.find_element_by_class_name("CombinedSelect11__field CombinedSelect11__field--selectableOption CombinedSelect11__field--nativeSelect")
select = Select(element_dropdown)
try:
    select.select_by_visible_text("8")
except NoSuchElementException:
    print("the item doesnt exist")
I am trying to locate the dropdown menu of the link in my code. Once the dropdown box is located I want to search by visible text for a size 8. However whatever I try it still doesn't work.
You can try using explicit wait and then perform your operation. Please take a look at the below code which I have written to replicate your scenario. It's working fine for me. Do let me know if you face any problems.
# Answer snippet: open the size picker with explicit waits and choose size 8.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
driver = webdriver.Chrome(options=options)
driver.get(
    'https://www.mrporter.com/en-gb/mens/product/nike/shoes/low-top-sneakers/space-hippie-04-recycled-stretch-knit-'
    'sneakers/19971654707345242')
wait = WebDriverWait(driver, 30)
# Open the custom dropdown by clicking its "Select a size" placeholder.
wait.until(EC.visibility_of_element_located((By.XPATH, '//div[text()="Select a size"]'))).click()
# XPath attribute tests use "@": [@data-value="8"]. A "#" in that position is not valid XPath.
wait.until(EC.visibility_of_element_located((By.XPATH, '//li[@data-value="8"]'))).click()

Categories