I'm trying to scrape the data from the website
http://www.lacoteimmo.com/prix-de-l-immo/location/pays/france.htm#/.
I'm struggling to click on the first suggestion. This is the code I have so far:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import time
import csv
import unittest
import sys
import datetime
import os.path
import pandas as pd
from geopy.geocoders import GoogleV3
from datetime import datetime
from selenium import webdriver
from bs4 import NavigableString
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
from bs4.element import Tag
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.remote.errorhandler import ErrorHandler
def main(self):
    """Open the price map for each city and click its first autosuggestion.

    A Chrome driver is created, stored on ``self.driver`` and always quit
    before returning, even if a page interaction raises.
    """
    CITIES = ["La Rochelle"]
    self.driver = webdriver.Chrome()
    driver = self.driver
    try:
        # ID is the 1-based position of the city, printed for progress tracking.
        for ID, city in enumerate(CITIES, start=1):
            # Parenthesised single-argument print works on Python 2 and 3.
            print(str(city) + " , " + str(ID))
            try:
                driver.get("http://www.lacoteimmo.com/prix-de-l-immo/location/pays/france.htm#/")
                driver.implicitly_wait(30)
                driver.find_element_by_css_selector("#mapAutosuggest").send_keys(city)  # Enter city
                # Wait until the autosuggestion appears and click on the first suggestion.
                # NOTE(review): this is the selector the question reports as not working;
                # the answers use '.slam-aui-results > span:nth-child(1)' instead.
                condition = EC.visibility_of_element_located((By.CSS_SELECTOR, '#mapAutosuggest + span > span:nth-child(1)'))
                WebDriverWait(driver, 5).until(condition).click()
                driver.implicitly_wait(50)
            except (NoSuchElementException, TimeoutException):
                # A misspelled city produces no suggestion. WebDriverWait reports
                # that as TimeoutException, so both exception types are skipped.
                print("No suggestion found for " + str(city))
    finally:
        # Close the browser even when an unexpected error aborts the loop.
        self.driver.quit()
After sending the character sequence "La Rochelle", the next step is to click on the first suggestion. To do that, you have to induce WebDriverWait for the element to be clickable, and you can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome: start maximized and without the automation info bar.
options = webdriver.ChromeOptions()
for flag in ("start-maximized", 'disable-infobars'):
    options.add_argument(flag)
driver = webdriver.Chrome(
    chrome_options=options,
    executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe',
)
driver.get("http://www.lacoteimmo.com/prix-de-l-immo/location/pays/france.htm#/")

# Type the city name into the autosuggest search box.
search_box = driver.find_element_by_css_selector("#mapAutosuggest")
search_box.send_keys("La Rochelle")

# Wait (up to 20 s) for the highlighted suggestion line to become clickable, then click it.
suggestion_locator = (By.CSS_SELECTOR, "div.slam-aui-results>span.slam-aui-results-line.focus")
first_suggestion = WebDriverWait(driver, 20).until(EC.element_to_be_clickable(suggestion_locator))
first_suggestion.click()
Browser Snapshot:
import time
from geopy.geocoders import GoogleV3
from datetime import datetime
from selenium import webdriver
from bs4 import NavigableString
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
from bs4.element import Tag
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.remote.errorhandler import ErrorHandler
from selenium.webdriver.chrome.options import Options
CITIES = ["La Rochelle"]

chrome_options = Options()
chrome_options.add_argument("--start-maximized")
# Uncomment to run without a visible browser window:
# chrome_options.add_argument("--headless")
driver = webdriver.Chrome(
    executable_path='chrome_driver_path',
    chrome_options=chrome_options,
)

ID = 1  # 1-based counter printed next to each city
try:
    for city in CITIES:
        print(str(city) + " , " + str(ID))
        ID += 1
        try:
            driver.get("http://www.lacoteimmo.com/prix-de-l-immo/location/pays/france.htm#/")
            driver.implicitly_wait(30)
            driver.find_element_by_css_selector("#mapAutosuggest").send_keys(city)  # Enter city
            # Wait until the autosuggestion appears and click on the first suggestion.
            # until() returns the located element, so no second lookup is needed.
            condition = EC.visibility_of_element_located((By.CSS_SELECTOR, '.slam-aui-results > span:nth-child(1)'))
            firstResult = WebDriverWait(driver, 5).until(condition)
            firstResult.click()
            time.sleep(10)
            driver.implicitly_wait(50)
        except (NoSuchElementException, TimeoutException):
            # A misspelled city produces no suggestion. WebDriverWait reports
            # that as TimeoutException, so both exception types are skipped.
            print("No suggestion found for " + str(city))
finally:
    # Close the browser even when an unexpected error aborts the loop.
    driver.quit()
Related
I am struggling with Selenium
for the url: https://pubchem.ncbi.nlm.nih.gov/compound/2078
I am trying to click the button Download, but it doesn't find the element.
My code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from ipykernel import kernelapp as app
import time
# Start Chrome with default options using a locally installed chromedriver.
options = webdriver.ChromeOptions()
driver_path = 'C:\\Users\\Idener\\Downloads\\chromedriver_win32\\chromedriver.exe'
driver = webdriver.Chrome(driver_path, options=options)
url = f"https://pubchem.ncbi.nlm.nih.gov/compound/2078"
driver.get(url)
# XPath selects attributes with '@' (here: @id). The adjacent string literals
# below are concatenated by Python into //*[@id='page-download-btn'].
# The click is attempted immediately after get(), with no explicit wait.
driver.find_element_by_xpath("//*[@id='"'page-download-btn'"']").click()
enter image description here
Your XPath is not valid. You don't need so many quotes.
# Attributes are addressed with '@' in XPath, so the id test is [@id='...'].
driver.find_element_by_xpath("//*[@id='page-download-btn']").click()
You are missing a delay.
An element should be clicked only when it is completely rendered and ready to accept a click event. Explicit waits (WebDriverWait with expected_conditions) should be used for that.
Also, there is no need to add f before the URL value, or to use '"' instead of ' in the XPath expression.
The following code will work for you:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from ipykernel import kernelapp as app
import time
# Launch Chrome through the local chromedriver binary and open the compound page.
driver_path = 'C:\\Users\\Idener\\Downloads\\chromedriver_win32\\chromedriver.exe'
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(driver_path, options=options)
url = "https://pubchem.ncbi.nlm.nih.gov/compound/2078"
driver.get(url)

# Block for up to 10 seconds until the download button is clickable, then click it.
wait = WebDriverWait(driver, 10)
download_locator = (By.ID, "page-download-btn")
download_button = wait.until(EC.element_to_be_clickable(download_locator))
download_button.click()
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from bs4 import BeautifulSoup
import pandas as pd
import requests
from selenium.webdriver import Chrome
# Open nasdaq.com, search for the GLD symbol and click through the result tabs.
options = webdriver.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(options=options)
try:
    driver.get('https://www.nasdaq.com/')
    driver.maximize_window()
    wait = WebDriverWait(driver, 15)

    # XPath addresses attributes with '@', so the id test is [@id="..."].
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="find-symbol-input"]'))).click()
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="find-symbol-input"]'))).clear()
    driver.find_element(By.ID, "find-symbol-input").send_keys("GLD")
    wait.until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[2]/div/main/div[2]/div[2]/section[1]/div/div[1]/div[1]/form/div/div/div/a[1]'))).click()
    driver.implicitly_wait(15)
    wait.until(EC.element_to_be_clickable((By.XPATH, "/html/body/div[3]/div/main/div[2]/div[4]/div/section/div/div[3]/ul/li[2]/a/span"))).click()
    # NOTE(review): clear() is called on a <span>; it is meant for editable
    # elements, so this call may raise - confirm it is needed.
    wait.until(EC.element_to_be_clickable((By.XPATH, "/html/body/div[3]/div/main/div[2]/div[4]/div/section/div/div[3]/ul/li[2]/a/span"))).clear()
    driver.implicitly_wait(10)
    # NOTE(review): the value below looks like a list of CSS classes rather
    # than an element id - verify against the page markup.
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="table-tabs__tab table-tabs__tab--active"]'))).click()
finally:
    # Close the browser even if one of the waits times out.
    driver.quit()
**Not sure what I am doing wrong; any feedback is helpful. Thank you.**
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.remote import webelement
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
# Open the product page and pick size "8" from the size drop-down.
url = "https://www.mrporter.com/en-gb/mens/product/nike/shoes/low-top-sneakers/space-hippie-04-recycled-stretch-knit-sneakers/19971654707345242"
# Raw string so the backslashes in the Windows path are never read as escapes.
PATH = r'C:\Program Files (x86)\chromedriver.exe'
browser = webdriver.Chrome(PATH)
browser.get(url)

# A class-name locator accepts a single class only; an element carrying several
# classes is matched with a CSS selector that chains them with dots.
dropdown_locator = (
    By.CSS_SELECTOR,
    ".CombinedSelect11__field"
    ".CombinedSelect11__field--selectableOption"
    ".CombinedSelect11__field--nativeSelect",
)
# Wait for the element to be present in the DOM before using it.
element_dropdown = WebDriverWait(browser, 30).until(
    EC.presence_of_element_located(dropdown_locator)
)
# NOTE(review): Select() requires a <select> element - confirm that is what
# these classes are attached to.
select = Select(element_dropdown)
try:
    select.select_by_visible_text("8")
except NoSuchElementException:
    print("the item doesnt exist")
I am trying to locate the dropdown menu of the link in my code. Once the dropdown box is located I want to search by visible text for a size 8. However whatever I try it still doesn't work.
You can try using explicit wait and then perform your operation. Please take a look at the below code which I have written to replicate your scenario. It's working fine for me. Do let me know if you face any problems.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Open the product page in a maximized Chrome window.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
driver = webdriver.Chrome(options=options)
driver.get(
    'https://www.mrporter.com/en-gb/mens/product/nike/shoes/low-top-sneakers/space-hippie-04-recycled-stretch-knit-'
    'sneakers/19971654707345242')
wait = WebDriverWait(driver, 30)
# Open the size picker, then click the entry whose data-value attribute is "8".
# XPath addresses attributes with '@', hence @data-value.
wait.until(EC.visibility_of_element_located((By.XPATH, '//div[text()="Select a size"]'))).click()
wait.until(EC.visibility_of_element_located((By.XPATH, '//li[@data-value="8"]'))).click()
Try to get the text from html page based on Xpath using python 2.7 selenium scripting ... Code Not Working is at the end of the script last 3 lines ... Any pointers will be helpful. Thanks
import os
import time
import webbrowser
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains as AC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
#region Browser
driver = webdriver.Firefox()
print("Browser: " + driver.name)
driver.set_window_size(1024, 768)
#endregion
driver.get("http://xyz/...")
textName = "Max Size"
try:
textElement = driver.find_elements_by_xpath('/html/body/table/tbody/tr[1]/td[2]/span[16]')
textContent = textElement.text
print ("Text ... " + textContent)
...
How can I redirect the output of a script into a DataFrame?
This code opens a browser, types in the given data, and gets the first result available.
code :
selenium pincodes
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
import html5lib
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
# Look up each place on Google and print the first pincode result found.
pin = ['mumbai', 'newyork']
url = "https://www.google.com/"
chromedriver = r"C:\Users\me\chromedriver"
# XPath addresses attributes with '@', hence @class.
result_xpath = '//div[@class="IAznY"]//div[@class="title"]'

for i in pin:
    # A fresh browser is started per search term, so it is also closed per
    # iteration; otherwise every browser but the last would stay open.
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.implicitly_wait(30)
        driver.get(url)
        search = driver.find_element_by_name('q')
        search.send_keys(i, 'pincode')
        search.send_keys(Keys.RETURN)
        WebDriverWait(driver, 10).until(expected_conditions.visibility_of_element_located((By.XPATH, result_xpath)))
        elmts = driver.find_elements_by_xpath(result_xpath)
        print(i, elmts[0].text)
        time.sleep(3)
    finally:
        driver.quit()
this code outputs the following
newyork 10001
mumbai 230532
How can I redirect this output into a DataFrame like this?
city pincode
newyork 10001
mumbai 230532
Declare two arrays and add into dataframe.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
import html5lib
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
# Look up each place on Google and collect the results into a DataFrame.
pin = ['mumbai', 'newyork']
url = "https://www.google.com/"
chromedriver = r"C:\Users\me\chromedriver"
# XPath addresses attributes with '@', hence @class.
result_xpath = '//div[@class="IAznY"]//div[@class="title"]'

# Declare lists here; one entry is appended to each per search term.
City = []
PinCode = []
for i in pin:
    # A fresh browser is started per search term, so it is also closed per
    # iteration; otherwise every browser but the last would stay open.
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.implicitly_wait(30)
        driver.get(url)
        search = driver.find_element_by_name('q')
        search.send_keys(i, 'pincode')
        search.send_keys(Keys.RETURN)
        WebDriverWait(driver, 10).until(expected_conditions.visibility_of_element_located((By.XPATH, result_xpath)))
        elmts = driver.find_elements_by_xpath(result_xpath)
        # Append the data into the lists.
        City.append(i)
        PinCode.append(elmts[0].text)
        time.sleep(3)
    finally:
        driver.quit()

# Build the DataFrame once, after all lookups have finished.
df = pd.DataFrame({"City": City, "PinCode": PinCode})
print(df)
Output:
City PinCode
0 mumbai 230532
1 newyork 10001
Like this?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
import html5lib
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
# Look up each place on Google and collect the results into a DataFrame.
pin = ['mumbai', 'newyork']
url = "https://www.google.com/"
chromedriver = r"C:\Users\me\chromedriver"
# XPath addresses attributes with '@', hence @class.
result_xpath = '//div[@class="IAznY"]//div[@class="title"]'

# DataFrame.append was removed in pandas 2.0. Collecting plain rows and
# building the frame once avoids it and gives a unique 0..n-1 index.
rows = []
for i in pin:
    # A fresh browser is started per search term, so it is also closed per
    # iteration; otherwise every browser but the last would stay open.
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.implicitly_wait(30)
        driver.get(url)
        search = driver.find_element_by_name('q')
        search.send_keys(i, 'pincode')
        search.send_keys(Keys.RETURN)
        WebDriverWait(driver, 10).until(expected_conditions.visibility_of_element_located((By.XPATH, result_xpath)))
        elmts = driver.find_elements_by_xpath(result_xpath)
        print(i, elmts[0].text)
        rows.append([i, elmts[0].text])
        time.sleep(3)
    finally:
        driver.quit()

df_output = pd.DataFrame(rows, columns=["City", "pincode"])
print(df_output)
Executing a script (as a subprocess) from another script and parsing the (redirected) output can technically be done, but that's definitely not the right approach (nor the easiest, actually).
Instead, you want to refactor your scripts as functions in modules, and have one function working on the results of the other.