How to solve a CAPTCHA - Python

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import csv
import requests
from csv import writer
# Scrape the product title from a single offer page.
from selenium.common.exceptions import TimeoutException

url="https://allegro.pl/oferta/lurlego-76900-speed-champions-koenigsegg-jesko-11096977887"

options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome expects the size as "width,height".
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# The options must be handed to the driver, otherwise none of the flags
# configured above take effect.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
driver.get(url)

# Default value so the final print cannot fail with NameError when the
# element is missing.
title = None
try:
    # XPath attribute tests use "@"; wait for the heading instead of reading
    # the DOM immediately after navigation.
    heading = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, '//div[@class="msub_k4"]//h4'))
    )
    title = heading.text
except TimeoutException:
    # The heading never appeared (for example, a different page was served
    # instead of the offer); report it rather than hiding the failure.
    print("title element not found on", url)
print(title)
How can I get past the CAPTCHA while scraping the data from this page? Is there any way to handle it? Please recommend a solution.

Related

Xpath wrong using selenium

from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from csv import writer
# Print the paragraphs that follow the "Games" heading of a casino review.
from selenium.common.exceptions import NoSuchElementException

options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome expects the size as "width,height".
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")
# The options must be handed to the driver, otherwise they are ignored.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
wait = WebDriverWait(driver, 20)
URL = 'https://www.askgamblers.com/online-casinos/reviews/yukon-gold-casino-casino'
driver.get(URL)
# XPath attribute tests use "@".
data=driver.find_elements(By.XPATH,"//section[@class='review-text richtext']")
for row in data:
    # Default value so the print below cannot fail with NameError.
    para0 = None
    try:
        # NOTE: contains(.,'Games') matches every heading whose text contains
        # "Games", including "Virtual Games".
        para0= row.find_element(By.XPATH,"//h2[text()[contains(.,'Games')]]/following-sibling::p[following::h2[text()[contains(.,'Support')]]]").text
    except NoSuchElementException:
        # No matching paragraph in this section; keep going.
        pass
    print(para0)
I want to collect only the text under the "Games" heading, but the XPath also returns the text under "Virtual Games". How can I restrict contains() so that it matches only "Games"? Please recommend a solution. This is the page link: https://www.askgamblers.com/online-casinos/reviews/yukon-gold-casino-casino
I want only this text (the "Games" section).
I do not want the text of the "Virtual Games" section.
[contains(.,'Games')] will match both Games and Virtual Games.
What you can do here is:
Use equals instead of contains, like this:
"[text()='Games']"
or use starts-with:
"[starts-with(text(), 'Games')]"
So this line para0= row.find_element(By.XPATH,"//h2[text()[contains(.,'Games')]]/following-sibling::p[following::h2[text()[contains(.,'Support')]]]").text can be changed to
para0= row.find_element(By.XPATH,"//h2[text()='Games']/following-sibling::p[following::h2[contains(.,'Support')]]").text
or
para0= row.find_element(By.XPATH,"//h2[starts-with(text(), 'Games')]/following-sibling::p[following::h2[contains(.,'Support')]]").text

website search bar doesn't work for python selenium

I would like to open the website automatically and search for some information, but the search never completes: the page keeps loading, or the browser closes right after the key is typed into the search bar.
This is what I tried:
import selenium
import pandas as pd
import numpy as np
import platform
import time
import random
from os import getcwd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Configure Chrome, open the Sina quotes page, type a code into the search
# box and click the submit input.
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-notification")
options.add_argument("--disable-infobars")
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_argument("--remote-debugging-port=9230")
#options.add_argument("--headless")
url = 'https://vip.stock.finance.sina.com.cn/mkt/#hs_z'
# NOTE(review): no `driver = webdriver.Chrome(...)` appears in this snippet
# before the first use of `driver`, and `options` is never passed to a driver.
driver.get(url)
w = WebDriverWait(driver, 10)
# NOTE(review): XPath attribute tests are normally written `@id`; confirm the
# `#id` form in the locators below is intended.
w.until(EC.presence_of_element_located((By.XPATH, '//*[#id="inputSuggest"]')))
driver.find_element('xpath', '//*[#id="inputSuggest"]').clear()
# NOTE(review): the next line ends with an unbalanced `)`, which is a syntax error.
driver.find_element('xpath', '//*[#id="inputSuggest"]').send_keys('sz111973'))
driver.find_element('xpath', '//*[#id="SSForm"]/input[3]').click()
But somehow the website does not perform the search and keeps loading, or the browser closes right after the key is typed into the search bar.
Any help will be appreciated! Thanks.
There are several issues here:
To prevent the site from taking a very long time to load, you can use the eager pageLoadStrategy.
There is a redundant ) at the end of this line: driver.find_element('xpath', '//*[#id="inputSuggest"]').send_keys('sz111973'))
The following code works:
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Open the Sina quotes page with the "eager" page-load strategy and run a search.
options = Options()
options.add_argument("start-maximized")
# "eager" lets driver.get() return once the DOM is ready instead of waiting
# for every sub-resource. It is set on the options object because recent
# Selenium 4 releases no longer accept the desired_capabilities argument.
options.page_load_strategy = "eager"
# Raw string: the backslashes in a Windows path must not be read as escapes.
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://vip.stock.finance.sina.com.cn/mkt/#hs_z'
driver.get(url)
wait = WebDriverWait(driver, 20)
# Named search_box rather than input so the builtin is not shadowed.
search_box = wait.until(EC.element_to_be_clickable((By.ID, 'inputSuggest')))
search_box.clear()
search_box.send_keys('sz111973')
# The submit control is the input element immediately after the search box.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#inputSuggest +input'))).click()

how to loop a set of links given in an array using selenium in python

I don't know whether a for loop is the right approach or whether there is some other way to do this. I am new to Selenium.
from selenium.webdriver.support import ui
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
# Start Chrome and try to open each URL from the list `a`.
options = webdriver.ChromeOptions()
# NOTE(review): the path is a non-raw string containing a backslash ("\P");
# a raw string would make the intent explicit.
driver = webdriver.Chrome(chrome_options=options, executable_path='C:\Program Files (x86)/chromedriver')
driver.maximize_window()
a=["http://www.googgle.com","http://www.youtube.com","http://www.facebook.com"]
# NOTE(review): `i` is already one of the URL strings, so `a[i]` indexes a
# list with a str and raises TypeError.
for i in a:
driver.get(a[i])
Try this instead.
from selenium.webdriver.support import ui
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
# Start Chrome and open each URL from the list in turn.
options = webdriver.ChromeOptions()
# Raw string so the backslash in the Windows path is taken literally.
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\Program Files (x86)/chromedriver')
driver.maximize_window()
a=["http://www.googgle.com","http://www.youtube.com","http://www.facebook.com"]
# Iterating the list yields the URLs themselves, so pass the loop variable
# (not the whole list, and not an index) to driver.get().
for i in a:
    driver.get(i)
There is no point in using indexing there.

How to redirect output of a program to a dataframe

How can I redirect the output of a script into a DataFrame?
This code opens a browser, searches for each of the given cities, and prints the first result available.
code :
selenium pincodes
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
import html5lib
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
# For each city, search Google for its pincode and print the first result.
pin=['mumbai','newyork']
# Loop-invariant values are set once, outside the loop.
url = "https://www.google.com/"
chromedriver = r"C:\Users\me\chromedriver"
for i in pin :
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.implicitly_wait(30)
        driver.get(url)
        # find_element(By...) replaces the find_element_by_* helpers, which
        # newer Selenium releases no longer provide.
        search = driver.find_element(By.NAME, 'q')
        search.send_keys(i,'pincode')
        search.send_keys(Keys.RETURN)
        # XPath attribute tests use "@". The wait returns the first matching
        # element once it is visible, so no second lookup is needed.
        first = WebDriverWait(driver, 10).until(expected_conditions.visibility_of_element_located((By.XPATH, '//div[@class="IAznY"]//div[@class="title"]')))
        print(i,first.text)
        time.sleep(3)
    finally:
        # Always close the browser started for this city, even on failure.
        driver.quit()
this code outputs the following
newyork 10001
mumbai 230532
how to redirect this output into df like this
city pincode
newyork 10001
mumbai 230532
Declare two lists and build the DataFrame from them.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
import html5lib
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
# For each city, search Google for its pincode, collect the results in two
# lists and build one DataFrame from them.
pin=['mumbai','newyork']
#Declare list here
City=[]
PinCode=[]
# Loop-invariant values are set once, outside the loop.
url = "https://www.google.com/"
chromedriver = r"C:\Users\me\chromedriver"
for i in pin :
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.implicitly_wait(30)
        driver.get(url)
        # find_element(By...) replaces the find_element_by_* helpers, which
        # newer Selenium releases no longer provide.
        search = driver.find_element(By.NAME, 'q')
        search.send_keys(i,'pincode')
        search.send_keys(Keys.RETURN)
        # XPath attribute tests use "@". The wait returns the first matching
        # element once it is visible, so no second lookup is needed.
        first = WebDriverWait(driver, 10).until(expected_conditions.visibility_of_element_located((By.XPATH, '//div[@class="IAznY"]//div[@class="title"]')))
        #Append the data into list
        City.append(i)
        PinCode.append(first.text)
        time.sleep(3)
    finally:
        # Always close the browser started for this city, even on failure.
        driver.quit()
#added into dataframe (built once, after every city has been collected)
df=pd.DataFrame({"City":City,"PinCode":PinCode})
print(df)
Output:
City PinCode
0 mumbai 230532
1 newyork 10001
Like this?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
import html5lib
import json
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
# For each city, search Google for its pincode, print it, and collect all
# results into a single DataFrame.
pin=['mumbai','newyork']
# Rows are gathered in a plain list and turned into a DataFrame once at the
# end; DataFrame.append was removed in pandas 2.0.
rows = []
# Loop-invariant values are set once, outside the loop.
url = "https://www.google.com/"
chromedriver = r"C:\Users\me\chromedriver"
for i in pin :
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.implicitly_wait(30)
        driver.get(url)
        # find_element(By...) replaces the find_element_by_* helpers, which
        # newer Selenium releases no longer provide.
        search = driver.find_element(By.NAME, 'q')
        search.send_keys(i,'pincode')
        search.send_keys(Keys.RETURN)
        # XPath attribute tests use "@". The wait returns the first matching
        # element once it is visible, so no second lookup is needed.
        first = WebDriverWait(driver, 10).until(expected_conditions.visibility_of_element_located((By.XPATH, '//div[@class="IAznY"]//div[@class="title"]')))
        print(i,first.text)
        rows.append([i, first.text])
        time.sleep(3)
    finally:
        # Always close the browser started for this city, even on failure.
        driver.quit()
df_output = pd.DataFrame(rows, columns=["City", "pincode"])
print(df_output)
Executing a script (as a subprocess) from another script and parsing the (redirected) output can technically be done, but that's definitely not the right approach (nor the easiest, actually).
Instead, you want to refactor your scripts as functions in modules, and have one function work on the results of the other.

Can't click on the first suggestion or first value

I'm trying to scrape the data from the website
http://www.lacoteimmo.com/prix-de-l-immo/location/pays/france.htm#/.
I'm struggling to click on the first suggestion. I managed this code as below:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import time
import csv
import unittest
import sys
import datetime
import os.path
import pandas as pd
from geopy.geocoders import GoogleV3
from datetime import datetime
from selenium import webdriver
from bs4 import NavigableString
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
from bs4.element import Tag
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.remote.errorhandler import ErrorHandler
def main(self):
    """Type each city into the site's search box and click the first suggestion.

    A Chrome driver is created and stored on ``self.driver``; it is always
    quit before the function returns. A city whose search box or suggestion
    cannot be found is reported and skipped.
    """
    CITIES = ["La Rochelle"]
    self.driver = webdriver.Chrome()
    driver = self.driver
    try:
        # enumerate(start=1) replaces the hand-maintained ID counter.
        for ID, city in enumerate(CITIES, start=1):
            # print() call form works on both Python 2 and Python 3.
            print(str(city) + " , " + str(ID))
            try:
                driver.get("http://www.lacoteimmo.com/prix-de-l-immo/location/pays/france.htm#/")
                driver.implicitly_wait(30)
                # find_element(By...) replaces the find_element_by_* helpers,
                # which newer Selenium releases no longer provide.
                driver.find_element(By.CSS_SELECTOR, "#mapAutosuggest").send_keys(city)  # Enter city
                # Wait until autosuggestion come and click on first suggestion
                condition = EC.visibility_of_element_located((By.CSS_SELECTOR, '#mapAutosuggest + span > span:nth-child(1)'))
                WebDriverWait(driver, 5).until(condition).click()
                driver.implicitly_wait(50)
            except (NoSuchElementException, TimeoutException):
                # WebDriverWait signals failure with TimeoutException, so both
                # exceptions are needed for the best-effort skip to work.
                print("no suggestion clicked for " + str(city))
    finally:
        # Close the browser even if an unexpected error interrupts the loop.
        self.driver.quit()
Once you have sent the character sequence La Rochelle, to click on the first suggestion you have to induce WebDriverWait for the element to be clickable. You can use the following solution:
Code Block:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Type "La Rochelle" into the search box and click the highlighted suggestion.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver=webdriver.Chrome(chrome_options=options, executable_path=r'C:\Utility\BrowserDrivers\chromedriver.exe')
driver.get("http://www.lacoteimmo.com/prix-de-l-immo/location/pays/france.htm#/")
# find_element(By...) replaces the find_element_by_* helpers, which newer
# Selenium releases no longer provide.
driver.find_element(By.CSS_SELECTOR, "#mapAutosuggest").send_keys("La Rochelle") # Enter city
# Wait until the focused suggestion line is clickable, then click it.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.slam-aui-results>span.slam-aui-results-line.focus"))).click()
Browser Snapshot:
import time
from geopy.geocoders import GoogleV3
from datetime import datetime
from selenium import webdriver
from bs4 import NavigableString
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
from bs4.element import Tag
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.remote.errorhandler import ErrorHandler
from selenium.webdriver.chrome.options import Options
# For each city, type it into the search box and click the first suggestion.
CITIES = ["La Rochelle"]
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
# chrome_options.add_argument("--headless")
driver = webdriver.Chrome(
    executable_path='chrome_driver_path',
    chrome_options=chrome_options,
)
try:
    # enumerate(start=1) replaces the hand-maintained ID counter.
    for ID, city in enumerate(CITIES, start=1):
        print(str(city) + " , " + str(ID))
        try:
            driver.get("http://www.lacoteimmo.com/prix-de-l-immo/location/pays/france.htm#/")
            driver.implicitly_wait(30)
            # find_element(By...) replaces the find_element_by_* helpers,
            # which newer Selenium releases no longer provide.
            driver.find_element(By.CSS_SELECTOR, "#mapAutosuggest").send_keys(city)  # Enter city
            # Wait until autosuggestion come and click on first suggestion
            condition = EC.visibility_of_element_located((By.CSS_SELECTOR, '.slam-aui-results > span:nth-child(1)'))
            # The wait returns the visible element, so it is clicked directly
            # instead of being looked up a second time.
            firstResult = WebDriverWait(driver, 5).until(condition)
            firstResult.click()
            time.sleep(10)
            driver.implicitly_wait(50)
        except (NoSuchElementException, TimeoutException):
            # WebDriverWait signals failure with TimeoutException, so both
            # exceptions are needed for the best-effort skip to work.
            print("no suggestion clicked for " + str(city))
finally:
    # Close the browser even if an unexpected error interrupts the loop.
    driver.quit()

Categories