Error while web scraping with Selenium - Python

I'm scraping reviews from the Google Play store.
The script starts out fine but stops partway through with the following error:
selenium.common.exceptions.ElementNotVisibleException: Message: {"errorMessage":"Element is not currently visible and may not be manipulated","request":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Connection":"close","Content-Length":"81","Content-Type":"application/json;charset=UTF-8","Host":"127.0.0.1:51812","User-Agent":"Python http auth"},"httpVersion":"1.1","method":"POST","post":"{\"id\": \":wdc:1505887578605\", \"sessionId\": \"cca93cc0-9dc9-11e7-a685-bd84ddef3ed2\"}","url":"/click","urlParsed":{"anchor":"","query":"","file":"click","directory":"/","path":"/click","relative":"/click","port":"","host":"","password":"","user":"","userInfo":"","authority":"","protocol":"","source":"/click","queryKey":{},"chunks":["click"]},"urlOriginal":"/session/cca93cc0-9dc9-11e7-a685-bd84ddef3ed2/element/:wdc:1505887578605/click"}}
Screenshot: available via screen
I searched and tried many revisions, but I could not fix it. I have been stuck on this for several days, so I am posting my code.
Why does this error occur?
How should I fix it?
#python 3.6
from selenium import webdriver
from time import sleep
from bs4 import BeautifulSoup, Comment
import pandas as pd

#Setting up Chrome webdriver Options
#chrome_options = webdriver.ChromeOptions()
#setting up local path of chrome binary file
#chrome_options.binary_location = "/Users/Norefly/chromedriver2/chromedriver.exec"
#creating Chrome webdriver instance with the set chrome_options
driver = webdriver.PhantomJS("C:/Python/phantomjs-2.1.1-windows/bin/phantomjs.exe")

link = "https://play.google.com/store/apps/details?id=com.supercell.clashofclans&hl=en"
driver.get(link)
#driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
Ptitle = driver.find_element_by_class_name('id-app-title').text.replace(' ', '')
print(Ptitle)
#driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]').click()
sleep(1)
driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]/div/div').click()
#select_newest.select_by_visible_text('Newest')
#driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]/div/div').click()
sleep(2)
#driver.find_element_by_css_selector('.review-filter.id-review-sort-filter.dropdown-menu-container').click()
driver.find_element_by_css_selector('.displayed-child').click()
#driver.find_element_by_xpath("//button[@data-dropdown-value='1']").click()
driver.execute_script("document.querySelectorAll('button.dropdown-child')[0].click()")

reviews_df = []
for i in range(1, 10000):
    try:
        for elem in driver.find_elements_by_class_name('single-review'):
            print(str(i))
            content = elem.get_attribute('outerHTML')
            soup = BeautifulSoup(content, "html.parser")
            #print(soup.prettify())
            date = soup.find('span', class_='review-date').get_text()
            rating = soup.find('div', class_='tiny-star')['aria-label'][6:7]
            title = soup.find('span', class_='review-title').get_text()
            txt = soup.find('div', class_='review-body').get_text().replace('Full Review', '')[len(title) + 1:]
            print(soup.get_text())
            temp = pd.DataFrame({'Date': date, 'Rating': rating, 'Review Title': title, 'Review Text': txt}, index=[0])
            print('-' * 10)
            reviews_df.append(temp)
            #print(elem)
    except:
        print('s')
    driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]/div/div').click()

reviews_df = pd.concat(reviews_df, ignore_index=True)
reviews_df.to_csv(Ptitle + 'review_google.csv', encoding='utf-8')
#driver.close()
Since I am not sure which part matters, I am including the full error message and file paths below.
Traceback (most recent call last):
File "C:/Users/lobyp/Downloads/reviewex.py", line 51, in <module>
driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]/div/div').click()
File "C:\Users\lobyp\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\webelement.py", line 78, in click
self._execute(Command.CLICK_ELEMENT)
File "C:\Users\lobyp\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\webelement.py", line 499, in _execute
return self._parent.execute(command, params)
File "C:\Users\lobyp\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 297, in execute
self.error_handler.check_response(response)
File "C:\Users\lobyp\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementNotVisibleException: Message: {"errorMessage":"Element is not currently visible and may not be manipulated","request":{"headers":{"Accept":"application/json","Accept-Encoding":"identity","Connection":"close","Content-Length":"81","Content-Type":"application/json;charset=UTF-8","Host":"127.0.0.1:53283","User-Agent":"Python http auth"},"httpVersion":"1.1","method":"POST","post":"{\"id\": \":wdc:1505888277040\", \"sessionId\": \"6b69b0a0-9dcb-11e7-bc02-87f5b92766da\"}","url":"/click","urlParsed":{"anchor":"","query":"","file":"click","directory":"/","path":"/click","relative":"/click","port":"","host":"","password":"","user":"","userInfo":"","authority":"","protocol":"","source":"/click","queryKey":{},"chunks":["click"]},"urlOriginal":"/session/6b69b0a0-9dcb-11e7-bc02-87f5b92766da/element/:wdc:1505888277040/click"}}
Screenshot: available via screen

I'm not an expert, but I believe this exception appears when the element is hidden in the HTML (for example, it has a hidden attribute or is styled invisible). I had the same issue with a drop-down menu on a site once; I had to scroll in Selenium to make the element visible before clicking it.
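For illustration, here is a minimal sketch of the two usual workarounds: waiting until the element is visible with WebDriverWait, or scrolling it into view and clicking via JavaScript (the XPath is the one from the question; whether it still matches anything on the live page is an assumption):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

xpath = '//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]/div/div'

# Option 1: wait up to 10 seconds until Selenium reports the element visible.
elem = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, xpath)))
elem.click()

# Option 2: scroll the element into view, then click it via JavaScript,
# which does not require the element to be visible.
elem = driver.find_element_by_xpath(xpath)
driver.execute_script("arguments[0].scrollIntoView(true);", elem)
driver.execute_script("arguments[0].click();", elem)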

Related

I'm a noob. I am following this Python Selenium web scraping tutorial: /watch?v=lTypMlVBFM4. I don't know why it won't get the titles from YouTube and print them.

I am trying to scrape this site, https://www.youtube.com/@JohnWatsonRooney/videos, for the video titles.
This short explanation doesn't help with the code problem, but I'll give it anyway: I am doing this to get my foot in the door, so I can later scrape other sites for prices and build a program that gives me a link to the cheapest product across several sites, as well as a ranking of the other products and their names in case that wasn't the one I was looking for.
Here is the terminal output.
Traceback (most recent call last):
File "C:\\Users\\PRO\\Documents\\dynamic_web_scraping_test.py", line 11, in \<module\>
videos = driver.find_element("name", "style-scope ytd-rich-grid-media")
File "C:\\Users\\PRO\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\selenium\\webdriver\\remote\\webdriver.py", line 830, in find_element
return self.execute(Command.FIND_ELEMENT, {"using": by, "value": value})\["value"\]
File "C:\\Users\\PRO\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\selenium\\webdriver\\remote\\webdriver.py", line 440, in execute
self.error_handler.check_response(response)
File "C:\\Users\\PRO\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py", line 245, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: Unable to locate element: \[name="style-scope ytd-rich-grid-media"\]
Stacktrace:
RemoteError#chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError#chrome://remote/content/shared/webdriver/Errors.sys.mjs:180:5
NoSuchElementError#chrome://remote/content/shared/webdriver/Errors.sys.mjs:392:5
element.find/\</\<#chrome://remote/content/marionette/element.sys.mjs:275:16
C:\\Users\\PRO\\Documents\>
Here is my code:
import time
from selenium import webdriver

url1 = 'https://www.youtube.com/@JohnWatsonRooney/videos'
url2 = 'https://www.youtube.com/@MyCraftyDen/videos'

driver = webdriver.Firefox()
driver.get(url1)
time.sleep(10)
videos = driver.find_element("name", "style-scope ytd-rich-grid-media")
for video in videos:
    titles = video.find_element_by_xpath('//*[@id="video-title-link"]').text
    print(title)
I don't know what else to try; I did my best looking up this problem before posting. I know the browser needs time to load the page, hence the time.sleep(10) in my code, but that alone doesn't fix it.
If you are looking to find the links of all the videos on the given page, you can use the code below:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By

url1 = 'https://www.youtube.com/@JohnWatsonRooney/videos'
url2 = 'https://www.youtube.com/@MyCraftyDen/videos'

driver = webdriver.Firefox()
driver.get(url1)
time.sleep(10)

videos = driver.find_elements(By.ID, "video-title-link")
for video in videos:
    title = video.get_attribute("href")
    print(title)
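As a side note, the fixed time.sleep(10) can be replaced with an explicit wait, which returns as soon as the elements appear instead of always pausing for ten seconds. A minimal sketch, using the same locator as the answer above:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait up to 10 seconds for at least one video title link to be present.
videos = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.ID, "video-title-link")))
for video in videos:
    print(video.get_attribute("href"))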

Selenium - how to continue with a URL list after hitting an offline website?

May I ask for some assistance with the following task I'm trying to complete? I'm trying to use Selenium to automatically check a list of URLs and perform a set of click actions on each of them; if a website is offline, I would like to move on to the next URL in the list and perform the same click actions there.
Below is the code I have so far; however, it doesn't work:
from selenium import webdriver  ## importing webdriver
import time  ## importing waiting time functions
from selenium.common.exceptions import TimeoutException  ## importing the timeout functions

driver = webdriver.Chrome()

## Setting the timeout timer in seconds
MAX_TIMEOUT_SECONDS = 60

## List of urls to check
urlsList = ['urlone', 'urltwo', 'urlthree', 'urlfour']

## Looping over the list of urls
for s in urlsList:
    urls = str(s)
    ## loop body with timeout exception handling
    try:
        ## Activating the timeout functions
        driver.set_page_load_timeout(MAX_TIMEOUT_SECONDS)
        driver.get(urls)
        ## Clicking on tab
        systemTab = driver.find_element_by_xpath('/html/body/ul[1]/li[3]/a')
        systemTab.click()
        ## Clicking on button
        buttonRaul = driver.find_element_by_xpath('/html/body/ul[2]/li[4]/a')
        buttonRaul.click()
        ## Clicking on confirmation
        buttomGoo = driver.find_element_by_xpath('/html/body/div[2]/div[2]/button')
        buttomGoo.click()
        time.sleep(5)  ## Wait 5 seconds
    except TimeoutException:
        continue

driver.quit()
I tried using the timeout as a way to determine whether a website is offline/not responding after 60 seconds; however, when I run the code, it only works until it hits an offline website, and then it stops there.
I am trying to answer this question in Java, as I am not very familiar with Python.
When you run the test in offline mode, you can use loadingFailed() to get the logs from the browser, i.e. net::ERR_INTERNET_DISCONNECTED, and then proceed to the next URL.
import org.openqa.selenium.devtools.*;

driver = new ChromeDriver();
devTools = ((ChromeDriver) driver).getDevTools();
driver.get("https://www.homepage.com");
devTools.createSession();
devTools.addListener(loadingFailed(), loadingFailed -> {
    System.out.println(loadingFailed.getErrorText());
    Assert.assertEquals(loadingFailed.getErrorText(), "net::ERR_INTERNET_DISCONNECTED");
});
I'm pretty new to Selenium and Python. The first URL runs as it should, but my second URL is currently offline. The second URL opens the window, and after 10-20 seconds the following message shows and the code stops running:
Traceback (most recent call last):
File "/Users/sweatynerd/Desktop/smtautomation.py", line 19, in <module>
driver.get(urls)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
self.execute(Command.GET, {'url': url})
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: net::ERR_ADDRESS_UNREACHABLE
(Session info: chrome=92.0.4515.107)
Thanks for your help.
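Judging by that traceback, the offline site raises WebDriverException (net::ERR_ADDRESS_UNREACHABLE) rather than TimeoutException, so the except clause never matches. A minimal sketch of one way to handle both, built on the loop from the question:

from selenium.common.exceptions import TimeoutException, WebDriverException

for s in urlsList:
    try:
        driver.set_page_load_timeout(MAX_TIMEOUT_SECONDS)
        driver.get(str(s))
        # ... the same click actions as above ...
    except (TimeoutException, WebDriverException):
        # Site timed out or is unreachable: skip to the next URL.
        continue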

Fill out form and download zipfile using Selenium in Python

EDITED: I incorporated the final lines suggested by Sushil. At the end, I am copying the output from my terminal. I still do not get the zipfile.
SOLVED: My error was due to an incompatibility between the driver and Chrome versions. I fixed it by following the instructions here: unknown error: call function result missing 'value' for Selenium Send Keys even after chromedriver upgrade
I am trying to use Selenium to fill out a form and download a zipfile.
After extensive googling, I have written a Python script, but I am currently unable to download the file. A browser opens, but nothing is filled out.
I am very new at Python, so I am guessing I am missing something very trivial since the website I am trying to get info from is super simple.
This is what I have tried:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome(executable_path='/home/miranda/webscrap_Python/chromedriver')
#driver.wait = WebDriverWait(driver,5)
driver.get("http://www.cis.es/cis/opencms/EN/formulario.jsp?dwld=/Microdatos/MD3288.zip")

Name = '//*[@id="Nombre"]'
LastName = '//*[@id="Apellidos"]'
University = '//*[@id="profesion"]'
email = '//*[@id="Email"]'
ob_req = '//*[@id="objeto1"]'
terms = '//*[@id="Terminos"]'
download = '//*[@id="mediomicrodatos"]/form/div[3]/input'

driver.find_element_by_xpath(Name).send_keys("Miranda")
driver.find_element_by_xpath(LastName).send_keys("MyLastName")
driver.find_element_by_xpath(University).send_keys("MySchool")
driver.find_element_by_xpath(email).send_keys("my_email@gmail.com")

#lines added by Sushil:
ob_req_element = driver.find_element_by_xpath(ob_req)  #Finds the ob_req element
driver.execute_script("arguments[0].click();", ob_req_element)  #Scrolls down to the element and clicks on it
terms_element = driver.find_element_by_xpath(terms)  #The same is repeated here
driver.execute_script("arguments[0].click();", terms_element)
driver.find_element_by_xpath(download).click()  #Scrolling down is not needed for the download button as it would already be in view. Only when an element is not in view should we scroll down to it in order to click on it.
Output in my terminal:
Traceback (most recent call last):
File "myCode.py", line 18, in <module>
driver.find_element_by_xpath(Name).send_keys("Miranda")
File "/home/miranda/anaconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 479, in send_keys
'value': keys_to_typing(value)})
File "/home/miranda/anaconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 633, in _execute
return self._parent.execute(command, params)
File "/home/miranda/anaconda3/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/home/miranda/anaconda3/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: call function result missing 'value'
(Session info: chrome=86.0.4240.75)
(Driver info: chromedriver=2.29.461571 (8a88bbe0775e2a23afda0ceaf2ef7ee74e822cc5),platform=Linux 5.4.0-45-generic x86_64)
For each element below the email field, you have to scroll down before clicking it. Here is the full code to do it:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()
#driver.wait = WebDriverWait(driver,5)
driver.get("http://www.cis.es/cis/opencms/EN/formulario.jsp?dwld=/Microdatos/MD3288.zip")

Name = '//*[@id="Nombre"]'
LastName = '//*[@id="Apellidos"]'
University = '//*[@id="profesion"]'
email = '//*[@id="Email"]'
ob_req = '//*[@id="objeto1"]'
terms = '//*[@id="Terminos"]'
download = '//*[@id="mediomicrodatos"]/form/div[3]/input'

driver.find_element_by_xpath(Name).send_keys("Miranda")
driver.find_element_by_xpath(LastName).send_keys("MyLastName")
driver.find_element_by_xpath(University).send_keys("MySchool")
driver.find_element_by_xpath(email).send_keys("my_email@gmail.com")

#All the lines below were added by me
ob_req_element = driver.find_element_by_xpath(ob_req)  #Finds the ob_req element
driver.execute_script("arguments[0].click();", ob_req_element)  #Scrolls down to the element and clicks on it
terms_element = driver.find_element_by_xpath(terms)  #The same is repeated here
driver.execute_script("arguments[0].click();", terms_element)
driver.find_element_by_xpath(download).click()  #Scrolling down is not needed for the download button as it would already be in view. Only when an element is not in view should we scroll down to it in order to click on it.
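One more thing worth checking if the zip still doesn't appear: where Chrome is saving downloads. A minimal sketch for pointing Chrome at a known download directory (the directory path here is a hypothetical example):

from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_experimental_option("prefs", {
    "download.default_directory": "/home/miranda/downloads",  # hypothetical target directory
    "download.prompt_for_download": False,                    # skip the save-as dialog
})
driver = webdriver.Chrome(options=options)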

StaleElementReferenceException when trying to click on the links in a loop

Please click on the link below to see the "BEAUTY" link I am clicking on.
I am using this code to click on the "Beauty" link:
driver = webdriver.Chrome("C:\\Users\\gaurav\\Desktop\\chromedriver_win32\\chromedriver.exe")
driver.maximize_window()
driver.get("http://shop.davidjones.com.au")
object = driver.find_elements_by_name('topCategory')
for ea in object:
    print ea.text
    if ea.text == 'Beauty':
        ea.click()
I am getting the following exception after clicking on the link successfully. Can anybody tell me why I am getting it?
Traceback (most recent call last):
File "C:/Users/gaurav/PycharmProjects/RIP_CURL/login_raw.py", line 10, in <module>
print ea.text
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webelement.py", line 73, in text
return self._execute(Command.GET_ELEMENT_TEXT)['value']
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webelement.py", line 493, in _execute
return self._parent.execute(command, params)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 252, in execute
self.error_handler.check_response(response)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=63.0.3239.132)
(Driver info: chromedriver=2.34.522940 (1a76f96f66e3ca7b8e57d503b4dd3bccfba87af1),platform=Windows NT 6.2.9200 x86_64)
Try this:
from selenium import webdriver

print("bot started")
#chromeOptions = webdriver.ChromeOptions()
#driver = webdriver.Chrome(chrome_options=chromeOptions)

def specific_text(text, ea):
    return str(text) == ea.text

driver = webdriver.Chrome("C:\\Users\\gaurav\\Desktop\\chromedriver_win32\\chromedriver.exe")
driver.maximize_window()
driver.get("http://shop.davidjones.com.au")
object_ = driver.find_elements_by_name('topCategory')
text_headers = [str(specific_text('Beauty', ea)) for ea in object_]
#print(text_headers)
index_text = text_headers.index("True")
#print(index_text)
object_[index_text].click()
You need to take care of a few factors:
You have tried to create a list with the name object. object is a reserved built-in symbol in most programming languages, so as per best programming practices we shouldn't use the name object.
The line print ea.text is badly indented; you need to fix the indentation.
Once you invoke click() on the WebElement with the text Beauty, you need to break out of the loop.
Here is your own working code with some minor tweaks :
from selenium import webdriver

driver = webdriver.Chrome(executable_path=r'C:\path\to\chromedriver.exe')
driver.maximize_window()
driver.get("http://shop.davidjones.com.au")
object1 = driver.find_elements_by_name('topCategory')
for ea in object1:
    print (ea.text)
    if ea.text == 'Beauty':
        ea.click()
        break
Console Output:
Sale
Brands
Women
Men
Shoes
Bags & Accessories
Beauty
There's an easier way to do this. You can use an XPath that specifies the category name you want to click. That way you don't have to loop; it will find the desired element in one search.
//span[#name='topCategory'][.='Beauty']
I'm assuming you will be reusing this code. In cases like this, I would create a function that takes a string parameter which would be the category name that you want to click. You feed that parameter into the XPath above and you can then click any category on the page.
I tested this and it's working.
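For illustration, a minimal sketch of such a helper, using the legacy find_element_by_xpath API that the rest of this thread uses (the function name is mine):

def click_category(driver, category):
    # Build the XPath for the topCategory span whose text matches exactly.
    xpath = "//span[@name='topCategory'][.='{}']".format(category)
    driver.find_element_by_xpath(xpath).click()

click_category(driver, 'Beauty')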

Switching between tabs and closing a tab in a window gives an error

Here's what my code basically does (or what I'm trying to do): open a window, open a link from the page, fetch some data from the page, and close the tab; then open the second link and perform the same operations again. The problem lies in closing the tab.
link.send_keys(Keys.CONTROL + 'w')
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webelement.py", line 323, in send_keys
self._execute(Command.SEND_KEYS_TO_ELEMENT, {'value': typing})
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webelement.py", line 404, in _execute
return self._parent.execute(command, params)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 195, in execute
self.error_handler.check_response(response)
File "C:\Python27\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 170, in check_response
raise exception_class(message, screen, stacktrace)
StaleElementReferenceException: Message: Element belongs to a different frame than the current one - switch to its containing frame to use it
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
import time
from lxml import html
import requests
import xlwt

browser = webdriver.Firefox()  # Get local session of firefox
# 0 wait until the pages are loaded
browser.implicitly_wait(3)  # 3 secs should be enough. if not, increase it
browser.get("http://ae.bizdirlib.com/taxonomy/term/1493")  # Load page
links = browser.find_elements_by_css_selector("h2 > a")

def test():  # test function
    elems = browser.find_elements_by_css_selector("div.content.clearfix > div > fieldset > div > ul > li > span")
    browser.implicitly_wait(3)  # 3 secs should be enough. if not, increase it
    for elem in elems:
        print elem.text
    elem1 = browser.find_elements_by_css_selector("div.content.clearfix>div>fieldset>div>ul>li>a")
    browser.implicitly_wait(3)  # 3 secs should be enough. if not, increase it
    for elems21 in elem1:
        print elems21.text
    return

for link in links:
    link.send_keys(Keys.CONTROL + Keys.RETURN)
    link.send_keys(Keys.CONTROL + Keys.PAGE_UP)
    browser.switch_to_window(browser.window_handles[-1])
    test()  # Want to call test function
    browser.implicitly_wait(3)  # 3 secs should be enough. if not, increase it
    # browser.quit()
    browser.switch_to_window(browser.window_handles[0])
    link.send_keys(Keys.CONTROL + 'w')
    # browser.switch_to_window(browser.window_handles[0])
The switch_to_window function is used when you're working with multiple windows, not tabs, so it won't help here. As per this link, to date Selenium officially has no support for tabs.
When you execute link.send_keys(Keys.CONTROL + 'w'), notice that the link element does not belong to the tab that is currently displayed. Hence, you should select a random element from the current tab and then call the send_keys function on it.
Your for loop should look like this:
for link in links:
    link.send_keys(Keys.CONTROL + Keys.RETURN)
    link.send_keys(Keys.CONTROL + Keys.PAGE_UP)
    test()
    # Here, 'r' is the random element
    r = browser.find_element_by_css_selector("h2 > a")
    r.send_keys(Keys.CONTROL + 'w')
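Another approach, offered as a sketch rather than the answer's method: avoid sending Ctrl+W to an element at all, and instead close the extra tab with browser.close() and switch back to the original handle, using the same window_handles list the question already relies on:

main_handle = browser.window_handles[0]
for link in links:
    link.send_keys(Keys.CONTROL + Keys.RETURN)   # open the link in a new tab
    browser.switch_to_window(browser.window_handles[-1])
    test()
    browser.close()                              # close the tab we just opened
    browser.switch_to_window(main_handle)        # return to the original tab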
