Unable to get element text using Selenium with Chrome - python

I'm trying to scrape Merriam-Webster's Medical Dictionary for medical terms using Python and Chrome as the Selenium webdriver. So far, this is what I have:
from os import path
from selenium import webdriver
# Adding an ad-blocker to Chrome to speed up page load times
options = webdriver.ChromeOptions()
options.add_extension(path.abspath("ublock-origin.crx"))
# Declaring the Selenium webdriver
driver = webdriver.Chrome(chrome_options = options)
# Fetching the "A" terms as a test set
driver.get("https://www.merriam-webster.com/browse/medical/a")
scraped_words = [] # The list that will hold each word
page_num = 1
while page_num < 55: # There are 54 pages of "A" terms
try:
for i in range(4): # There are 3 columns per page of words
column = "/html/body/div/div/div[5]/div[2]/div[1]/div/div[3]/ul/li[" + str(i) + "]/a"
number_of_words = len(driver.find_elements_by_xpath(column))
for j in range(number_of_words):
word = driver.find_elements_by_xpath(column + "[" + str(j) + "]")
scraped_words.append(word)
driver.find_element_by_class_name("fa-angle-right").click() # Next page
page_num += 1 # Increment page number to keep track of current page
except:
driver.close()
# Write out words to a file
with open("medical_terms.dict", "w") as text_file:
for i in range(len(scraped_words)):
text_file.write(str(scraped_words[i]))
text_file.write("\n")
driver.close()
The above code fetches all the items, as the output of len(scraped_words) is the number expected. However, since I did not specify that I wanted to fetch the text of the elements, I get element identifiers (I think?) instead of text. If I decide to use word = driver.find_elements_by_xpath(column + "[" + str(j) + "]").text in order to specify that I want to get the text of the element, I get the following error:
Traceback (most recent call last):
File "mw_download.py", line 20, in <module>
number_of_words = len(driver.find_elements_by_xpath(column))
File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 325, in find_elements_by_xpath
return self.find_elements(by=By.XPATH, value=xpath)
File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 817, in find_elements
'value': value})['value']
File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 256, in execute
self.error_handler.check_response(response)
File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: no such session
(Driver info: chromedriver=2.31.488774 (7e15618d1bf16df8bf0ecf2914ed1964a387ba0b),platform=Mac OS X 10.12.6 x86_64)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "mw_download.py", line 27, in <module>
driver.close()
File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 541, in close
self.execute(Command.CLOSE)
File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 256, in execute
self.error_handler.check_response(response)
File "/usr/local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: no such session
(Driver info: chromedriver=2.31.488774 (7e15618d1bf16df8bf0ecf2914ed1964a387ba0b),platform=Mac OS X 10.12.6 x86_64)
What is strange to me here is that the only code I change between runs is on line 22 yet the error message points out line 20 instead.
Any help in deciphering what's going on here and what I can do to fix it would be much appreciated! :+)

You just need to build the word list from the text of each element. Change:
word = driver.find_elements_by_xpath(column + "[" + str(j) + "]")
to:
word = [i.text for i in driver.find_elements_by_xpath(column + "[" + str(j) + "]")]
Because .find_elements_by_xpath will always return a list, accessing .text directly won't work.

Related

python selenium Failed to execute 'getComputedStyle' on 'Window': parameter 1 is not of type 'Element'

I'm getting the error below using Python Selenium. I want to build a browser-game bot and I'm currently stuck here. I tried to write it in a mostly object-oriented style, but that hasn't really worked out: the bug only appeared once I started using classes ;)
Failed to execute 'getComputedStyle' on 'Window': parameter 1 is not of type 'Element'
My code:
class Buildables():
def __repr__(self):
return self.sparte, self.toolTip
def __init__(self, ref, sparte, *args, **kwargs):
self.toolTip = lambda: wait.until(
EC.presence_of_element_located((By.XPATH, ('//*[#ref=\'' +
str(ref) + '\']'))))
self.LVL = lambda: int(
wait.until(EC.presence_of_element_located((By.XPATH, ('//*
[#ref=\'' + str(ref) + '\']/span/span')))).text)
self.ausbauButton = lambda: wait.until(
EC.presence_of_element_located((By.XPATH,
('//*[contains(text(),\' Ausbauen\')]'))))
self.sparte = lambda : driver.find_element_by_xpath('//*
[contains(text(),'+ sparte +')]')
metalMine = Buildables(1,'Versorgung')
a = c.metalMine.sparte()
a.click()
What am I doing wrong?
BTW full error:
Traceback (most recent call last):
File "C:/Users/leosc/PycharmProjects/ogameBot/ogame_main.py", line 5, in <module>
cycles.buildMinesCycle()
File "C:\Users\leosc\PycharmProjects\ogameBot\cycles.py", line 132, in buildMinesCycle
a.click()
File "C:\Users\leosc\AppData\Local\Programs\Python\Python37-32\lib\site-packages\selenium\webdriver\remote\webelement.py", line 80, in click
self._execute(Command.CLICK_ELEMENT)
File "C:\Users\leosc\AppData\Local\Programs\Python\Python37-32\lib\site-packages\selenium\webdriver\remote\webelement.py", line 633, in _execute
return self._parent.execute(command, params)
File "C:\Users\leosc\AppData\Local\Programs\Python\Python37-32\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "C:\Users\leosc\AppData\Local\Programs\Python\Python37-32\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.JavascriptException: Message: javascript error: Failed to execute 'getComputedStyle' on 'Window': parameter 1 is not of type 'Element'.
(Session info: chrome=72.0.3626.121)
(Driver info: chromedriver=2.46.628402 (536cd7adbad73a3783fdc2cab92ab2ba7ec361e1),platform=Windows NT 10.0.17134 x86_64)

Python selenium selenium.common.exceptions.StaleElementReferenceException:

I get this weird error with Selenium when I try to find sportsbook odds on oddsportal.com. It looks like the Selenium result object does not behave like a normal list, so I cannot simply loop over every URL. A test URL that should work: http://www.oddsportal.com/soccer/england/premier-league/ (the script is not written for home-draw-away odds).
So what am I doing wrong here?
My script:
from selenium import webdriver
from selenium.common.exceptions import NoSuchAttributeException,NoSuchElementException
from selenium.webdriver.common.keys import Keys
class Odds():
def odds(self,driver,url):
kertoimet = ['','']
driver.get(url)
odds = driver.find_elements_by_xpath("""//*[#id="odds-data table"]/div/table/tbody/tr""")
for item in odds:
data = item.text.replace(' ','').split('\n')
if data[0] == 'Pinnacle':
kertoimet = [data[1],data[2]]
return kertoimet
def odds_finder(self,data,driver):
for item in data:
if item.get_attribute('href') != '':
print(Odds().odds(driver,str(item.get_attribute('href'))))
def url_finder2(self,URL):
driver = webdriver.Chrome("/usr/local/bin/chromedriver 2")
driver.get(URL) #http://www.oddsportal.com/soccer/england/premier-league/
data = driver.find_elements_by_xpath("""//*[#id="tournamentTable"]/tbody/tr/td/a""")
Odds().odds_finder(list(data),driver)
Odds().url_finder2(URL)
Error:
Traceback (most recent call last):
File "odds.py", line 79, in <module>
Odds().url_finder2(open('oddsportal_odds.csv'))
File "odds.py", line 61, in url_finder2
Odds().odds_finder(list(data),driver)
File "odds.py", line 49, in odds_finder
if item.get_attribute('href') != '':
File "/Library/Python/2.7/site-
packages/selenium/webdriver/remote/webelement.py", line 141, in
get_attribute
resp = self._execute(Command.GET_ELEMENT_ATTRIBUTE, {'name': name})
File "/Library/Python/2.7/site-
packages/selenium/webdriver/remote/webelement.py", line 494, in
_execute
return self._parent.execute(command, params)
File "/Library/Python/2.7/site-
packages/selenium/webdriver/remote/webdriver.py", line 236, in execute
self.error_handler.check_response(response)
File "/Library/Python/2.7/site-
packages/selenium/webdriver/remote/errorhandler.py", line 192, in
check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message:
stale element reference: element is not attached to the page document
(Session info: chrome=58.0.3029.110)
(Driver info: chromedriver=2.29.461585
(0be2cd95f834e9ee7c46bcc7cf405b483f5ae83b),platform=Mac OS X 10.12.3
x86_64)
You need to look the elements up again, because navigating to another page changes the browser state and makes the previously found elements stale.
Try modifying these two functions:
def odds_finder(self, driver):
    """Print the odds for every match linked from the tournament table.

    driver must already be showing the tournament page. The link targets are
    copied out of the page before any of them is visited, because visiting a
    match page loads a new document and every element found on the old page
    then raises StaleElementReferenceException.
    """
    links = driver.find_elements_by_xpath('//*[@id="tournamentTable"]/tbody/tr/td/a')
    # Read all hrefs up front; after this point the elements are never touched again.
    hrefs = [link.get_attribute('href') for link in links]
    for href in hrefs:
        # Skips both an empty href ('') and a missing one (None).
        if href:
            print(self.odds(driver, str(href)))
def url_finder2(self, URL):
    """Start a Chrome session, load URL and hand the driver to odds_finder."""
    browser = webdriver.Chrome("/usr/local/bin/chromedriver 2")
    # Example URL: http://www.oddsportal.com/soccer/england/premier-league/
    browser.get(URL)
    finder = Odds()
    finder.odds_finder(browser)

How can I iterate through data on a website

I want to extract users from a website by using a for loop, but I don't know how to correctly put "i" in place of the number 1.
After I use the following line, I get this traceback:
user_id = browser.find_element_by_xpath("(//div[#class='_gzjax'])["+str(i)+"]").text
Traceback (most recent call last): File "D:/Code/Python/Instagram
Unfollow/Instagram Unfollow.py", line 32, in
user_id = browser.find_element_by_xpath("(//div[#class='_gzjax'])["+str(i)+"]").text
File
"C:\Python33\lib\site-packages\selenium\webdriver\remote\webdriver.py",
line 293, in find_element_by_xpath
return self.find_element(by=By.XPATH, value=xpath) File "C:\Python33\lib\site-packages\selenium\webdriver\remote\webdriver.py",
line 752, in find_element
'value': value})['value'] File "C:\Python33\lib\site-packages\selenium\webdriver\remote\webdriver.py",
line 236, in execute
self.error_handler.check_response(response) File "C:\Python33\lib\site-packages\selenium\webdriver\remote\errorhandler.py",
line 192, in check_response
raise exception_class(message, screen, stacktrace) selenium.common.exceptions.NoSuchElementException: Message: Unable to
locate element:
{"method":"xpath","selector":"(//div[#class='gzjax'])[0]"}
Stacktrace:
at FirefoxDriver.prototype.findElementInternal (file:///c:/users/viktor/appdata/local/temp/tmp1r0vgw/extensions/fxdriver#googlecode.com/components/driver-component.js:10770)
at FirefoxDriver.prototype.findElement (file:///c:/users/viktor/appdata/local/temp/tmp1r0vgw/extensions/fxdriver#googlecode.com/components/driver-component.js:10779)
at DelayedCommand.prototype.executeInternal_/h (file:///c:/users/viktor/appdata/local/temp/tmp1r0vgw/extensions/fxdriver#googlecode.com/components/command-processor.js:12661)
at DelayedCommand.prototype.executeInternal_ (file:///c:/users/viktor/appdata/local/temp/tmp1r0vgw/extensions/fxdriver#googlecode.com/components/command-processor.js:12666)
at DelayedCommand.prototype.execute/< (file:///c:/users/viktor/appdata/local/temp/tmp1r0vgw/extensions/fxdriver#googlecode.com/components/command-processor.js:12608)
Just concatenate i (first cast it to string) to your desired string:
# XPath positions are 1-based, so "[0]" never matches anything; count from 1.
for i in range(1, 101):
    user_id = browser.find_element_by_xpath("(//div[@class='_gzjax'])[" + str(i) + "]").text
    print(user_id)
Try this. I think you concatenate variables into string with plus sign in python:
user_id = browser.find_element_by_xpath("(//div[#class='_gzjax'])["+i+"]").text
see how we add ("string" + variable + "string")
You would need to explicitly convert i from int to string. How about:
# str(i) is required because an int cannot be concatenated to a str.
# XPath positions are 1-based, so the loop starts at 1 rather than 0.
for i in range(1, 101):
    user_id = browser.find_element_by_xpath("(//div[@class='_gzjax'])[" + str(i) + "]").text
    print(user_id)
I don't know how many users there are on the page, but as it stands the loop is hard-coded to 100 iterations.
Could just loop through the elements?
# find_elements_by_xpath (plural) returns a list of every match; the singular
# find_element_by_xpath returns one WebElement, which cannot be iterated.
for element in browser.find_elements_by_xpath("//div[@class='_gzjax']"):
    user_id = element.text
    print(user_id)
Something along those lines where you are iterating over items instead of indices.

Got runUntilCurrent Error Message when running Scrapy Spider with Selenium

I'm a beginner in Scrapy. I want to collect links of items in the index page and get the information from the item pages. Because I need to deal with the javascript on the index page, I use selenium webdriver with scrapy. Here's my code in progress.py .
from scrapy.spider import Spider
from scrapy.http import Request
from selenium import selenium
from selenium import webdriver
from mustdo.items import MustdoItem
import time
class ProgressSpider(Spider):
name = 'progress' # spider's name
allowed_domains = ['example.com'] # crawling domain
start_urls = ['http://www.example.com']
def __init__(self):
Spider.__init__(self)
self.log('----------in __init__----------')
self.driver = webdriver.Firefox()
def parse(self, response):
self.log('----------in parse----------')
self.driver.get(response.url)
# Here're some operations of self.driver with javascript.
elements = []
elements = self.driver.find_elements_by_xpath('//table/tbody/tr/td/a[1]')
#get the number of the item
self.log('----------Link number is----------'+str(len(elements)))
for element in elements:
#get the url of the item
href = element.get_attribute('href')
print href
self.log('----------next href is ----------'+href)
yield Request(href,callback=self.parse_item)
self.driver.close()
def parse_item(self, response):
self.log('----------in parse_item----------')
self.driver.get(response.url)
#build item
item = MustdoItem()
item['title'] = self.driver.find_element_by_xpath('//h2').text
self.log('----------item created----------'+self.driver.find_element_by_xpath('//h2').text)
time.sleep(10)
return item
Also, I have items.py defining the MustdoItem used here. Here's the code.
from scrapy.item import Item, Field
class MustdoItem(Item):
title = Field()
When I run the spider, I can get several items (probably 6 to 7 out of 20). But after a while, I get error messages as below.
Traceback (most recent call last):
File "F:\Python27\lib\site-packages\twisted\internet\base.py", line 82
4, in runUntilCurrent
call.func(*call.args, **call.kw)
File "F:\Python27\lib\site-packages\twisted\internet\task.py", line 63
8, in _tick
taskObj._oneWorkUnit()
File "F:\Python27\lib\site-packages\twisted\internet\task.py", line 48
4, in _oneWorkUnit
result = next(self._iterator)
File "F:\Python27\lib\site-packages\scrapy-0.22.2-py2.7.egg\scrapy\uti
ls\defer.py", line 57, in <genexpr>
work = (callable(elem, *args, **named) for elem in iterable)
--- <exception caught here> ---
File "F:\Python27\lib\site-packages\scrapy-0.22.2-py2.7.egg\scrapy\uti
ls\defer.py", line 96, in iter_errback
yield next(it)
File "F:\Python27\lib\site-packages\scrapy-0.22.2-py2.7.egg\scrapy\con
trib\spidermiddleware\offsite.py", line 23, in process_spider_output
for x in result:
File "F:\Python27\lib\site-packages\scrapy-0.22.2-py2.7.egg\scrapy\con
trib\spidermiddleware\referer.py", line 22, in <genexpr>
return (_set_referer(r) for r in result or ())
File "F:\Python27\lib\site-packages\scrapy-0.22.2-py2.7.egg\scrapy\con
trib\spidermiddleware\urllength.py", line 33, in <genexpr>
return (r for r in result or () if _filter(r))
File "F:\Python27\lib\site-packages\scrapy-0.22.2-py2.7.egg\scrapy\con
trib\spidermiddleware\depth.py", line 50, in <genexpr>
return (r for r in result or () if _filter(r))
File "mustdo\spiders\progress.py", line 32, in parse
print element.tag_name
File "F:\Python27\lib\site-packages\selenium\webdriver\remote\webeleme
nt.py", line 50, in tag_name
return self._execute(Command.GET_ELEMENT_TAG_NAME)['value']
File "F:\Python27\lib\site-packages\selenium\webdriver\remote\webeleme
nt.py", line 369, in _execute
return self._parent.execute(command, params)
File "F:\Python27\lib\site-packages\selenium\webdriver\remote\webdrive
r.py", line 164, in execute
self.error_handler.check_response(response)
File "F:\Python27\lib\site-packages\selenium\webdriver\remote\errorhan
dler.py", line 164, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: u'El
ement not found in the cache - perhaps the page has changed since it was looked
up' ; Stacktrace:
at fxdriver.cache.getElementAt (resource://fxdriver/modules/web_elem
ent_cache.js:7610)
at Utils.getElementAt (file:///c:/users/marian/appdata/local/temp/tm
pmgnqid/extensions/fxdriver#googlecode.com/components/command_processor.js:7210)
at WebElement.getElementTagName (file:///c:/users/marian/appdata/loc
al/temp/tmpmgnqid/extensions/fxdriver#googlecode.com/components/command_processo
r.js:10353)
at DelayedCommand.prototype.executeInternal_/h (file:///c:/users/mar
ian/appdata/local/temp/tmpmgnqid/extensions/fxdriver#googlecode.com/components/c
ommand_processor.js:10878)
at DelayedCommand.prototype.executeInternal_ (file:///c:/users/maria
n/appdata/local/temp/tmpmgnqid/extensions/fxdriver#googlecode.com/components/com
mand_processor.js:10883)
at DelayedCommand.prototype.execute/< (file:///c:/users/marian/appda
ta/local/temp/tmpmgnqid/extensions/fxdriver#googlecode.com/components/command_pr
ocessor.js:10825)
I've tested my code and found that if I remove "yield Request(href,callback=self.parse_item)" from the parse function, I get all the item links. While "progress.py" was running, I also observed that the error messages appear right after the first "----------in parse_item----------" line is written by self.log. My guess is that the sequence of yields causes the error, but I don't know how to deal with this problem.
Any insight is appreciated!
Best regards! :)

WebDriver, Python: how do I store the text of a drop-down option that was selected by value?

I want to store the text of one of the options of a drop-down menu, identified by its value. I select a random option with the following Python script:
#Random select option by value
assignJob = Select(driver.find_element_by_name('job[job_title]'))
jobValue = str(randint(1, 6))
assignJob.select_by_value(jobValue)
HTML code:
<select name="job[job_title]" class="formSelect valid" id="job_job_title">
<option value="" selected="selected">-- Select --</option>
<option value="1">Customer Service</option>
<option value="4">QA Engineer</option>
<option value="3">QA Manager</option>
<option value="2">SDET</option>
<option value="5">Software Developer</option>
<option value="6">Software Development Manager</option>
</select>
Error when I assign variable storedJob = driver.find_element_by_css_selector("#job_job_title option[value=jobValue]").text:
Traceback (most recent call last):
File "code.py", line 62, in <module>
storedJob = driver.find_element_by_css_selector("#job_job_title option[value
=jobValue]").text
File "C:\Python27\lib\site-packages\selenium-2.39.0-py2.7.egg\selenium\webdriv
er\remote\webdriver.py", line 365, in find_element_by_css_selector
return self.find_element(by=By.CSS_SELECTOR, value=css_selector)
File "C:\Python27\lib\site-packages\selenium-2.39.0-py2.7.egg\selenium\webdriv
er\remote\webdriver.py", line 681, in find_element
{'using': by, 'value': value})['value']
File "C:\Python27\lib\site-packages\selenium-2.39.0-py2.7.egg\selenium\webdriv
er\remote\webdriver.py", line 164, in execute
self.error_handler.check_response(response)
File "C:\Python27\lib\site-packages\selenium-2.39.0-py2.7.egg\selenium\webdriv
er\remote\errorhandler.py", line 164, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: u'no such element\n
(Session info: chrome=32.0.1700.102)\n (Driver info: chromedriver=2.8.241075,p
latform=Windows NT 6.1 SP1 x86)'
Code:
assignJob = Select(driver.find_element_by_name('job[job_title]'))
jobValue = str(randint(1, 6))
assignJob.select_by_value(jobValue)
storedJob = driver.find_element_by_css_selector("#job_job_title option[value=jobValue]").text
print storedJob
Another Error:
Traceback (most recent call last):
File "code.py", line 63, in <module>
storedJob = driver.find_element_by_css_selector(jobValueSelector).text
File "C:\Python27\lib\site-packages\selenium-2.39.0-py2.7.egg\selenium\webdriv
er\remote\webdriver.py", line 365, in find_element_by_css_selector
return self.find_element(by=By.CSS_SELECTOR, value=css_selector)
File "C:\Python27\lib\site-packages\selenium-2.39.0-py2.7.egg\selenium\webdriv
er\remote\webdriver.py", line 681, in find_element
{'using': by, 'value': value})['value']
File "C:\Python27\lib\site-packages\selenium-2.39.0-py2.7.egg\selenium\webdriv
er\remote\webdriver.py", line 164, in execute
self.error_handler.check_response(response)
File "C:\Python27\lib\site-packages\selenium-2.39.0-py2.7.egg\selenium\webdriv
er\remote\errorhandler.py", line 164, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.InvalidElementStateException: Message: u"invalid elem
ent state: Failed to execute query: '#job_job_title option[value=2]' is not a va
lid selector.\n (Session info: chrome=32.0.1700.102)\n (Driver info: chromedri
ver=2.8.241075,platform=Windows NT 6.1 SP1 x86)"
You could use a CSS selector, to locate that option and ask selenium to return its text.
# The attribute value must be quoted: an unquoted value has to be a CSS
# identifier, and "2" is not one, so option[value=2] is rejected as invalid.
driver.find_element_by_css_selector("#job_job_title option[value='2']").text
Replace 2 with the value of the variable jobValue for your specific scenario.
EDIT
jobValue is a variable (a str). Python does not substitute a variable name that appears inside a string literal, so you have to build the selector string yourself:
jobValueSelector = "#job_job_title option[value='%s']" %jobValue
storedJob = driver.find_element_by_css_selector(jobValueSelector).text
print storedJob
The piece of code below is in Ruby. I think Python and Ruby are almost the same here.
select_list = driver.find_element(:id, 'job_job_title')
options = select_list.find_elements(:tag_name, 'option')
# Index 0 is the "-- Select --" placeholder and the last valid index is
# options.count - 1, so draw from the exclusive range 1...options.count.
# The inclusive form 1..options.count could pick options.count, which is nil.
index = rand(1...options.count)
my_variable = options[index]
Let me try my skill in Python (not tested)! :)
select_list = driver.find_element_by_id("job_job_title")
options = select_list.find_elements_by_tag_name("option")
# randint includes both endpoints and the last valid index is len(options) - 1,
# so the upper bound must be len(options) - 1 to avoid an IndexError.
# Index 0 is skipped because it is the "-- Select --" placeholder.
index = randint(1, len(options) - 1)
variable = options[index]

Categories