I'm trying to make a script where the program takes input of multiple URLs and then opens tabs for each of them, this is what I came up with
s=raw_input()
l=s.split()
t=len(l)
for elements in l:
elements = ["https://" + elements + "" for elements in l]
driver = webdriver.Chrome(r"C:/Users/mynam/Desktop/WB/chromedriver.exe")
driver.get("https://www.google.com")
for e in elements:
driver.implicitly_wait(3)
driver.execute_script("window.open(e,'new window')")
print "Opened in new tab"
I get an error of e not defined, how do we pass an argument to window.open in selenium
You need to execute new window, switch to it and open new page.
from selenium import webdriver
import os
def open_tab_page(page, page_number):
browser.execute_script("window.open('');")
browser.switch_to.window(browser.window_handles[page_number])
browser.get(page)
# initialise driver
chrome_driver = os.path.abspath(os.path.dirname(__file__)) + '/chromedriver'
browser = webdriver.Chrome(chrome_driver)
browser.get("http://stackoverflow.com/")
# list of pages to open
pages_list = ['https://www.google.com', 'https://www.youtube.com/']
page_number = 1
for page in pages_list:
open_tab_page(page, page_number)
page_number +=1
Related
I wanted to extract text from multiple pages. Currently, I am able to extract data from the first page but I want to append and go to muliple pages and extract the data from pagination. I have written this simple code which extracts data from the first page. I am not able to extract the data from multiple pages which is dynamic in number.
`
element_list = []
opts = webdriver.ChromeOptions()
opts.headless = True
driver = webdriver.Chrome(ChromeDriverManager().install())
base_url = "XYZ"
driver.maximize_window()
driver.get(base_url)
driver.set_page_load_timeout(50)
element = WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.ID, 'all-my-groups')))
l = []
l = driver.find_elements_by_xpath("//div[contains(#class, 'alias-wrapper sim-ellipsis sim-list--shortId')]")
for i in l:
print(i.text)
`
I have shared the images of class if this could help from pagination.
If we could extract the automate and extract from all the pages that would be awesome. Also, I am new so please pardon me for asking silly questions. Thanks in advance.
You have provided the code just for the previous page button. I guess you need to go to the next page until next page exists. As I don't know what site we are talking about I can only guess its behavior. So I'm assuming the button 'next' disappears when no next page exists. If so, it can be done like this:
element_list = []
opts = webdriver.ChromeOptions()
opts.headless = True
driver = webdriver.Chrome(ChromeDriverManager().install())
base_url = "XYZ"
driver.maximize_window()
driver.get(base_url)
driver.set_page_load_timeout(50)
element = WebDriverWait(driver, 50).until(EC.presence_of_element_located((By.ID, 'all-my-groups')))
l = []
l = driver.find_elements_by_xpath("//div[contains(#class, 'alias-wrapper sim-ellipsis sim-list--shortId')]")
while True:
try:
next_page = driver.find_element(By.XPATH, '//button[#label="Next page"]')
except NoSuchElementException:
break
next_page.click()
l.extend(driver.find_elements(By.XPATH, "//div[contains(#class, 'alias-wrapper sim-ellipsis sim-list--shortId')]"))
for i in l:
print(i.text)
To be able to catch the exception this import has to be added:
from selenium.common.exceptions import NoSuchElementException
Also note that the method find_elements_by_xpath is deprecated and it would be better to replace this line:
l = driver.find_elements_by_xpath("//div[contains(#class, 'alias-wrapper sim-ellipsis sim-list--shortId')]")
by this one:
l = driver.find_elements(By.XPATH, "//div[contains(#class, 'alias-wrapper sim-ellipsis sim-list--shortId')]")
How can I open a new window and close the previous one, then open again another window and close the previous one.
The number of links is indeterminate.
urls = ["https://www.oxxo.com/",
"https://pypi.org/project/fake-useragent/",
"https://www.youtube.com/"]
for posts in range(len(urls)):
print(posts)
driver.get(urls[posts])
driver.implicitly_wait(val)
timestamp = datetime.datetime.now().strftime('%d_%m_%Y')
chars = [':','/','%']
image_name = driver.current_url
for char in chars:
image_name = image_name.replace(char,'_')
driver.save_screenshot(str(cont)+'_'+image_name+'_'+timestamp+'.png')
cont += 1
if(posts!=len(urls)-1):
driver.execute_script("window.open('');")
chwd = driver.window_handles
driver.switch_to.window(chwd[-1])
driver.close()
The urls within the list urls are independent. So neither you need a reference nor you need to open them in the adjacent tab. You can easily invoke them once by one and do your task as follows:
driver = webdriver.Chrome(service=s, options=options)
urls = ["https://www.oxxo.com/",
"https://pypi.org/project/fake-useragent/",
"https://www.youtube.com/"]
for url in urls:
driver.get(url)
time.sleep(3) # for demonstaration
print("Do whatever you wish")
driver.quit()
I'm trying to go to every next page using the below code.
it collects data from page Number 1. but when I try to loop it and go to the next page it gives me an error.
Web page : https://register.fca.org.uk/s/search?q=capital&type=Companies
this is the code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import time
url = 'https://register.fca.org.uk/s/search?q=capital&type=Companies'
service = Service('linkto crome driver')
service.start()
driver = webdriver.Remote(service.service_url)
driver.get(url)
time.sleep(12)
for j in range(346):
divs = driver.find_elements_by_xpath('//div[#class="result-card_main"]')
for i in range(len(divs)):
time.sleep(10)
d = driver.find_elements_by_xpath('//div[#class="result-card_main"]')
RN = ''
d[i].click()
time.sleep(12)
try:
RNData = driver.find_elements_by_xpath('//*[#id="profile-header"]/div[1]/div/div/div/div/div/div[1]/div[2]/div/div')
RN = RNData[0].text.split(':')[1].strip()
print(RN)
except Exception as e5:
pass
if i == (len(divs) - 1):
pass
else:
driver.execute_script("window.history.go(-1)")
bt = driver.find_elements_by_xpath('//*[#id="-pagination-next-btn"]')
bt[0].click()
This is the error:
IndexError: list index out of range
How can I solve this problem?
I guess the problem is as following:
bt = driver.find_element_by_xpath('//*[#id="-pagination-next-btn"]')
returns a single web element object, it's not a list, so you can't apply indexing on it with bt[0]
UPD:
After changing from find_element_by_xpath to find_elements_by_xpath you still getting IndexError: list index out of range there because you were in the inner page and performed driver back action.
Immediately after that you are trying to get the next page button while the main page is still not loaded. This actually returns you an empty list
bt = driver.find_elements_by_xpath('//*[#id="-pagination-next-btn"]')
that's why you can't apply bt[0] on an empty list object.
Your problem is this:
if i == (len(divs) - 1):
pass
else:
driver.execute_script("window.history.go(-1)")
After clicking the last link, you are not navigating back to the initial page, which is where the pagination button is. I don't think you need this condition at all, so your code could be:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import time
url = 'https://register.fca.org.uk/s/search?q=capital&type=Companies'
service = Service('linkto crome driver')
service.start()
driver = webdriver.Remote(service.service_url)
driver.get(url)
time.sleep(12)
for j in range(346):
divs = driver.find_elements_by_xpath('//div[#class="result-card_main"]')
for i in range(len(divs)):
time.sleep(10)
d = driver.find_elements_by_xpath('//div[#class="result-card_main"]')
RN = ''
d[i].click()
time.sleep(12)
try:
RNData = driver.find_elements_by_xpath('//*[#id="profile-header"]/div[1]/div/div/div/div/div/div[1]/div[2]/div/div')
RN = RNData[0].text.split(':')[1].strip()
print(RN)
except Exception as e5:
pass
driver.execute_script("window.history.go(-1)")
bt = driver.find_elements_by_xpath('//*[#id="-pagination-next-btn"]')
bt[0].click()
I'm having an issue trying to click on an a href tag from an xpath query, the line in question is element = atag.xpath("./a"), I get an error saying Error: 'list' object has no attribute 'click'.
Any help greatly appreciated.
import time
import os.path
import lxml.html as LH
import re
import sys
from selenium import webdriver
from random import randint
PARAMS = sys.argv
URL = PARAMS[1]
BASEURL = URL[:URL.rfind('/')+1]
try:
PAGE_NUMBER = 1
#--------------------------------------------------
## Get initial page
driver = webdriver.Firefox()
driver.get(PARAMS[1])
#--------------------------------------------------
## Get page count
# Give page time to load
time.sleep(2)
PAGE_RAW = driver.page_source
PAGE_RAW = LH.fromstring(PAGE_RAW)
PAGE_COUNT_RAW = PAGE_RAW.xpath("//div[contains(#class, 'menu')]/div/ul/li")
PAGE_COUNT = len(PAGE_COUNT_RAW) - 2
#--------------------------------------------------
## Get page if it's not page one
while PAGE_NUMBER <= PAGE_COUNT:
#--------------------------------------------------
# Delay page processing for a random number of seconds from 2-5
time.sleep(randint(2,5))
#--------------------------------------------------
## Create empty file
FILE_NAME = PARAMS[3] + 'json/' + time.strftime("%Y%m%d%H") + '_' + str(PARAMS[2]) + '_' + str(PAGE_NUMBER) + '.json'
#--------------------------------------------------
## Create JSON file if it doesn't exist
if os.path.exists(FILE_NAME)==False:
JSON_FILE = open(FILE_NAME, "a+", encoding="utf-8")
else:
JSON_FILE = open(FILE_NAME, "w", encoding="utf-8")
JSON_FILE.write("{")
#--------------------------------------------------
# Click page for next page if not page 1
if PAGE_NUMBER > 1:
index = 1
for atag in PAGE_COUNT_RAW:
if index == (PAGE_NUMBER + 1):
element = atag.xpath("./a")
element.click()
index += 1
#--------------------------------------------------
## Proces page
#TODO
#--------------------------------------------------
## Close webdriver
driver.quit()
#--------------------------------------------------
## Close JSON file
JSON_FILE.write("}")
JSON_FILE.close()
#--------------------------------------------------
## Increment page number
PAGE_NUMBER += 1
#--------------------------------------------------
except Exception as e:
print('Error: ' + str(e.args[0]))
You mixed lxml code with selenium code. Your element is a list returned by lxml code, it's not a WebElement or list of WebElements and you can't apply click() even if you try element[0].click().
I'd suggest you to avoid using lxml as it seem to be redundant in this case. Just try to parse page source with selenium built-in methods.
If you need to get list of div elements you can use:
PAGE_COUNT_RAW = driver.find_elements_by_xpath("//div[contains(#class, 'menu')]/div/ul/li")
To find child a element:
for div in PAGE_COUNT_RAW:
element = div.find_element_by_xpath('./a')
Note that if you defined PAGE_COUNT_RAW on the first page, it will not be accessible on the next page, so you can scrape just a list of links and then get each link in a loop. Something like:
links = [link.get_attribute('href') for link in driver.find_elements_by_xpath("//div[contains(#class, 'menu')]/div/ul/li/a")]
for link in links:
driver.get(link)
If you need more details then update your ticket with specific description as for now your problem is not quite clear
For example a site
I need to script clicking "close" button on an appeared frame.
I'm already tried using xpath, css_selection still worthless.
Need to do stuff using headless-browser, like HtmlUnit
Because there is no "a"-tag.
from selenium import webdriver
from lxml import etree, html
url = "http://2gis.ru/moscow/search/%D1%81%D0%BF%D0%BE%D1%80%D1%82%D0%B8%D0%B2%D0%BD%D1%8B%D0%B5%20%D1%81%D0%B5%D0%BA%D1%86%D0%B8%D0%B8/center/37.437286%2C55.753395/tab/firms/zoom/11"
driver = webdriver.Firefox()
#driver = webdriver.Remote(desired_capabilities=webdriver.DesiredCapabilities.HTMLUNIT)
driver.get(url)
content = (driver.page_source).encode('utf-8')
doc = html.fromstring(content)
elems = doc.xpath('//article[#data-module="miniCard"]')
elem = elems[0]
# get element id to click on
el1_id = elem.attrib['id']
#simulate click to open frame
el1_to_click = driver.find_element_by_xpath('//article[#id="{0}"]//\
a[contains(#class, "miniCard__headerTitle")]'.format(el1_id))
el1_to_click.click()
#some stuff
pass
#now need to close this
close = driver.find_element_by_xpath('//html/body/div[1]/div/div[1]/div[2]/div[2]/div[2]/div/div[2]/div/div[2]/div[3]/div[1]/div[2]/svg/use')
close.click()
But the last part isn't working (can't close frame).
How to do this ?
Try this. this should work.
from selenium import webdriver
from lxml import etree, html
url = "http://2gis.ru/moscow/search/%D1%81%D0%BF%D0%BE%D1%80%D1%82%D0%B8%D0%B2%D0%BD%D1%8B%D0%B5%20%D1%81%D0%B5%D0%BA%D1%86%D0%B8%D0%B8/center/37.437286%2C55.753395/tab/firms/zoom/11"
driver = webdriver.Firefox()
driver.implicitly_wait(10)
# driver = webdriver.Remote(desired_capabilities=webdriver.DesiredCapabilities.HTMLUNIT)
driver.get(url)
content = (driver.page_source).encode('utf-8')
doc = html.fromstring(content)
elems = doc.xpath('//article[#data-module="miniCard"]')
elem = elems[0]
# get element id to click on
el1_id = elem.attrib['id']
# simulate click to open frame
el1_to_click = driver.find_element_by_xpath('//article[#id="{0}"]//\
a[contains(#class, "miniCard__headerTitle")]'.format(el1_id))
el1_to_click.click()
# some stuff
pass
# now need to close this
close = driver.find_element_by_xpath(
'//div[#class="frame _num_2 _pos_last _moverDir_left _active _show _state_visible _ready _cont_firmCard _mover"]/div/div/div[#class="frame__controlsButton _close"]')
close.click()
close = driver.find_element_by_xpath('//div[#data-module="frame"]/\
div[#class="frame__content"]/div[#class="frame__controls"]/\
div[contains(#class, "frame__controlsButton")\
and contains(#class,"_close")]')
That is the answer for any driver.set_window_size()
But need to find headless.