I am trying to scrape all the app URLs from the target page https://play.google.com/store/apps?device= using the code below:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import time
from tqdm import tqdm
options = webdriver.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
driver.maximize_window()
items = ['phone','tablet','tv','chromebook','watch','car']
target_url = "https://play.google.com/store/apps?device="
all_apps = []
for cat in tqdm(items):
    driver.get(target_url + cat)
    time.sleep(2)
    new_height = 0
    last_height = 0
    while True:
        # Scroll down to the bottom
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        # Wait for the page to load
        time.sleep(4)
        # Calculate the new scroll height and compare it with the last scroll height
        new_height = driver.execute_script("return document.body.scrollHeight")
        # Break once the height stops growing
        if new_height == last_height:
            break
        last_height = new_height
    for i in driver.find_elements(By.XPATH, "//a[contains(@href,'/store/apps/details')]"):
        all_apps.append(i.get_attribute('href'))
The above code scrolls the page from top to bottom and gives me the URLs of all the apps available on the page. However, I tried to click an element using the code below and I am getting an error:
driver.find_element(By.XPATH,"//i[contains(text(),'chevron_right')]").click()
The error:
ElementNotInteractableException: Message: element not interactable
(Session info: chrome=110.0.5481.77)
I tried using the code below:
from selenium.webdriver.common.action_chains import ActionChains

element = driver.find_element(By.XPATH, "//div[@class='bewvKb']")  # any icon, maybe that WhatsApp icon here
hover = ActionChains(driver).move_to_element(element)
hover.perform()
element = driver.find_element(By.XPATH, "//i[text()='chevron_right']")
element.click()
There is no option to click the highlighted button shown in the image. Can anyone help me with how to scroll the page sideways so that all the contents can be scraped?
The problem is that the right-arrow icon appears only when you hover over one of the icons in that row. So first hover over any of the icons so that the right arrow appears, and then issue the click, roughly like this:
element = driver.find_element_by_css_selector("#yDmH0d > c-wiz.SSPGKf.glB9Ve > div > div > div.N4FjMb.Z97G4e > c-wiz > div > c-wiz > c-wiz:nth-child(1) > c-wiz > section > div > div > div > div > div > div.aoJE7e.b0ZfVe > div:nth-child(1) > div > div > a > div.TjRVLb > img") #any icon, may be that whatsapp icon here
hover = ActionChains(driver).move_to_element(element)
hover.perform()
element = driver.find_element_by_xpath("//i[text()='chevron_right']")
element.click()
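Putting the two steps together, here is a minimal sketch of the hover-then-click loop for walking a row sideways. The row selector div.aoJE7e.b0ZfVe comes from the long selector above; the assumption that the chevron stops being present or interactable at the end of the row is mine:

from selenium.common.exceptions import ElementNotInteractableException, NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains

# Hover over the first icon in the row so the chevron becomes visible.
row_icon = driver.find_element(By.CSS_SELECTOR, "div.aoJE7e.b0ZfVe img")
ActionChains(driver).move_to_element(row_icon).perform()
# Keep clicking the chevron until the row cannot scroll any further.
while True:
    try:
        driver.find_element(By.XPATH, "//i[text()='chevron_right']").click()
        time.sleep(1)  # let the carousel animate and new tiles render
    except (ElementNotInteractableException, NoSuchElementException):
        break  # assumption: the chevron disappears at the end of the row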
I know Nike can detect when Selenium is used, but even with undetected_chromedriver I still can't put things in my cart. I can click the button, but nothing happens. Is there any way to avoid this?
The item is just an example.
My Code:
import time

import undetected_chromedriver.v2 as uc
from selenium.webdriver.common.by import By

options = uc.ChromeOptions()
options.add_argument(r'--user-data-dir=path/to/chromeprofile')  # point Chrome at an existing profile
options.add_argument('--no-first-run')
options.add_argument('--no-service-autorun')
options.add_argument('--password-store=basic')

if __name__ == '__main__':
    driver = uc.Chrome(executable_path=r"C:\path\to\chromedriver.exe", options=options)
    driver.get("https://www.nike.com/de/t/everyday-lightweight-crew-trainingssocken-W3ZFQQ/SX7676-100")
    time.sleep(5)
    size = driver.find_element(By.CSS_SELECTOR, '#buyTools > div:nth-child(1) > fieldset > div > div:nth-child(3) > label')
    size.click()
    time.sleep(2)
    add_to_cart = driver.find_element(By.CSS_SELECTOR, '#floating-atc-wrapper > div > button.ncss-btn-primary-dark.btn-lg.add-to-cart-btn')
    add_to_cart.click()
    time.sleep(5)
    driver.get('https://www.nike.com/de/checkout')
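Not an answer to the detection part, but worth ruling out the fixed sleeps first: if the page is slow, a sleep can fire the click before the button's handler is wired up, and the click then silently does nothing. A minimal sketch swapping the sleeps for explicit waits, reusing the selectors above (whether this gets past Nike's bot protection is a separate question):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 15)
# Wait until the size option is actually clickable instead of sleeping a fixed time.
size = wait.until(EC.element_to_be_clickable(
    (By.CSS_SELECTOR, '#buyTools > div:nth-child(1) > fieldset > div > div:nth-child(3) > label')))
size.click()
add_to_cart = wait.until(EC.element_to_be_clickable(
    (By.CSS_SELECTOR, '#floating-atc-wrapper > div > button.ncss-btn-primary-dark.btn-lg.add-to-cart-btn')))
add_to_cart.click()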
I tried to follow along with some YouTube tutorials to make my code do what I want it to, but I still haven't found an answer anywhere on the internet...
Here I tried to make the script using BeautifulSoup:
import bs4
import requests

result = requests.get("https://www.instagram.com/kyliejenner/following/")
src = result.content
soup = bs4.BeautifulSoup(src, "lxml")
links = soup.find_all("a")
print(links)
print("\n")
for link in links:
    if "FPmhX notranslate _0imsa " in link.text:
        print(link)
And here I tried to do the same thing with Selenium, but the problem is that I don't know the next steps to make my code copy the usernames a user is following:
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

PATH = r"C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(service=Service(PATH))
driver.get("https://www.instagram.com/")
time.sleep(2)
username = driver.find_element(By.CSS_SELECTOR, "#loginForm > div > div:nth-child(1) > div > label > input")
username.send_keys("my_username")
password = driver.find_element(By.CSS_SELECTOR, "#loginForm > div > div:nth-child(2) > div > label > input")
password.send_keys("password")
login_button = driver.find_element(By.CSS_SELECTOR, "#loginForm > div > div:nth-child(3)")
login_button.click()
time.sleep(3)
save_info_button = driver.find_element(By.CSS_SELECTOR, "#react-root > section > main > div > div > div > section > div > button")
save_info_button.click()
time.sleep(3)
not_now_button = driver.find_element(By.CSS_SELECTOR, "body > div.RnEpo.Yx5HN > div > div > div > div.mt3GC > button.aOOlW.HoLwm")
not_now_button.click()
I would really appreciate it if someone could solve this problem. Again, all I want my script to do is copy the usernames from the "following" section of someone's profile.
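For the missing "next steps", the usual pattern after logging in is: open the profile, click the link that opens the "following" dialog, scroll the dialog with JavaScript so more rows load, then read the profile links inside it. A rough sketch of that pattern; every selector below is hypothetical, since Instagram's markup is obfuscated and changes often, so each one needs checking against the live page:

driver.get("https://www.instagram.com/kyliejenner/")
time.sleep(3)
# Hypothetical selector: the link whose href ends in /following/ opens the dialog.
driver.find_element(By.CSS_SELECTOR, "a[href$='/following/']").click()
time.sleep(2)
dialog = driver.find_element(By.CSS_SELECTOR, "div[role='dialog']")  # hypothetical container
for _ in range(10):  # scroll a fixed number of times, just for the sketch
    driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", dialog)
    time.sleep(1)
# Profile links inside the dialog look like https://www.instagram.com/<username>/
usernames = {a.get_attribute("href").rstrip("/").split("/")[-1]
             for a in dialog.find_elements(By.TAG_NAME, "a") if a.get_attribute("href")}
print(usernames)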
I have a list of 9 search results from this site, and I'd like to get the href link for each item in the search results.
Here are the XPaths and CSS selectors of the 1st, 2nd, and 3rd items' links:
'//*[#id="search-results"]/div[4]/div/ctl:cache/div[3]/div[1]/div/div[2]/div[2]/div[2]/p[4]/a'
#search-results > div.c_408104 > div > ctl:cache > div.product-list.grid > div:nth-child(8) > div > div.thumbnail > div.caption.link-behavior > div.caption > p.description > a
'//*[#id="search-results"]/div[4]/div/ctl:cache/div[3]/div[2]/div/div[2]/div[2]/div[2]/p[4]/a'
#search-results > div.c_408104 > div > ctl:cache > div.product-list.grid > div:nth-child(13) > div > div.thumbnail > div.caption.link-behavior > div.caption > p.description > a
'//*[#id="search-results"]/div[4]/div/ctl:cache/div[3]/div[4]/div/div[2]/div[2]/div[2]/p[2]/a'
#search-results > div.c_408104 > div > ctl:cache > div.product-list.grid > div:nth-child(14) > div > div.thumbnail > div.caption.link-behavior > div.caption > p.description > a
I've tried:
browser.find_elements(By.XPATH, "//a[@href]")
but this returns all links on the page, not just the search results. I've also tried using the id, but I'm not sure of the proper syntax:
browser.find_elements(By.XPATH, '//*[@id="search-results"]//a')
What you want is the attribute="href" of all the results...
So I'll show you an example:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

url = 'https://www.costco.com/sofas-sectionals.html'
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
browser = webdriver.Chrome(service=Service(r"C:\workspace\TalSolutionQA\general_func_class\chromedriver.exe"),
                           options=chrome_options)
browser.get(url)
result_xpath = '//*[@class="caption"]//a'
all_results = browser.find_elements(By.XPATH, result_xpath)
for result in all_results:
    print(result.get_attribute('href'))
So what I'm doing here is getting all the elements that I know contain the links and saving them to all_results. Selenium then provides the get_attribute method to extract the required attribute from each element.
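And to answer the "using the id" part directly: you can scope the same pattern under the results container so that links elsewhere on the page are excluded. A small sketch, assuming the container id is search-results and the link sits in p.description, as in your selectors:

# Only anchors inside the search-results container, within the description paragraphs.
links = browser.find_elements(By.XPATH, '//*[@id="search-results"]//p[contains(@class, "description")]/a')
for link in links:
    print(link.get_attribute('href'))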
Hope you find this helpful!
Why is it that when I add time.sleep(2) I get my desired output, but if I wait until a specific XPath is present instead, I get fewer results?
Output with time.sleep(2) (also desired):
Adelaide Utd
Tottenham
Dundee Fc
...
Count: 145 names
Output with time.sleep removed:
Adelaide Utd
Tottenham
Dundee Fc
...
Count: 119 names
I have added:
clickMe = wait(driver, 13).until(EC.element_to_be_clickable((By.CSS_SELECTOR, ("#page-container > div:nth-child(4) > div > div.ubet-sports-section-page > div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div.page-title-new > h1"))))
This element is present on all pages.
That seems to be significantly fewer. How can I get around this issue?
Script:
import csv

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait

driver = webdriver.Chrome()
driver.set_window_size(1024, 600)
driver.maximize_window()
driver.get('https://ubet.com/sports/soccer')

clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//select[./option="Soccer"]/option')))
options = driver.find_elements(By.XPATH, '//select[./option="Soccer"]/option')
indexes = [index for index in range(len(options))]
for index in indexes:
    try:
        try:
            zz = wait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '(//select/optgroup/option)[%s]' % str(index + 1))))
            zz.click()
        except StaleElementReferenceException:
            pass
        clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#page-container > div:nth-child(4) > div > div.ubet-sports-section-page > div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div.page-title-new > h1")))
        langs0 = driver.find_elements(By.CSS_SELECTOR,
            "div > div > div > div > div > div > div > div > div.row.collapse > div > div > div:nth-child(2) > div > div > div > div > div > div.row.small-collapse.medium-collapse > div:nth-child(1) > div > div > div > div.lbl-offer > span")
        langs0_text = []
        for lang in langs0:
            try:
                langs0_text.append(lang.text)
            except StaleElementReferenceException:
                pass
        directory = 'C:\\A.csv'
        with open(directory, 'a', newline='', encoding="utf-8") as outfile:
            writer = csv.writer(outfile)
            for row in zip(langs0_text):
                writer.writerow(row)
    except StaleElementReferenceException:
        pass
If you cannot access the page, you may need a VPN.
Update:
Perhaps that element loads before the others, so I changed the wait to target an element that is actually scraped (not all pages have data to be scraped).
Add:
try:
    clickMe = wait(driver, 13).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div > div > div > div > div > div > div > div > div.row.collapse > div > div > div:nth-child(2) > div > div > div > div > div > div.row.small-collapse.medium-collapse > div:nth-child(3) > div > div > div > div.lbl-offer > span")))
except TimeoutException as ex:
    pass
The same issue is still present.
Manual steps:
1. Load driver.get('https://ubet.com/sports/soccer')
2. Click the drop-down (//select/optgroup/option)
3. Wait for the page elements so the data can be scraped
4. Scrape:
   div > div > div > div > div > div > div > div > div.row.collapse > div > div > div:nth-child(2) > div > div > div > div > div > div.row.small-collapse.medium-collapse > div:nth-child(1) > div > div > div > div.lbl-offer > span
5. Loop and repeat.
The website is built on AngularJS, so your best bet would be to wait until Angular has finished processing all AJAX requests (I won't go into the underlying mechanics, but there are plenty of materials on that topic throughout the web). For this, I usually define a custom expected condition to check while waiting:
class NgReady:
    js = ('return (window.angular !== undefined) && '
          '(angular.element(document).injector() !== undefined) && '
          '(angular.element(document).injector().get("$http").pendingRequests.length === 0)')

    def __call__(self, driver):
        return driver.execute_script(self.js)

# NgReady does not have any internal state, so one instance
# can be reused for waiting multiple times
ng_ready = NgReady()
Now use it to wait after zz.click():
zz.click()
wait(driver, 10).until(ng_ready)
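Since ng_ready is an ordinary callable taking the driver, the same instance plugs into WebDriverWait at any other point as well, e.g. right after navigation (this placement is my suggestion, not part of the original script):

driver.get('https://ubet.com/sports/soccer')
wait(driver, 10).until(ng_ready)  # let the initial AJAX burst settle before scraping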
Tests
Your original code, unmodified (without sleeping or waiting with ng_ready):
$ python so-47954604.py && wc -l out.csv && rm out.csv
86 out.csv
Using time.sleep(10) after zz.click():
$ python so-47954604.py && wc -l out.csv && rm out.csv
101 out.csv
Same result when using wait(driver, 10).until(ng_ready) after zz.click():
$ python so-47954604.py && wc -l out.csv && rm out.csv
101 out.csv
Credits
NgReady is not my invention; I just ported it to Python from the expected condition implemented in Java that I found here, so all credit goes to the author of that answer.
@hoefling's idea is absolutely the correct one, but here is an addition to the "wait for Angular" part.
The logic used inside NgReady only checks that angular is defined and that no pending requests are left to be processed. Even though it works for this website, it's not a definitive answer to the question of whether Angular is ready to work with.
If we look at what Protractor, the Angular end-to-end testing framework, does to "sync" with Angular, it uses the "Testability" API built into Angular.
There is also the pytractor package, which extends selenium webdriver instances with a WebDriverMixin that keeps the driver and Angular in sync automatically on every interaction.
You can either use pytractor directly (though it is abandoned as a package), or we can try to apply the ideas implemented there in order to always keep our webdriver synced with Angular. For that, let's create this waitForAngular.js script (we'll use only the Angular 1 and 2 support logic; we can always extend it with the relevant Protractor client-side script):
try { return (function (rootSelector, callback) {
  var el = document.querySelector(rootSelector);
  try {
    if (!window.angular) {
      throw new Error('angular could not be found on the window');
    }
    if (angular.getTestability) {
      angular.getTestability(el).whenStable(callback);
    } else {
      if (!angular.element(el).injector()) {
        throw new Error('root element (' + rootSelector + ') has no injector.' +
          ' this may mean it is not inside ng-app.');
      }
      angular.element(el).injector().get('$browser').
        notifyWhenNoOutstandingRequests(callback);
    }
  } catch (err) {
    callback(err.message);
  }
}).apply(this, arguments); }
catch(e) { throw (e instanceof Error) ? e : new Error(e); }
Then, let's inherit from webdriver.Chrome and patch the execute() method so that every time there is an interaction, we additionally check whether Angular is ready before that interaction:
import csv

from selenium import webdriver
from selenium.webdriver.remote.command import Command
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC

COMMANDS_NEEDING_WAIT = [
    Command.CLICK_ELEMENT,
    Command.SEND_KEYS_TO_ELEMENT,
    Command.GET_ELEMENT_TAG_NAME,
    Command.GET_ELEMENT_VALUE_OF_CSS_PROPERTY,
    Command.GET_ELEMENT_ATTRIBUTE,
    Command.GET_ELEMENT_TEXT,
    Command.GET_ELEMENT_SIZE,
    Command.GET_ELEMENT_LOCATION,
    Command.IS_ELEMENT_ENABLED,
    Command.IS_ELEMENT_SELECTED,
    Command.IS_ELEMENT_DISPLAYED,
    Command.SUBMIT_ELEMENT,
    Command.CLEAR_ELEMENT
]

class ChromeWithAngular(webdriver.Chrome):
    def __init__(self, root_element, *args, **kwargs):
        self.root_element = root_element
        with open("waitForAngular.js") as f:
            self.script = f.read()
        super(ChromeWithAngular, self).__init__(*args, **kwargs)

    def wait_for_angular(self):
        self.execute_async_script(self.script, self.root_element)

    def execute(self, driver_command, params=None):
        if driver_command in COMMANDS_NEEDING_WAIT:
            self.wait_for_angular()
        return super(ChromeWithAngular, self).execute(driver_command, params=params)

driver = ChromeWithAngular(root_element='body')
# the rest of the code as is with what you had
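For completeness, a minimal usage sketch of the patched driver with the flow from the question (the option XPath is from the question; the offer selector is a shortened, hypothetical version of the long one, so treat this as illustrative rather than tested):

driver.get('https://ubet.com/sports/soccer')
option = wait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, '(//select/optgroup/option)[1]')))
option.click()  # execute() waits for Angular before the click is sent
offers = driver.find_elements(By.CSS_SELECTOR, 'div.lbl-offer > span')
print([offer.text for offer in offers])  # reading .text also syncs with Angular first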
Again, this is heavily inspired by the pytractor and protractor projects.