I'm trying to web scrape using Selenium, Python and Beautiful Soup. I am scraping this page, but I want to scrape information off the pop-up window that appears when you click on the 'i' (information) icons in the corner of each product. My code is as follows:
import requests
from bs4 import BeautifulSoup
import time
import selenium
import math
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import chromedriver_binary
import re
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(ChromeDriverManager().install())
r = requests.get('https://dmarket.com/csgo-skins/product-card/ak-47-redline/field-tested')
driver.get('https://dmarket.com/csgo-skins/product-card/ak-47-redline/field-tested')
html_getter = BeautifulSoup(r.text, "html.parser")
data = html_getter.findAll(attrs={"class":"c-asset__priceNumber"})
dataskin = html_getter.findAll(attrs={"class" : "c-asset__exterior"})
time.sleep(2)
driver.find_element_by_id("onesignal-slidedown-cancel-button").click()
time.sleep(2)
driver.find_element_by_class_name("c-dialogHeader__close").click()
time.sleep(30)
driver.find_element_by_class_name("c-asset__action--info").click()
time.sleep(30)
price_element = driver.switch_to.active_element
print("<<<<<TEXT>>>>>")
print(price_element.text)
print("<<<<<END>>>>>")
driver.close()
However, when I run this, the only text that prints is "close". If you inspect the information pop-up, it should print out the price, data from the chart, etc. How can I get it to print this info? Specifically, I want the amount sold on the most recent day and the price listed on the chart for the most recent day (both seem to be accessible in Chrome DevTools). I don't think I'm looking at the wrong frame, since I switch to the active element, so I'm not sure how to fix this!
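For reference, a minimal sketch of one way to read the pop-up once it opens: wait for the dialog element itself to become visible rather than relying on the active element, then read its text or hand the rendered DOM to BeautifulSoup. The ".c-dialog" selector below is an assumption about the pop-up's markup (check the real class in DevTools), not something confirmed from the page; it reuses the driver, WebDriverWait, and EC imports from the code above.
from selenium.webdriver.common.by import By

wait = WebDriverWait(driver, 30)
driver.find_element_by_class_name("c-asset__action--info").click()
# Wait for the dialog to render; ".c-dialog" is a guess at the pop-up's class
dialog = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".c-dialog")))
print(dialog.text)
# Or parse the now-rendered DOM with BeautifulSoup
popup_soup = BeautifulSoup(driver.page_source, "html.parser")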
I want to scrape the rating and all the reviews on the page, but I am not able to find the path.
import urllib.request
from bs4 import BeautifulSoup
import csv
import os
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
import pandas as pd
import time
chrome_path = r'C:/Users/91940/AppData/Local/Programs/Python/Python39/Scripts/chromedriver.exe'
driver = webdriver.Chrome(executable_path=chrome_path)
driver.implicitly_wait(10)
driver.get("https://www.lazada.sg/products/samsung-galaxy-watch3-bt-45mm-titanium-i1156462257-
s4537770883.html?search=1&freeshipping=1")
product_name = driver.find_element_by_xpath('//*[@id="module_product_title_1"]/div/div/h1')
print(product_name.text)
rating = driver.find_element_by_xpath("//span[@class='score-average']")
print(rating.text)
review = driver.find_element_by_xpath('//*[@id="module_product_review"]/div/div/div[3]/div[1]/div[1]')
print(review.text)
I believe print(product_name.text) is executing correctly, right?
There is an issue with driver.find_element_by_xpath("//span[@class='score-average']"): I could not find score-average anywhere in the HTML source.
So try this instead:
rating = driver.find_element_by_css_selector("div.pdp-review-summary")
print(rating.text)
You can try the below code to get review :
wait = WebDriverWait(driver, 10)
driver.get("https://www.lazada.sg/products/samsung-galaxy-watch3-bt-45mm-titanium-i1156462257- s4537770883.html?search=1&freeshipping=1")
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a[class$='pdp-review-summary__link']"))).click()
ActionChains(driver).move_to_element(wait.until(EC.visibility_of_element_located((By.XPATH, "//h2[contains(text(), 'Ratings & Reviews')]")))).perform()
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.item-content")))
for review in driver.find_elements(By.CSS_SELECTOR, "div.item-content"):
    print(review.get_attribute('innerHTML'))
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
Perhaps there is a problem with your path? (Apologies, I'm not on Windows to test.) From memory, Windows paths use \ characters instead of /. Additionally, you may need two backslashes after the drive letter (C:\\).
c:\\Users\91940\AppData\Local\...
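For illustration, either of these forms avoids backslash-escape problems in the path from the question (the path itself is unchanged):
# Raw string: backslashes are not treated as escape characters
chrome_path = r'C:\Users\91940\AppData\Local\Programs\Python\Python39\Scripts\chromedriver.exe'
# Or escape every backslash explicitly
chrome_path = 'C:\\Users\\91940\\AppData\\Local\\Programs\\Python\\Python39\\Scripts\\chromedriver.exe'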
I would like to scrape a web page that Selenium opened after navigating from a different page.
I entered a search term into a website using Selenium, and this landed me on a new page. My aim is to create soup out of this new page, but the soup is getting created from the previous page where I entered my search term. Help please!
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Firefox()
driver.get('http://www.ratestar.in/')
inputElement = driver.find_element_by_css_selector("#txtStock")
inputElement.send_keys('GM Breweries')
inputElement.send_keys(Keys.ENTER)
driver.wait.until(staleness_of('txtStock')
source = driver.page_source
soup = BeautifulSoup(source)
You need to know the exact company name for your search. After using send_keys, you tried to check for the staleness of an element; I did not understand how that statement was supposed to work, so I added a WebDriverWait for an element of the new page.
The following works for me regarding the Selenium part, up to getting the page source:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Firefox()
driver.get('http://www.ratestar.in/')
inputElement = driver.find_element_by_css_selector("#txtStock")
inputElement.send_keys('GM Breweries Ltd.')
inputElement.send_keys(Keys.ENTER)
company = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'lblCompany')))
source = driver.page_source
You should also add exception handling.
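For example, a minimal sketch of that handling around the wait, reusing the names and imports from the snippet above:
from selenium.common.exceptions import TimeoutException

try:
    company = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'lblCompany'))
    )
    source = driver.page_source
except TimeoutException:
    # The results page never showed the company label within 10 seconds
    print("Timed out waiting for the results page")
    source = None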
@Jens Dibbern has given a working solution, but it is not necessary to give the exact name of the company in the search. What happens is that when you type a non-exact name, a drop-down pops up.
I have observed that unless this drop-down is present, the Enter key does nothing. You can check this by going to the site, pasting the name, and pressing Enter as fast as possible without waiting: nothing happens.
You could instead wait for this drop-down to be visible and then send the Enter key. This also works perfectly. Note that this will end up selecting the first item in the drop-down if more than one is present.
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Firefox()
driver.get('http://www.ratestar.in/')
inputElement = driver.find_element_by_css_selector("#txtStock")
inputElement.send_keys('GM Breweries')
drop_down = driver.find_element_by_css_selector("#listPlacementStock")
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '#listPlacementStock:not([style*="display: none"])')))
inputElement.send_keys(Keys.ENTER)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="CompanyLink"]')))
source = driver.page_source
soup = BeautifulSoup(source,'html.parser')
print(soup)
I am trying to put together a web scraper to get locations by zip code entered by the user. Right now I am able to navigate to the website, but I am not able to click on the drop-down button that allows you to enter a zip code. Here is what I have so far:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import pandas as pd
from selenium.webdriver.common.by import By
zipcode = input("What zip code would you like to search? ")
out_table = 'Oreilly_autp_parts_addresses_{}.csv'.format(zipcode)
#Using Selenium to navigate to website, search zipcode and get html data
driver = webdriver.Chrome()  # requires chromedriver.exe
driver.get('https://www.oreillyauto.com/')
time.sleep(2)
driver.maximize_window()
el = driver.find_element_by_class_name("site-store")
time.sleep(2)
driver.execute_script("arguments[0].setAttribute('class','site-store site-nav_item dispatcher-trigger--active')", el)
It seems to be targeting the correct element, but the drop-down that is supposed to show up isn't there.
Any help is much appreciated!
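One thing worth trying, as a sketch (same site-store class as in your code; whether this actually opens the panel depends on the page's JavaScript): let Selenium click the element, or click it through JavaScript, instead of rewriting its class attribute, since the drop-down is normally toggled by a click handler rather than by the class name itself. This reuses the driver and imports from the code above.
wait = WebDriverWait(driver, 10)
store_button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "site-store")))
store_button.click()  # let the site's own handler toggle the drop-down
# If an overlay intercepts the native click, a JavaScript click may get through:
# driver.execute_script("arguments[0].click();", store_button)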
Hello, I want to scrape data from a site with an age-verification pop-up using Python 3.x and BeautifulSoup. I can't get to the underlying text and images without clicking "yes" to "are you over 21?". Thanks for any support.
EDIT: Thanks, with some help from a comment I see that I can use the cookies but am not sure how to manage/store/call cookies with the requests package.
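For the cookie route, a rough sketch of what that could look like with requests: a Session object keeps cookies between requests, and you can pre-set the cookie the site drops after you click "yes". The cookie name age_verified below is purely hypothetical; the real name and value have to be read from DevTools after passing the gate once.
import requests
from bs4 import BeautifulSoup

session = requests.Session()                 # stores cookies between requests
# Hypothetical cookie; inspect the real one in your browser's DevTools
session.cookies.set('age_verified', 'true')
resp = session.get('https://www.shopharborside.com/oakland/#/shop/412')
soup = BeautifulSoup(resp.text, 'html.parser')
print(soup.title)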
EDIT 2: With some help from another user I am now using the Selenium package, so that it will also work in case it's a graphical overlay (I think?). I'm having trouble getting it to work with the gecko driver but will keep trying! Thanks for all the advice again, everyone.
EDIT 3: OK, I have made progress and I can get the browser window to open using the gecko driver! Unfortunately it doesn't like that link specification, so I'm posting again. The link to click "yes" on the age verification is buried on that page as something called mlink...
EDIT 4: Made some progress; the updated code is below. I managed to find the element in the XML code, now I just need to manage to click the link.
#
import time
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
driver = webdriver.Firefox(executable_path=r'/Users/jeff/Documents/geckodriver') # Optional argument, if not specified will search path.
driver.get('https://www.shopharborside.com/oakland/#/shop/412');
url = 'https://www.shopharborside.com/oakland/#/shop/412'
driver.get(url)
#
driver.find_element_by_class_name('hhc_modal-body').click(Yes)
#wait.1.second
time.sleep(1)
pagesource = driver.page_source
soup = BeautifulSoup(pagesource)
#you.can.now.enjoy.soup
print(soup.prettify())
Edit new: Stuck again; here is the current code. I seem to have isolated the element "myBtnYes", but I get an error when running the code:
ElementClickInterceptedException: Message: Element is not clickable at point (625,278.5500030517578) because another element obscures it
import time
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
driver = webdriver.Firefox(executable_path=r'/Users/jeff/Documents/geckodriver') # Optional argument, if not specified will search path.
driver.get('https://www.shopharborside.com/oakland/#/shop/412');
url = 'https://www.shopharborside.com/oakland/#/shop/412'
driver.get(url)
#
driver.find_element_by_id('myBtnYes').click()
#wait.1.second
time.sleep(1)
pagesource = driver.page_source
soup = BeautifulSoup(pagesource)
#you.can.now.enjoy.soup
print(soup.prettify())
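A sketch of two common workarounds for that ElementClickInterceptedException, using the same myBtnYes id as in the code above: wait until the button is actually clickable, and if the overlay still intercepts the native click, click it through JavaScript instead (both reuse the driver and WebDriverWait already imported).
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)
btn = wait.until(EC.element_to_be_clickable((By.ID, 'myBtnYes')))
try:
    btn.click()                                           # normal click once it is clickable
except Exception:
    driver.execute_script("arguments[0].click();", btn)   # bypass the obscuring element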
If your aim is to click through the verification, go with Selenium:
P.S.: pip install selenium && get geckodriver (Firefox) or chromedriver (Chrome)
#Mossein~King(hi i'm here to help)
import time
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
#this.is.for.headless.This.will.save.you.a.bunch.of.research.time(Trust.me)
options = Options()
options.add_argument("--headless")
driver = webdriver.Firefox(firefox_options=options)
#for.graphical(you.need.gecko.driver.for.firefox)
# driver = webdriver.Firefox()
url = 'your-url'
driver.get(url)
#get.the.link.to.clicking
#example: if <a class='MosseinKing'>
driver.find_element_by_xpath("//a[#class='MosseinKing']").click()
#wait.1.second.in.case.of.transitions
time.sleep(1)
pagesource = driver.page_source
soup = BeautifulSoup(pagesource)
#you.can.now.enjoy.soup
print(soup.prettify())
I've written some code in Python, in combination with Selenium, to parse product names from a webpage. A few "load more" buttons become visible as the browser scrolls downward, and the webpage only displays its full content once the page has been scrolled to the bottom and there is no "load more" button left to click. My scraper seems to be doing well, but I'm not getting all the results: there are around 200 products on that page, and I'm only getting 90 of them. What change should I make to my scraper to get them all? Thanks in advance.
The webpage I'm dealing with: Page_Link
This is the script I'm trying with:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("put_above_url_here")
wait = WebDriverWait(driver, 10)
page = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,".listing_item")))
for scroll in range(17):
    page.send_keys(Keys.PAGE_DOWN)
    time.sleep(2)
    try:
        load = driver.find_element_by_css_selector(".lm-btm")
        load.click()
    except Exception:
        pass

for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[id^=item_]"))):
    name = item.find_element_by_css_selector(".pro-name.el2").text
    print(name)

driver.quit()
Try the code below to get the required data:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("https://www.purplle.com/search?q=hair%20fall%20shamboo")
wait = WebDriverWait(driver, 10)
header = driver.find_element_by_tag_name("header")
driver.execute_script("arguments[0].style.display='none';", header)
while True:
    try:
        page = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".listing_item")))
        driver.execute_script("arguments[0].scrollIntoView();", page)
        page.send_keys(Keys.END)
        load = wait.until(EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "LOAD MORE")))
        driver.execute_script("arguments[0].scrollIntoView();", load)
        load.click()
        wait.until(EC.staleness_of(load))
    except:
        break

for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[id^=item_]"))):
    name = item.find_element_by_css_selector(".pro-name.el2").text
    print(name)

driver.quit()
You should only use Selenium as a last resort.
A quick look around the webpage shows the API it calls to get your data.
It returns JSON output with all the details:
Link
You can now just loop over the results and store them in a DataFrame easily.
It's very fast, with fewer errors than Selenium.
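A rough sketch of that approach; the endpoint, parameters, and JSON field names below are placeholders (copy the real request from the browser's DevTools Network tab), so treat this as the shape of the solution rather than working code for this exact site:
import requests
import pandas as pd

# Placeholder endpoint and parameters: copy the real ones from DevTools -> Network
url = "https://www.purplle.com/api/search"          # hypothetical endpoint
params = {"q": "hair fall shampoo", "page": 1}      # hypothetical parameters

rows = []
while True:
    data = requests.get(url, params=params).json()
    items = data.get("items", [])                   # field name is a guess
    if not items:
        break
    rows.extend(items)
    params["page"] += 1

df = pd.DataFrame(rows)
print(df.head())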