driver = webdriver.Chrome()
URL= ['https://makemyhomevn.com/collections/ghe-an-cafe/products/ghe-go-tron']
driver.get(URL)
sleep(1)
des = driver.find_element_by_xpath('//div[#class="product-item-description"]//strong/following sibling::text()[1]')
print(des)
I expect my result as 'Gỗ tự nhiên', I have tried many ways but couldn't get the text after 'Chất liệu:'.
You can take the entire span text using .get_attribute('innerText') and then use the split function from Python like below:
driver.maximize_window()
wait = WebDriverWait(driver, 20)
driver.get("https://makemyhomevn.com/collections/ghe-an-cafe/products/ghe-go-tron")
time.sleep(1)
entire_span = wait.until(EC.visibility_of_element_located((By.XPATH, "//strong[text()='Chất liệu:']/..")))
entire_span_splitted = entire_span.get_attribute('innerText').split(":")
#print(entire_span_splitted[0])
print(entire_span_splitted[1])
Imports:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Output:
Gỗ tự nhiên.
Related
In my code, i want to get a data-value out of text. My code is this:
driver = webdriver.Chrome(chrome_options=chrome_options, executable_path="chromedriver.exe")
driver.get('https://www.google.nl')
accept = driver.find_element("xpath",'//*[#id="L2AGLb"]/div')
accept.click()
box = driver.find_element("xpath",'/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
box.send_keys("placeholder")
box.send_keys(Keys.ENTER)
for x in AmountInputFinal:
close = driver.find_element("xpath", '/html/body/div[4]/div[2]/form/div[1]/div[1]/div[2]/div/div[3]/div[1]/div')
close.click()
search = driver.find_element("xpath",'/html/body/div[4]/div[2]/form/div[1]/div[1]/div[2]/div/div[2]/input')
search.click()
search.send_keys(f"{AmountInput} Dollar to {x}")
search.send_keys(Keys.ENTER)
try:
Exchange = driver.find_element("xpath", f'/html/body/div[7]/div/div[11]/div/div[2]/div[2]/div/div/div[1]/div/div/div/div/div/div/div/div[3]/div[1]/div[1]/div[2]/span[1]').get_attribute('data-value')
except:
Exchange = driver.find_element("xpath", f'/html/body/div[7]/div/div[10]/div/div[2]/div[2]/div/div/div[1]/div/div/div/div/div/div/div/div[3]/div[1]/div[1]/div[2]/span[1]').get_attribute('data-value')
Exchange = len(Exchange)
Where in this case {AmountInput} = "Ten" and AmountInputFinal = ["Pound", "Euro", "Canedian Dollar"]
My output however =
12
6
18
why doesn't it work
I've tried to use normal xpath but that didnt work either. This is full XPath
Use WebDriverWait() and wait for visibility_of_element_located() and following xpath option
Exchange=WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//span[#data-value]"))).get_attribute("data-value")
print(Exchange)
You need to import below libarries.
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
I am trying to scrape each product page from aliexpress website in order to get number of comments, number of photos published by the custumer and also the custumer country and put it to a dataframe.
I have written a code that scrape custumer country but I don't know how to get the number of custumer comments and the number of images.
This is my code :
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
url = 'https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f=%7B%22sku_id%22%3A%2212000027213624098%22%7D&pdp_pi=-1%3B40.81%3B-1%3B-1%40salePrice%3BMAD%3Bsearch-mainSearch'
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get(url)
wait = WebDriverWait(driver, 10)
driver.execute_script("arguments[0].scrollIntoView();", wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.tab-content'))))
driver.get(wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#product-evaluation'))).get_attribute('src'))
data=[]
while True:
for e in driver.find_elements(By.CSS_SELECTOR, 'div.feedback-item'):
try:
country = e.find_element(By.CSS_SELECTOR, '.user-country > b').text
except:
country = None
data.append({
'country':country,
})
try:
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#complex-pager a.ui-pagination-next'))).click()
except:
break
pd.DataFrame(data).to_csv('filename.csv',index=False)
I would appreciate any help from you! Thank you !
If you want numbers of comments / reviews, you can just check the value in this section :
driver.find_element(By.XPATH, 'XPATH_OF_ELEMENT_TO_SCRAP')
To do so in your exemple lets do this outside your loop :
number_feedbacks = driver.find_element(By.XPATH, '//*[#id="transction-feedback"]/div[1]')
number_images = driver.find_element(By.XPATH, '//*[#id="transction-feedback"]//label[1]/em')
If you dont understand or know this function, please feal free to ask and I will explain where I found theses XPATH.We also can use find by id function.
In your code it would be :
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
url = 'https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f=%7B%22sku_id%22%3A%2212000027213624098%22%7D&pdp_pi=-1%3B40.81%3B-1%3B-1%40salePrice%3BMAD%3Bsearch-mainSearch'
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get(url)
wait = WebDriverWait(driver, 10)
driver.execute_script("arguments[0].scrollIntoView();", wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.tab-content'))))
driver.get(wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#product-evaluation'))).get_attribute('src'))
data=[]
number_feedbacks = driver.find_element(By.XPATH, '//*[#id="transction-feedback"]/div[1]')
number_images = driver.find_element(By.XPATH, '//*[#id="transction-feedback"]//label[1]/em')
print(f'number_feedbacks = {number_feedbacks}\nnumber_images = {number_images}')
while True:
for e in driver.find_elements(By.CSS_SELECTOR, 'div.feedback-item'):
try:
country = e.find_element(By.CSS_SELECTOR, '.user-country > b').text
except:
country = None
data.append({
'country':country,
})
try:
wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#complex-pager a.ui-pagination-next'))).click()
except:
break
pd.DataFrame(data).to_csv('filename.csv',index=False)
I am trying to extract google reviews of a resturant using Python Selenium. I tried to extract the reviews posted by each reviewers. Here is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
driver = webdriver.Chrome('')
base_url = 'https://www.google.com/search?tbs=lf:1,lf_ui:9&tbm=lcl&sxsrf=AOaemvJFjYToqQmQGGnZUovsXC1CObNK1g:1633336974491&q=10+famous+restaurants+in+Dunedin&rflfq=1&num=10&sa=X&ved=2ahUKEwiTsqaxrrDzAhXe4zgGHZPODcoQjGp6BAgKEGo&biw=1280&bih=557&dpr=2#lrd=0xa82eac0dc8bdbb4b:0x4fc9070ad0f2ac70,1,,,&rlfi=hd:;si:5749134142351780976,l,CiAxMCBmYW1vdXMgcmVzdGF1cmFudHMgaW4gRHVuZWRpbiJDUjEvZ2VvL3R5cGUvZXN0YWJsaXNobWVudF9wb2kvcG9wdWxhcl93aXRoX3RvdXJpc3Rz2gENCgcI5Q8QChgFEgIIFkiDlJ7y7YCAgAhaMhAAEAEQAhgCGAQiIDEwIGZhbW91cyByZXN0YXVyYW50cyBpbiBkdW5lZGluKgQIAxACkgESaXRhbGlhbl9yZXN0YXVyYW50mgEkQ2hkRFNVaE5NRzluUzBWSlEwRm5TVU56ZW5WaFVsOUJSUkFCqgEMEAEqCCIEZm9vZCgA,y,2qOYUvKQ1C8;mv:[[-45.8349553,170.6616387],[-45.9156414,170.4803685]]'
driver.get(base_url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()
total_reviews_text =driver.find_element_by_xpath("//div[#class='review-score-container']//div//div//span//span[#class='z5jxId']").text
num_reviews = int (total_reviews_text.split()[0])
all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
time.sleep(2)
total_reviews = len(all_reviews)
while total_reviews < num_reviews:
driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
#all_reviews = driver.find_elements_by_css_selector('div.gws-localreviews__google-review')
time.sleep(5)
all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
print(total_reviews)
total_reviews +=5
review_info = driver.find_elements_by_xpath("//div[#class='PuaHbe']")
for person in person_infos:
rating = person.find_element_by_xpath("./span").get_attribute('aria-label')
print(rating)
However, the above code produces/print 'none'. I am not sure where I made the mistake. Any help to fix the issue would be appreciated.
You are using a wrong XPath locator.
Instead of
rating = person.find_element_by_xpath("./span").get_attribute('aria-label')
Try using
rating = person.find_element_by_xpath("./g-review-stars/span").get_attribute('aria-label')
from selenium import webdriver
import time
from selenium.common.exceptions import NoSuchElementException
driver = webdriver.Chrome("C:/Users/ysatish/PycharmProjects/all rules/driver/chromedriver.exe")
driver.maximize_window()
driver.implicitly_wait(10)
driver.get("https://www.myntra.com/men-tshirts")
chali = driver.find_elements_by_xpath('//li//a[1]//div[2]//div[1]//span')
dak = driver.find_elements_by_xpath('//li//a[1]//div[2]//div[1]//span[1]')
sub = len(chali)
da = len(dak)
print(da)
print(sub
Updated solution:
driver.get('https://www.myntra.com/men-tshirts')
wait = WebDriverWait(driver, 10)
chali = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//li[*]//a[1]//div[2]//div[1]//span[1]")))
print(len(chali))
for element in chali:
print element.text
element.click()
dak = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//li//a[1]//div[2]//div[1]//span")))
print(len(dak))
for element0 in dak:
print element0.text
element0.click()
Note : please add below imports to your solution
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
Updated Solution:
//li[*]//a[1]//div[2]//div[1]//span[not(#class='product-strike')][not(#class='product-discountedPrice')][not(#class='product-discountPercentage')][last()]//span[1]
and
//li[*]//a[1]//div[2]//div[1]//span
Output:
I am attempting to extract all urls that https://shop.freedommobile.ca/devices has when you click the 'see options' button under each phone and place them into a list of strings.
I am using python with Selenium and wait libraries.
Ive already tried using .text in my parameters. However, I keep running into an error that states:
typeError: 'str' object is not callable
line 17 is the issue.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome()
class phoneCost:
driver.get("https://shop.freedommobile.ca/devices")
# extract the names of the phones
wait = WebDriverWait(driver, 20) #10 second wait
XPathLocation = """B//*[#id="skip-navigation"]/div/div/div[1]/div/div[2]/a'"""
phonePlanLinksRaw = wait.until(EC.presence_of_all_elements_located(By.XPATH(XPathLocation)))
phonePlanLinks = []
for element in range(len(phonePlanLinksRaw)):
link = element
phonePlanLinks.append(str(link))
numLink = 1
for element in range(len(phonePlanLinks)):
print("phone " + str(numLink) + " : " + phonePlanLinks[element])
numLink += 1
should return a list of urls in string format:
[https://shop.freedommobile.ca/devices/Apple/iPhone_XS_Max?sku=190198786135&planSku=Freedom%20Big%20Gig%2015GB
,
https://shop.freedommobile.ca/devices/Apple/iPhone_XS?sku=190198790569&planSku=Freedom%20Big%20Gig%
,
https://shop.freedommobile.ca/devices/Apple/iPhone_XR?sku=190198776631&planSku=Freedom%20Big%20Gig%2015GB]
Any help is appreciated
Thank you
Here is the logic that you should use.
WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.XPATH,"//div[starts-with(#class,'deviceListItem')]/a")))
mblOptions = driver.find_elements_by_xpath("//div[starts-with(#class,'deviceListItem')]/a")
mblUrls = []
for mblOption in mblOptions:
mblUrls.append(mblOption.get_attribute('href'))
print (mblUrls)
output:
['https://shop.freedommobile.ca/devices/Apple/iPhone_XS_Max?sku=190198786135&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Apple/iPhone_XS?sku=190198790569&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Apple/iPhone_XR?sku=190198776631&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Apple/iPhone_8_Plus?sku=190198454249&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Apple/iPhone_8?sku=190198450944&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_S10+?sku=887276301570&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_S10?sku=887276312163&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2015GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_S10e?sku=887276313870&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_Tab_A_8_LTE?sku=887276299440&planSku=Promo%20Tablet%2015', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_Note9?sku=887276279916&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_S9?sku=887276250861&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Motorola/G7_Power?sku=723755134249&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Motorola/Moto_E5_Play?sku=723755125940&planSku=Freedom%20LTE%2B3G%209.5GB%20Promo', 'https://shop.freedommobile.ca/devices/Google/Pixel_3a?sku=842776111326&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Google/Pixel_3?sku=842776109798&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2010GB', 'https://shop.freedommobile.ca/devices/Google/Pixel_3_XL?sku=842776109828&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2010GB', 'https://shop.freedommobile.ca/devices/ZTE/Z557?sku=885913107448&planSku=Freedom%20500MB', 'https://shop.freedommobile.ca/devices/LG/G7_ThinQ?sku=652810830737&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Huawei/P30_lite?sku=886598061131&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Huawei/Mate_20_Pro?sku=886598058964&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2010GB', 'https://shop.freedommobile.ca/devices/LG/X_Power_3?sku=652810831130&planSku=Freedom%20LTE%2B3G%209.5GB%20Promo', 'https://shop.freedommobile.ca/devices/LG/G8_ThinQ?sku=652810832434&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2010GB', 'https://shop.freedommobile.ca/devices/LG/Q_Stylo_+?sku=652810831222&planSku=Freedom%202GB', 'https://shop.freedommobile.ca/devices/Alcatel/GoFLIP?sku=889063504010&planSku=Freedom%20500MB', 'https://shop.freedommobile.ca/devices/Bring_Your/Own_Device?sku=byod']
Try using list comprehension to achieve the reults. Just take a look at this portion (By.XPATH(XPathLocation))) that you used which should be wait.until(EC.visibility_of_all_elements_located((By.XPATH, "some_xpath"))).
Rectified one is more like:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
with webdriver.Chrome() as driver:
wait = WebDriverWait(driver, 10)
driver.get("https://shop.freedommobile.ca/devices")
item_links = [item.get_attribute("href") for item in wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[contains(#class,'__DeviceDetailsButton')]")))]
print(item_links)
To extract all urls that https://shop.freedommobile.ca/devices has using Selenium you have to induce WebDriverWait for the visibility_of_all_elements_located() and you can use the following Locator Strategy:
Code Block:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
# options.add_argument('disable-infobars')
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\WebDrivers\chromedriver.exe')
driver.get("https://shop.freedommobile.ca/devices")
print([my_elem.get_attribute("href") for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//a[text()='See Options']")))])
Console Output:
['https://shop.freedommobile.ca/devices/Apple/iPhone_XS_Max?sku=190198786135&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Apple/iPhone_XS?sku=190198790569&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Apple/iPhone_XR?sku=190198776631&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Apple/iPhone_8_Plus?sku=190198454249&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Apple/iPhone_8?sku=190198450944&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_S10+?sku=887276301570&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_S10?sku=887276312163&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2015GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_S10e?sku=887276313870&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_Tab_A_8_LTE?sku=887276299440&planSku=Promo%20Tablet%2015', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_Note9?sku=887276279916&planSku=Freedom%20Big%20Gig%2015GB', 'https://shop.freedommobile.ca/devices/Samsung/Galaxy_S9?sku=887276250861&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Motorola/G7_Power?sku=723755134249&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Motorola/Moto_E5_Play?sku=723755125940&planSku=Freedom%20LTE%2B3G%209.5GB%20Promo', 'https://shop.freedommobile.ca/devices/Google/Pixel_3a?sku=842776111326&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Google/Pixel_3?sku=842776109798&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2010GB', 'https://shop.freedommobile.ca/devices/Google/Pixel_3_XL?sku=842776109828&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2010GB', 'https://shop.freedommobile.ca/devices/ZTE/Z557?sku=885913107448&planSku=Freedom%20500MB', 'https://shop.freedommobile.ca/devices/LG/G7_ThinQ?sku=652810830737&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Huawei/P30_lite?sku=886598061131&planSku=Freedom%20Big%20Gig%20%2B%20Talk%205GB', 'https://shop.freedommobile.ca/devices/Huawei/Mate_20_Pro?sku=886598058964&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2010GB', 'https://shop.freedommobile.ca/devices/LG/X_Power_3?sku=652810831130&planSku=Freedom%20LTE%2B3G%209.5GB%20Promo', 'https://shop.freedommobile.ca/devices/LG/G8_ThinQ?sku=652810832434&planSku=Freedom%20Big%20Gig%20%2B%20Talk%2010GB', 'https://shop.freedommobile.ca/devices/LG/Q_Stylo_+?sku=652810831222&planSku=Freedom%202GB', 'https://shop.freedommobile.ca/devices/Alcatel/GoFLIP?sku=889063504010&planSku=Freedom%20500MB', 'https://shop.freedommobile.ca/devices/Bring_Your/Own_Device?sku=byod']