My script prints all the details, but the problem is that it prints "Master Juice" repeatedly, as highlighted in the picture below. This is the page I am scraping: https://www.foodpanda.pk/restaurants/new?lat=24.9414896&lng=67.1676002&vertical=restaurants
from selenium import webdriver
# Question script: print name, delivery time, rating and deal tag for every
# entry of the vendor list on a foodpanda restaurant listing page.
driver = webdriver.Chrome('F:/chromedriver')
driver.get("https://www.foodpanda.pk/restaurants/new?lat=24.9414896&lng=67.1676002&vertical=restaurants")
# response = scrapy.Selector(text=driver.page_source)
# NOTE: `list` shadows the builtin and is not read again in this script.
list = driver.find_elements_by_css_selector("ul.vendor-list li")
# The same selector is queried a second time only to count the entries.
length = len(driver.find_elements_by_css_selector("ul.vendor-list li"))
for i in range(length):
try:
# Each lookup re-runs its selector against the whole page and picks the
# i-th match; an out-of-range index raises and skips the remaining lookups.
name = driver.find_elements_by_css_selector(".headline .name")[i].text
time = driver.find_elements_by_css_selector(".badge-info")[i].text.strip()
rating = driver.find_elements_by_css_selector(".rating")[i].text
dealtag = driver.find_elements_by_css_selector(".multi-tag")[i].text
except:
# Bare except: every failure is discarded silently, so the four variables
# keep whatever an earlier iteration stored in them.
pass
# Runs after the except clause as well, so a failed iteration prints the
# previous values again (the repeated rows reported in the question).
print(name,time,rating,dealtag)
It's because it prints on every iteration, even when there's an error, so it prints whatever was previously stored in your name, time, etc. variables. Try moving your print statement inside your try: block:
from selenium import webdriver
driver = webdriver.Chrome('F:/chromedriver')
driver.get("https://www.foodpanda.pk/restaurants/new?lat=24.9414896&lng=67.1676002&vertical=restaurants")


def _text_or_blank(card, selector):
    """Return the stripped text of the first match of *selector* inside *card*.

    Returns "" when the card has no such element, so one missing field does
    not discard the whole row.
    """
    matches = card.find_elements_by_css_selector(selector)
    return matches[0].text.strip() if matches else ""


# Query the vendor cards once and read every field from inside its own card.
# Indexing four page-wide result lists by position pairs a field with the
# wrong vendor as soon as one card lacks that field.
vendors = driver.find_elements_by_css_selector("ul.vendor-list li")
for vendor in vendors:
    name = _text_or_blank(vendor, ".headline .name")
    if not name:
        # List items without a vendor name are not restaurant cards.
        continue
    delivery_time = _text_or_blank(vendor, ".badge-info")
    rating = _text_or_blank(vendor, ".rating")
    dealtag = _text_or_blank(vendor, ".multi-tag")
    print(name, delivery_time, rating, dealtag)
Related
I am trying to make a Selenium Python script to collect data from each job in an Indeed job search. I can easily get the data from the first and second pages. The problem I am running into is that, while looping through the pages, the script only clicks the next page and then the previous page, in that order, going from page 1 -> 2 -> 1 -> 2 -> etc. I know it is doing this because both the next and previous buttons have the same class name. So when I re-declare the WebElement variable after the page reloads, it hits the previous button, because that is the first element with that class on the page. I tried making it always click the next button by using its XPath (I inspected the next-button element and copied the full XPath), but I still run into the same problem. My code is below; I am using Python 3.7.9 and pip version 21.2.4.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
# Raw string: "\P" and "\c" are not valid escape sequences, and newer Python
# versions warn about them in ordinary string literals.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
# Start Chrome through the chromedriver executable located at PATH.
driver = webdriver.Chrome(PATH)
# URL scheme that is prepended to siteDomain when navigating.
HTTPS = "https://"
# hard coded data to test
siteDomain = "indeed.com"
jobSearch = "Software Developer"
locationSearch = "Richmond, VA"
# Meant to collect the text of each job card; the loop that fills it is
# currently commented out further down.
listOfJobs = []
def if_exists_by_id(id):
    """Report whether the current page has an element with the given id.

    Returns True when ``driver.find_element_by_id`` succeeds and False when it
    raises ``NoSuchElementException``; any other exception propagates.
    """
    try:
        driver.find_element_by_id(id)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
def if_exists_by_class_name(class_name):
    """Report whether the current page has an element with the given class.

    Returns True when ``driver.find_element_by_class_name`` succeeds and False
    when it raises ``NoSuchElementException``; other exceptions propagate.
    """
    try:
        driver.find_element_by_class_name(class_name)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
def if_exists_by_xpath(xpath):
    """Report whether the given XPath matches an element on the current page.

    Returns True when ``driver.find_element_by_xpath`` succeeds and False when
    it raises ``NoSuchElementException``; other exceptions propagate.
    """
    try:
        driver.find_element_by_xpath(xpath)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
def removeSpaces(strArray):
    """Flatten newlines in each job string, print the jobs, and report new ones.

    Every entry of ``strArray`` has its newline characters replaced by single
    spaces, in place. Each cleaned entry is printed followed by a blank line,
    then a summary line says how many entries contain the substring "new".

    Args:
        strArray: list of job-card strings; mutated in place.

    Returns:
        The same list object, with newlines replaced.
    """
    # str.replace leaves a string untouched when there is nothing to replace,
    # so no membership pre-check is needed. Slice assignment keeps the
    # caller's list object.
    strArray[:] = [job.replace("\n", " ") for job in strArray]

    # NOTE(review): the source's indentation was lost; printing every entry
    # (rather than only the "new" ones) is assumed here.
    for job in strArray:
        print(job + "\n")

    jobCounter = len(strArray)
    newjobCounter = sum("new" in job for job in strArray)
    if newjobCounter == 0:
        print("Unfortunately, there are no new jobs for this search")
    else:
        print("With " + str(newjobCounter) + " out of " + str(jobCounter) + " new jobs!")
    return strArray
# Main flow: open the site, submit the search, then walk the result pages and
# report how many job cards each page shows.
try:
# Goes to Site
driver.get(HTTPS + siteDomain)
# obtains access to elements from website
searchJob = driver.find_element_by_name("q")
searchLocation = driver.find_element_by_name("l")
# clear text field
searchJob.send_keys(Keys.CONTROL, "a", Keys.BACK_SPACE)
searchLocation.send_keys(Keys.CONTROL, "a", Keys.BACK_SPACE)
# inputs values into website elements
searchJob.send_keys(jobSearch)
searchLocation.send_keys(locationSearch)
# presses button to search
searchLocation.send_keys(Keys.RETURN)
# Begin looping through pages
# The pagination <li> items are collected once, before any page change; the
# loop variable x is never used, so only their count drives the loop.
pageList = driver.find_element_by_class_name("pagination")
page = pageList.find_elements_by_tag_name("li")
numPages = 0
for i,x in enumerate(page):
time.sleep(1)
# checks for popup, if there is popup, exit out and sleep
if if_exists_by_id("popover-x"):
driver.find_element_by_id("popover-x").click()
time.sleep(1)
# increment page counter variable
numPages += 1
# obtains data in class name value
jobCards = driver.find_elements_by_class_name("jobCard_mainContent")
# prints number of jobs returned
print(str(len(jobCards)) + " jobs in: " + locationSearch)
# inserts each job into list of jobs array
# commented out to make debugging easier
# for jobCard in jobCards:
# listOfJobs.append(jobCard.text)
# supposed to click the next page, but keeps alternating
# between next page and previous page
# find_element_* returns the first element carrying class "np"; when both
# the previous and the next button carry it, that is the previous button.
driver.find_element_by_class_name("np").click()
print("On page number: " + str(numPages))
# print(removeSpaces(listOfJobs))
# Only ValueError is handled, and print(ValueError) prints the exception
# class itself rather than the instance that was raised.
except ValueError:
print(ValueError)
# Always close the browser, whether or not the loop completed.
finally:
driver.quit()
Any help will be greatly appreciated, also if I am implementing bad coding practices in the structure of the script please let me know as I am trying to learn as much as possible! :)
I have tested your code. The thing is, there are two elements with class 'np' once we go to the second page. What you can do is use find_element_by_class_name('np') the first time and find_elements_by_class_name('np')[1] every other time; that will select the next button. You can use find_elements_by_class_name('np')[0] for the previous button if needed. Here is the code:
# First iteration: the first page has a single "np" element, the next button.
if i == 0:
driver.find_element_by_class_name("np").click()
# Later pages carry two "np" elements; index 0 is the previous button and
# index 1 is the next button.
else:
driver.find_elements_by_class_name("np")[1].click()
Just replace the line driver.find_element_by_class_name("np").click() with the code snippet above.. I have tested it and it worked like a charm.
Also i am not as experienced as the other devs here.. But i am glad if i could help you. (This is my first answer ever on stackoverflow)
I am trying to obtain the addresses of Walmart stores after searching Walmart.com by zip code using Python Selenium. I have searched here and online but I can't find a solution. The list of stores within the zip code populates, and I can loop through that list to get the name of each store, but then another click on a "Details" button is required to see the full address. After clicking on "Details" the information appears in the same area as the list of stores, but, I cannot obtain any information within the new "window" that appears after clicking "Details" and the looping stops. See pictures of the website below.
I have tried "wait.until(EC.visibility_of_element_located((By.XPATH..." and tried to switch between elements, windows and frames, but none of them work.
Here is some code I tried and you can see other things I tried that are commented out. While there are no errors, it doesn't work:
# Collect the name and address lines of every Walmart store listed for a
# zip code by opening each store's "Details" pane in turn.
zip_code = '60639'
w_browser = webdriver.Chrome(ChromeDriverManager().install())
ws_url = 'https://www.walmart.com/store/finder?location=' + zip_code + '&distance=30'
w_browser.get(ws_url)

# XPath addresses attributes with "@"; the earlier '[#id="..."]' predicates
# were not valid XPath, so none of the lookups could match.
store_items_xpath = '//*[@id="store-list"]/div/ol/li'
details_xpath = '//*[@id="store-finder-results"]/div/div/div[2]/div[3]/div/div[2]/div/div[1]/div'

w_div = w_browser.find_elements_by_xpath(store_items_xpath + '//a')
w_store_names = []
w_add_names = []  # one list of address lines per store
wait = WebDriverWait(w_browser, 5)

# Only the number of matched links is used; each store is re-located through
# its 1-based position in the list.
for w_tic in range(1, len(w_div) + 1):
    item_xpath = '%s[%d]' % (store_items_xpath, w_tic)
    try:
        w_store = w_browser.find_element_by_xpath(item_xpath + '/div/div[2]/span[1]/div[1]')
        print(w_tic)
        w_store_names.append(w_store.text)
        print(w_store.text)
        # main_page = w_browser.current_window_handle
        # time.sleep(1)
        # button = w_browser.find_element_by_link_text('store-icon-link-text')
        button = w_browser.find_element_by_xpath(item_xpath + '/div/div[2]/span[2]/span/span')
        button.click()
        time.sleep(1)
        print('I clicked Details')
        add_menu = wait.until(EC.visibility_of_element_located((By.XPATH, details_xpath)))
        ActionChains(w_browser).move_to_element(add_menu).perform()
        # back_button = w_browser.find_element_by_xpath('//*[@id="store-finder-results"]/div/div/div[1]/button/span/span[1]')
        # back_button.click()
        # store_button = find_element_by_xpath('//*[@id="store-finder-results"]/div/div/div[1]/a/button/span')
        # action = ActionChains(w_browser)
        # action.click(on_element = store_button)
        # action.perform()
        # for handle in w_browser.window_handles:
        # if handle != main_page:
        # add_page = handle
        # w_browser.switch_to.window(add_page)
        # w_frame = w_browser.find_element_by_xpath('//*[@id="store-finder-search-container"]')
        # w_browser.switch_to.frame(w_frame)
        w_ea = []
        # NOTE(review): this tuple visits only span 1 and span 5; switch to
        # range(1, 6) if every span of the address is wanted.
        w_range = (1, 5)
        for w_er in w_range:
            w_elem = w_browser.find_element_by_xpath(details_xpath + '/div[2]/span[' + str(w_er) + ']')
            w_ea.append(w_elem.text)
            # Print the text; printing the element shows only its repr.
            print(w_elem.text)
        w_add_names.append(w_ea)
        # w_browser.switch_to.window(main_page)
        # time.sleep(1)
    except NoSuchElementException:
        # Best effort: report the miss instead of skipping silently.
        print('Store ' + str(w_tic) + ': expected element not found, skipping')

print('WALMART STORES:')
print(w_store_names)
w_count = len(w_store_names)
print('There are ' + str(w_count) + ' Walmart Stores in Zip Code ' + str(zip_code))
w_browser.quit()
print(w_add_names)
I think that I have to somehow switch to the new (nested?) information pane that shows up after clicking "Details," and then switch back to get to the next store, but I don't know how. Any help would be much appreciated.
def n_seguidores(self, username):
# Opens the Instagram profile page of `username` and tries to print a value
# read from the elements that carry the class 'g47SY '.
driver = self.driver
driver.get('https://www.instagram.com/'+ username +'/')
# Fixed 3-second pause before the page is queried.
time.sleep(3)
# find_elements_* (plural) returns a list of matches, not a single element.
user_botao = driver.find_elements_by_class_name('g47SY ')
# NOTE(review): get_attribute is a WebElement method; calling it on the list
# above fails, so one element has to be picked by index first.
print_us = user_botao.get_attribute('title')
print(print_us)
Please help me find the "following" number in the HTML.
.find_elements_* return a list, so you need access by index.
There are 3 numbers with the same class name in the page, and the numbers of following you mean is the third.
And to get the number you can use .text, not .get_attribute('title')
Try following code:
# All elements that share the counter class; the "following" number is the
# third of them.
user_botao = driver.find_elements_by_class_name('g47SY ')
# index 2 = third element (indices are zero-based)
print_us = user_botao[2].text
print(print_us)
Please find my code below. I am trying to get the Seller Proceeds value from the website. In the browser console, with the element selected ($0), $0.value gives 598.08, but I get "Calculate" when I read it with this:
sel_proc = web.find_elements(id="afn-seller-proceeds")[0].text
'''
Full Code :
import pandas as pd
from webbot import Browser
from bs4 import BeautifulSoup
# Question script: for every row of sample.xlsx, enter the ASIN, selling price
# and product cost into the fee calculator page and record the three fee
# values it reports.
web = Browser()
##web.set_window_position(-10000,0)
df = pd.read_excel('sample.xlsx')
# One entry is appended to each list per spreadsheet row.
soafees = []
fulfees = []
selproc = []
for ind in df.index:
web.go_to('https://somelink')
## web.set_window_position(-10000,0)
web.click(id='link_continue')
print("Login Successful")
# NOTE: asin is assigned but not used; the type() call below reads
# df['ASIN'][ind] again.
asin = df['ASIN'][ind]
sp = int(df['Selling Price'][ind])
print(sp)
cp = int(df['Cost of Product'][ind])
print(cp)
web.type(df['ASIN'][ind] , into = 'Enter your product name, UPC, EAN, ISBN or ASIN',clear = True)
web.click(id='a-autoid-0')
web.type(sp,tag='input',id='afn-pricing',clear = True)
web.type(cp,tag='input',id='afn-cost-of-goods',clear = True)
web.click(id='update-fees-link')
res = web.find_elements(id="afn-selling-fees")[0].text
ful_fees = web.find_elements(id="afn-amazon-fulfillment-fees")[0].text
# NOTE(review): per the question, reading .text here yields "Calculate" while
# the element's value holds the number that is wanted.
sel_proc = web.find_elements(id="afn-seller-proceeds")[0].text
## sel_proc = web.execute_script('return arguments[0].value;', element);
print("soa fees : "+res)
print("Fulfillment fees : "+ful_fees)
print("Seller Proceeds : "+sel_proc)
soafees.append(res)
fulfees.append(ful_fees)
selproc.append(sel_proc)
print(soafees)
print(fulfees)
print(selproc)
# Each collected list becomes its own single-column DataFrame.
df_soa = pd.DataFrame(soafees,columns = ['SOA Fees'])
df_ful = pd.DataFrame(fulfees,columns = ['FBA Fees'])
df_sel = pd.DataFrame(selproc,columns = ['Seller Proceeds'])
print(df)
print(df_soa)
print(df_ful)
print(df_sel)
Snapshot for reference:
thanks in advance for your support
In the sel_proc variable, you are storing the text, Instead, you should look for the attribute which has the value. I believe, in this case, it should be a "value" attribute.
sel_proc = web.find_elements(id="afn-seller-proceeds")[0].get_attribute(<attribute_name>)
Your code will look something like this:
sel_proc = web.find_elements(id="afn-seller-proceeds")[0].get_attribute("value")
So I wrote some code to grab data about classes at a college to build an interactive scheduler. Here is the code I have to get data:
from selenium import webdriver
import os
import pwd
import shlex
import re
import time
# Login name of the current user, used to build the chromedriver path below.
usr = pwd.getpwuid(os.getuid()).pw_name
Path = ('/Users/%s/Downloads/chromedriver') %usr # Have chromedriver downloaded
# Create a new instance of the Chrome driver
options = webdriver.ChromeOptions()
options.binary_location = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
options.add_argument('headless') # Headless so no window is opened
options.add_argument('window-size=1200x600')
# NOTE(review): newer Selenium releases expect `options=` instead of
# `chrome_options=` - confirm against the installed version.
driver = webdriver.Chrome(Path, chrome_options=options)
driver.get('https://web.stevens.edu/scheduler/core/2017F/2017F.xml') # Go to database
# Maps a section identifier to the dict of fields parsed for that section.
classes = {}


def Database(AllSelectedCourseInfo):
    """Parse ``name=value`` tokens for one course and file them in ``classes``.

    Args:
        AllSelectedCourseInfo: iterable of strings shaped like ``name=value``.
            Tokens without "=" and tokens with an empty value are ignored.

    Raises:
        KeyError: if no token supplies a non-empty "Section" value.
    """
    ClassDict = {}
    for item in AllSelectedCourseInfo:
        # Split on the first "=" only, so a value that itself contains "="
        # is kept whole instead of being cut at the second "=".
        name, sep, value = item.partition("=")
        if not sep:
            continue
        # Drop the "Z" suffix that trails numeric values (e.g. "10:00:00Z")
        # without touching a "Z" that belongs to a word.
        value = re.sub(r"(?<=\d)Z\b", "", value)
        if value:  # If subject has a value, store it
            ClassDict[str(name)] = str(value)
    classes[str(ClassDict["Section"])] = ClassDict  # Add to dictionary
def makeDatabase(section):
    """Look up ``section`` in the loaded document and store what is found.

    ``section`` is matched against element text. When the parent of the first
    match contains "Title", every match is treated as a class name and each
    one's ancestor record is parsed; otherwise the single match is treated as
    a section identifier. Parsed tokens are handed to ``Database``.
    """
    locator = "//*[text()='%s']" % section
    parent_text = driver.find_element_by_xpath(locator).find_element_by_xpath("..").text

    if "Title" not in parent_text:
        # Section identifier given: the record sits four levels up.
        hit = driver.find_element_by_xpath(locator)
        record_text = hit.find_element_by_xpath(".." + "/.." * 3).text
        # Strip tag terminators, then split while keeping quoted strings whole.
        Database(shlex.split(record_text.replace("/>", "").replace(">", "")))
        return

    # Class name given: every match is one section, with the record five
    # levels up.
    hits = driver.find_elements_by_xpath(locator)
    for hit in hits:
        record_text = hit.find_element_by_xpath(".." + "/.." * 4).text
        Database(shlex.split(record_text.replace("/>", "").replace(">", "")))
def printDic():
    """Print every stored section: a header line, then one line per field.

    Uses the single-argument ``print(...)`` form, which behaves the same on
    Python 2 and Python 3; the print statement is a syntax error on Python 3.
    """
    for key in classes:
        print("\n-------------%s------------" % key)
        fields = classes[key]
        for classkey in fields:
            print("%s : %s" % (classkey, fields[classkey]))
# Time how long it takes to build and print the two requested entries.
start = time.time()
try:
    makeDatabase("Differential Calculus")
    makeDatabase("MA 124B")
    printDic()
    end = time.time()
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(end - start)
finally:
    # Shut the headless browser down even if a lookup above raised.
    driver.quit()
It takes about 20 seconds for me to pull data from one class and one class section, if I am to make this practical it is going to need at least 7 classes, and that would take over a minute just to create the dictionaries. Does anyone know of a way to make this run any faster?
I tried to integrate lxml and requests into my code, but it just didn't have what I was looking for. After a few days of trying to use lxml to accomplish this, to no avail, I decided to try beautifulsoup4 with urllib. This worked better than I could have hoped:
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser
import urllib
import shlex
import re
import time
# Used by makeDatabase() for unescape(), which decodes HTML entities in the
# stringified class nodes.
h = HTMLParser()
page = urllib.urlopen('https://web.stevens.edu/scheduler/core/2017F/2017F.xml').read() # Get to database
# NOTE(review): no parser is named, so BeautifulSoup chooses one itself -
# confirm which parser is installed.
soup = BeautifulSoup(page)
# NOTE(review): positional path into the parsed tree; presumably it reaches
# the list of per-class nodes - verify against the feed.
RawClassData = soup.contents[10].contents[0].contents[0].contents
# Maps a section identifier to the dict of that section's fields; filled by
# makeDatabase().
classes = {}
# Not referenced by any of the functions below.
backupClasses = {}
def makeDatabase():
    """Populate ``classes`` from every node in ``RawClassData``.

    Each node is stringified, has ">" replaced by a space, is HTML-unescaped
    and is then tokenised with ``shlex`` into ``name=value`` pieces. Nodes
    that cannot be tokenised, or that yield no "section" value, are skipped.
    """
    for raw in RawClassData:  # Parse through each class
        try:
            # Sort into a list, keeping quoted strings together.
            tokens = shlex.split(h.unescape(str(raw).replace(">", " ")))
        except Exception:
            # Best effort: skip nodes that cannot be stringified or tokenised
            # (e.g. unbalanced quotes). Unlike a bare except, this lets
            # KeyboardInterrupt and SystemExit through.
            continue

        ClassDict = {}
        for item in tokens:  # Go through list of class info
            # Split on the first "=" only so values containing "=" stay whole.
            name, sep, value = item.partition("=")
            if not sep:
                continue
            # Drop the "Z" suffix that trails numeric values, leaving any "Z"
            # that is part of a word untouched.
            value = re.sub(r"(?<=\d)Z\b", "", value)
            if not value:
                continue
            try:
                ClassDict[str(name)] = str(value)
            except UnicodeError:
                # Python 2: text that cannot be converted with str() is
                # skipped, one field at a time.
                continue

        if "section" in ClassDict:
            classes[str(ClassDict["section"])] = ClassDict
def printDic():
# Writes every entry of `classes` to a file named "Classes", opened in "w"
# mode so earlier contents are replaced.
with open("Classes", "w") as f:
for key in classes:
# Header line naming the section.
f.write("\n-------------%s------------" %key)
for classkey in classes[key]:
# One "name : value" line per stored field.
f.write( "\n%s : %s" %(classkey, classes[key][classkey]))
f.write("\n")
def printSection(selection):
    """Print the stored fields of the section whose key is ``selection``.

    Uses the single-argument ``print(...)`` form, which behaves the same on
    Python 2 and Python 3.

    Raises:
        KeyError: if ``selection`` is not a key of ``classes`` (the header
            line has already been printed by then).
    """
    print("\n-------------%s------------" % selection)
    fields = classes[selection]
    for classkey in fields:
        print("%s : %s" % (classkey, fields[classkey]))
def printClass(selection):
    """Print every stored class whose title or section key is ``selection``.

    Sections whose "title" field equals ``selection`` are printed first, one
    block per section. If ``selection`` is itself a section key, that section
    is printed as well. Nothing is printed when there is no match.
    """
    for key in classes:
        fields = classes[key]
        # .get(): a record without a "title" field simply does not match.
        if fields.get("title") == selection:
            print("\n-------------%s------------" % key)
            for classkey in fields:
                print("%s : %s" % (classkey, fields[classkey]))

    # This lookup used to run unconditionally in a ``finally`` clause, which
    # raised KeyError whenever a title (not a section key) was passed in.
    if selection in classes:
        fields = classes[selection]
        print("\n-------------%s------------" % selection)
        for classkey in fields:
            print("%s : %s" % (classkey, fields[classkey]))
# Only the library build is timed; the printing below is outside the window.
start = time.time()
makeDatabase()
end = time.time()

for wanted in (
    "Circuits and Systems",
    "Differential Equations",
    "Writing & Communications Collqm",
    "Mechanics of Solids",
    "Electricity & Magnetism",
    "Engineering Design III",
    "Freshman Quiz",
):
    printClass(wanted)

printDic()
# Parenthesised single-argument print works on Python 2 and Python 3.
print(end - start)
This new code creates a library of all classes and then prints out the desired classes, all in 2 seconds. The Selenium code took 89 seconds just to build the library for the desired classes and print them out, so I would say that's a slight improvement... Thanks a ton to perfect5th for the suggestion!