I am trying to develop a web scraper using Python, Beautiful Soup, and Selenium that can peruse the steam community marketplace.
import requests
from bs4 import BeautifulSoup
import time
import selenium
from selenium import webdriver
import chromedriver_binary
driver = webdriver.Chrome("")
steam_market_URL = 'https://steamcommunity.com/market/search?q=&category_730_ItemSet%5B%5D=any&category_730_ProPlayer%5B%5D=any&category_730_StickerCapsule%5B%5D=any&category_730_TournamentTeam%5B%5D=any&category_730_Weapon%5B%5D=any&appid=730#p1_popular_desc'
driver.get(steam_market_URL)
for pageNum in range(1,6):
steam_market_HTML = requests.get(steam_market_HTML).text
HTML_parser = BeautifulSoup(steam_market_HTML, 'html.parser')
popular_steam_items = HTML_parser.findAll(attrs = {"class" : "market_listing_searchresult"})
popular_steam_items_URL = HTML_parser.findAll(attrs={"class" : "market_listing_row_link"})
for item in range(0,len(popular_steam_items)):
print(popular_steam_items[item]["data-hash-name"] + " " + popular_steam_items_URL[item]["href"] + "\n")
driver.find_element_by_id_name("searchResults_btn_next").click()
time.sleep(.5)
In theory, this should navigate through the first five pages of the steam "popular items" list and add names of each item + the URL for that item, waiting .5 seconds between each page switch (if I biffed here and my code won't work please let me know!).
However, after running the code I am faced with this error:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/common/service.py", line 72, in start
self.process = subprocess.Popen(cmd, env=self.env,
File "/usr/lib/python3.8/subprocess.py", line 854, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "/usr/lib/python3.8/subprocess.py", line 1702, in _execute_child
raise child_exception_type(errno_num, err_msg, err_filename)
PermissionError: [Errno 13] Permission denied: ''
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "seleniumTest.py", line 7, in <module>
driver = webdriver.Chrome("")
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/chrome/webdriver.py", line 73, in __init__
self.service.start()
File "/usr/local/lib/python3.8/dist-packages/selenium/webdriver/common/service.py", line 86, in start
raise WebDriverException(
selenium.common.exceptions.WebDriverException: Message: '' executable may have wrong permissions. Please see https://sites.google.com/a/chromium.org/chromedriver/home
Most other versions of this error that I have seen on SE provide a location after '[Errno 13] Permission denied:' and I'm a little bit lost on what to change here. Any help would be greatly appreciated! Thanks!
Your code has three bugs.
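First, do not pass an empty string to the constructor. The first positional argument is the path to the chromedriver executable, so "" makes Selenium try to launch an empty path, which is what produces the "Permission denied: ''" error: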
driver = webdriver.Chrome()
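Second, pass the URL variable to requests.get; the original line passed steam_market_HTML, which is not defined yet at that point: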
steam_market_HTML = requests.get(steam_market_URL).text
Third, find_element_by_id_name does not exist. Name and id are two different locators, so you can only use one at a time; in your case it is the id:
driver.find_element_by_id("searchResults_btn_next").click()
With those three changes, I hope this is the correct code:
import requests
from bs4 import BeautifulSoup
import time
import selenium
from selenium import webdriver
import chromedriver_binary
# Print the name and URL of every item on the first five pages of the Steam
# community market "popular items" list, paging with Selenium.
driver = webdriver.Chrome()
steam_market_URL = 'https://steamcommunity.com/market/search?q=&category_730_ItemSet%5B%5D=any&category_730_ProPlayer%5B%5D=any&category_730_StickerCapsule%5B%5D=any&category_730_TournamentTeam%5B%5D=any&category_730_Weapon%5B%5D=any&appid=730#p1_popular_desc'
driver.get(steam_market_URL)
for pageNum in range(1, 6):
    # Parse the page the browser is currently showing. Downloading
    # steam_market_URL again with requests would fetch the same URL on every
    # iteration, because clicking "next" only changes the page inside the
    # Selenium-controlled browser.
    steam_market_HTML = driver.page_source
    HTML_parser = BeautifulSoup(steam_market_HTML, 'html.parser')
    popular_steam_items = HTML_parser.findAll(attrs={"class": "market_listing_searchresult"})
    popular_steam_items_URL = HTML_parser.findAll(attrs={"class": "market_listing_row_link"})
    # Pair each listing with its link; zip stops at the shorter list instead
    # of raising IndexError if the two lookups ever return different counts.
    for listing, link in zip(popular_steam_items, popular_steam_items_URL):
        print(listing["data-hash-name"] + " " + link["href"] + "\n")
    driver.find_element_by_id("searchResults_btn_next").click()
    # Give the next page of results a moment to load before parsing it.
    time.sleep(.5)
Related
I am using the following code to search google and click on first search result.
from selenium import webdriver
import urllib.parse
import time
from selenium.webdriver.firefox.options import Options
options = Options()
options.set_preference("dom.popup_maximum", 100)
options.add_argument("-profile")
options.add_argument("/home/blueray/.mozilla/firefox/5ertyoox.default-release")
options.page_load_strategy = 'eager'
# options.add_extension('fhnegjjodccfaliddboelcleikbmapik.crx')
browser = webdriver.Firefox(options=options)
with open("google-search-terms.adoc") as fin:
for line_no, line in enumerate(fin):
line = line.strip()
query = urllib.parse.urlencode({'q': line + " site:amazon.com"})
browser.execute_script(f"window.open('https://www.google.com/search?{query}');")
time.sleep(5)
for x in range(1, len(browser.window_handles)):
browser.switch_to.window(browser.window_handles[x])
try:
elm = browser.find_elements_by_xpath(
'/html/body/div[7]/div/div[9]/div[1]/div/div[2]/div[2]/div/div/div[1]/div/div/div[1]/a/h3')
if not elm:
elm = browser.find_elements_by_xpath(
'/html/body/div[7]/div/div[9]/div[1]/div/div[2]/div[2]/div/div/div[1]/div/div/div/div[1]/a/h3')
elm[0].click()
except Exception as e:
print("Error", str(e))
However, if one instance of firefox is open and I run the script it gives the message:
Firefox is already running, but is not responding. To use Firefox, you
must first close the existing Firefox process, restart your device, or
use a different profile.
And the program is terminated with the following error:
Traceback (most recent call last):
File "google-search-amazon-less-captcha.py", line 13, in <module>
browser = webdriver.Firefox(options=options)
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/firefox/webdriver.py", line 170, in __init__
RemoteWebDriver.__init__(
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 157, in __init__
self.start_session(capabilities, browser_profile)
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/home/blueray/.local/lib/python3.8/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: Process unexpectedly closed with status 0
What should I do so that there is no error even if an instance of Firefox is already open?
I'm having the same issue, but only if the open Firefox instance uses the same profile that I'm loading in the script. If you remove the profile from this script it should run. It should also work if your code uses a different profile than the one the currently open window is using.
You can also use the deprecated selenium 3 way of loading a profile and this avoids the error for me.
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
ffOptions = Options()
ffProfile = FirefoxProfile(r'C:\Users\Tyler\AppData\Roaming\Mozilla\Firefox\Profiles\0753x1pz.default')
ffOptions.profile = ffProfile
driver = webdriver.Firefox(options=ffOptions)
driver.get("http://www.google.com")
I'm still looking for a viable solution using the selenium 4 way of setting a profile.
Sometimes Selenium scripts leave web drivers open instead of closing them properly.
A good practice is to wrap your code in a try/except/finally block:
# Template: run the automation and always shut the browser down afterwards.
driver = webdriver.Firefox()
try:
    pass  # Your code
except Exception as e:
    # Log or print error
    print("Error", str(e))
finally:
    # quit() ends the whole WebDriver session. Calling close() first is
    # redundant, and if close() raised, quit() would be skipped and the
    # driver process would be left running.
    driver.quit()
Also, you could try killing any Firefox processes still running on your system as part of the Python script, using something like this:
.....
.....
import os
# Force-kill leftover geckodriver and Firefox processes from earlier runs.
# Warning: this also closes any Firefox window you are using yourself.
if os.name == "nt":
    # taskkill only exists on Windows.
    os.system("taskkill /im geckodriver.exe /f")
    os.system("taskkill /im firefox.exe /f")
else:
    # On Linux/macOS, pkill matches by process name.
    os.system("pkill geckodriver")
    os.system("pkill firefox")
I tried to make a bot which checks whether the Bob Marley T-shirt from Ajax Amsterdam is available.
https://www.adidas.ch/de/ajax-3-jsy/GT9559.html <- there is the link
I was able to get it running (I also added a Telegram bot to report the success):
#!/usr/bin/python3
from selenium import webdriver
import requests
token="" # expunged for obvious reasons
chat="" # expunged for obvious reasons
message="Available"
fireFoxOptions = webdriver.FirefoxOptions()
fireFoxOptions.set_headless()
browser = webdriver.Firefox(firefox_options=fireFoxOptions)
browser.get("https://www.adidas.ch/de/ajax-3-jsy/GT9559.html")
if ("Dieses Produkt ist leider ausverkauft." not in browser.page_source):
send_text = 'https://api.telegram.org/bot' + token + '/sendMessage?chat_id=' + chat + '&parse_mode=Markdown&text=' + message
response = requests.get(send_text)
print(response.json())
browser.close()
Working on:
selenium 3.141.0
python 3
Firefox 91.0.2
geckodriver 0.29.1
OS: Manjaro Linux
After that I tried to deploy it on my Debian 10 server, and this is where the struggle began. I had to install Firefox 78.14.0esr and, going by the geckodriver GitHub release page, geckodriver 0.27.0 to match it. Selenium stayed the same at 3.141.0. From what I know and what I have researched, these versions should be compatible, but when executed the script throws this nerve-wracking error:
Traceback (most recent call last):
File "./ajax.py", line 18, in <module>
browser = webdriver.Firefox(options=options) #, capabilities=cap, executable_path="/usr/local/bin/geckodriver")
File "/home/webadmin/.local/lib/python3.7/site-packages/selenium/webdriver/firefox/webdriver.py", line 174, in __init__
keep_alive=True)
File "/home/webadmin/.local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 157, in __init__
self.start_session(capabilities, browser_profile)
File "/home/webadmin/.local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/home/webadmin/.local/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/home/webadmin/.local/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.SessionNotCreatedException: Message: Unable to find a matching set of capabilities
I looked the error up, and apparently you have to define the binary paths and the "marionette" capability. I did this, and now the code looks like this (including some debugging stuff):
#!/usr/bin/python3
from selenium import webdriver
import requests
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
#token="" # expunged: never publish a real bot token
#chat="" # expunged
message="just a test"
options = FirefoxOptions()
options.add_argument("--headless")
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = False
binary = "/usr/bin/firefox"
options.binary = binary
browser = webdriver.Firefox(options=options, capabilities=cap, executable_path="/usr/local/bin/geckodriver")
browser.get("https://www.adidas.ch/de/ajax-3-jsy/GT9559.html")
if ("Dieses Produkt ist leider ausverkauft." not in browser.page_source):
send_text = 'https://api.telegram.org/bot' + token + '/sendMessage?chat_id=' + chat + '&parse_mode=Markdown&text=' + message
response = requests.get(send_text)
print(response.json())
else:
print("succ")
browser.close()
But now I get the following error:
Traceback (most recent call last):
File "./ajax.py", line 18, in <module>
browser = webdriver.Firefox(options=options, capabilities=cap, executable_path="/usr/local/bin/geckodriver")
File "/home/webadmin/.local/lib/python3.7/site-packages/selenium/webdriver/firefox/webdriver.py", line 191, in __init__
self.binary, timeout)
File "/home/webadmin/.local/lib/python3.7/site-packages/selenium/webdriver/firefox/extension_connection.py", line 52, in __init__
self.binary.launch_browser(self.profile, timeout=timeout)
File "/home/webadmin/.local/lib/python3.7/site-packages/selenium/webdriver/firefox/firefox_binary.py", line 73, in launch_browser
self._wait_until_connectable(timeout=timeout)
File "/home/webadmin/.local/lib/python3.7/site-packages/selenium/webdriver/firefox/firefox_binary.py", line 104, in _wait_until_connectable
"The browser appears to have exited "
selenium.common.exceptions.WebDriverException: Message: The browser appears to have exited before we could connect. If you specified a log_file in the FirefoxBinary constructor, check it for details.
Also changing the cap["marionette"] = False to True just gives me the older error message.
Thank You!
I just reverted all the changes, made a Docker container and put that on the server.
I'm trying to write a bot for a platform using Python and Selenium. My code is below. When I ran it, I got this error:
Traceback (most recent call last):
File "C:\Users\doguk\Desktop\Python\eksi_01.py", line 76, in <module>
WebDriverWait(driver,30).until(recaptchaSolved.is_displayed())
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\support\wait.py", line 71, in until
value = method(self._driver)
TypeError: 'bool' object is not callable
## necessary import codes
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
import time
import xlrd
import random
## random waiting time
random_Get_Time = random.randint(1,5)
## importing data file
data_File_Path = "C:/Users/doguk/Desktop/Python/DataFile.xls"
## targeting the data
dataFile = xlrd.open_workbook(data_File_Path)
accountInfo = dataFile.sheet_by_name("account_Ids_And_Passwords")
usernameCount = accountInfo.nrows
passwordCount = accountInfo.ncols
for curr_row in range(1, usernameCount):
username = accountInfo.cell_value(1, 0)
password = accountInfo.cell_value(1, 1)
## setting up recaptcha solver plugin to browser
recaptcha_Solver_Plugin_Path = "C:/Users/doguk/Desktop/Python/plugin.zip"
## adding recaptcha solver plugin to browser
recaptchaSolverAddOptions = webdriver.ChromeOptions()
recaptchaSolverAddOptions.add_extension(recaptcha_Solver_Plugin_Path)
## browser driver code
chrome_Driver_Path = "C:/Users/doguk/Desktop/Python/chromedriver"
driver = webdriver.Chrome(chrome_Driver_Path, options=recaptchaSolverAddOptions)
## enter the browser
driver.get("**URL**")
time.sleep(random_Get_Time)
## fullscreen code
driver.maximize_window()
## filling account infos
login_Form_Username = driver.find_element_by_id('username')
login_Form_Username.send_keys(username)
time.sleep(random_Get_Time)
login_Form_Password = driver.find_element_by_id('password')
login_Form_Password.send_keys(password)
## waiting for captcha confirmation
recaptchaDisplayed = driver.find_element_by_class_name('g-recaptcha')
if(recaptchaDisplayed.is_displayed()):
recaptchaSolved = driver.find_element_by_partial_link_text('Solved')
WebDriverWait(driver,30).until(recaptchaSolved.is_displayed())
else:
time.sleep(random_Get_Time)
## log in to platform
login_Button = driver.find_element_by_class_name('btn')
login_Button.click()
time.sleep(random_Get_Time)
## logout from driver
driver.quit()
How can I fix that?
There may be a Google reCAPTCHA confirmation on the login page. If there isn't one, the script should not wait long. If there is, it should wait until the reCAPTCHA solver plugin has solved it.
If I remove the "## waiting for captcha confirmation" section and add time.sleep(110), the problem goes away. But I need the bot to behave like a human, so a fixed long wait is not what I want. Any help is appreciated.
Instead of
WebDriverWait(driver,30).until(recaptchaSolved.is_displayed())
I tried that
WebDriverWait(driver,30).until(EC.visibility_of_element_located((By.XPATH, "//*[contains(text(),'Solved')]")))
But it gave an error again. Like that:
Traceback (most recent call last):
File "C:\Users\doguk\Desktop\Python\eksi_01.py", line 75, in <module>
recaptchaSolved = driver.find_element_by_partial_link_text('Solved')
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 462, in find_element_by_partial_link_text
return self.find_element(by=By.PARTIAL_LINK_TEXT, value=link_text)
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 976, in find_element
return self.execute(Command.FIND_ELEMENT, {
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "C:\Users\doguk\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"partial link text","selector":"Solved"}
(Session info: chrome=92.0.4515.159)
I have tried all fixes but for some reason, exceptions are coming in my code. Please help me out. The code block is trying to automate a mass WhatsApp messaging bot.
The code was adapted from a publicly available GitHub repository.
The chrome version is updated to the newest release. I am using python 3.9 environment with the latest pip installer, updated selenium library, and the most recent chromedriver extension.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from time import sleep
from urllib.parse import quote
options = Options()
#options.add_argument("user-data-dir=/tmp/tarun")
#options.add_argument("user-data-dir=C:\\Users\\anirudh_bagri\\AppData\\Local\\Google\\Chrome\\User Data")
f = open("message.txt", "r")
message = f.read()
f.close()
print('This is your message:')
print(message)
message = quote(message)
numbers = []
f = open("numbers.txt", "r")
for line in f.read().splitlines():
if line != "":
numbers.append(line)
f.close()
print('\nWe found ' + str(len(numbers)) + ' numbers in the file')
delay = 30
print('Once your browser opens up, make sure you sign in to web whatsapp and then press enter')
driver = webdriver.Chrome(executable_path=r'C:\Users\LEGION\Desktop\whatsapp-bulk-messenger-master\chromedriver.exe', options=options)
driver.get('https://web.whatsapp.com')
input()
for number in numbers:
if number == "":
continue
print('Sending message to: ' + number)
try:
url = 'https://web.whatsapp.com/send?phone=' + number + '&text=' + message
driver.get(url)
click_btn = WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.CLASS_NAME , '_3M-N-')))
click_btn.click()
sleep(1)
print('Message sent to: ' + number)
except Exception:
print('Failed to send message to ' + number)
This is the exception that is coming up.
= RESTART: C:\Users\LEGION\Desktop\whatsapp-bulk-messenger-master\automator.py =
This is your message:
Hello World,
This is my text to you from automated messaging system.
Thank You
We found 1 numbers in the file
Once your browser opens up, make sure you sign in to web whatsapp and then press enter
Traceback (most recent call last):
File "C:\Users\LEGION\Desktop\whatsapp-bulk-messenger-master\automator.py", line 31, in <module>
driver = webdriver.Chrome(executable_path=r'C:\Users\LEGION\Desktop\whatsapp-bulk-messenger-master\chromedriver.exe', options=options)
File "C:\Users\LEGION\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\chrome\webdriver.py", line 76, in __init__
RemoteWebDriver.__init__(
File "C:\Users\LEGION\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 157, in __init__
self.start_session(capabilities, browser_profile)
File "C:\Users\LEGION\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 252, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "C:\Users\LEGION\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "C:\Users\LEGION\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: Failed to create Chrome process.
>>>
This most probably happens when you run Chrome as an administrator. Try running it "not as an admin" and check once.
Just go to Chrome's Properties -> Compatibility and change that.
I have been trying for the past few hours, without success, to make webdriver.Firefox() run properly. I reinstalled geckodriver and verified that it was added to the PATH. I also tried to use FirefoxBinary, but it didn't help.
I could not find any solution and I am not sure how to fix this. I would appreciate your help.
This is the code :
#!/usr/bin/env python
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import time
browser = webdriver.Firefox()
browser.get("https://testweb.com")
time.sleep(10)
username = browser.find_element_by_id("extpatid")
password = browser.find_element_by_id("extpatpw")
username.send_keys("username")
password.send_keys("password")
login_attempt = browser.find_element_by_xpath("//*[#type='submit']")
login_attempt.submit()
Below are the exceptions received when I tried to execute the script :
Traceback (most recent call last):
File "./braude.py", line 7, in <module>
browser = webdriver.Firefox()
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/firefox/webdriver.py", line 140, in __init__
self.service.start()
File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/common/service.py", line 74, in start
stdout=self.log_file, stderr=self.log_file)
File "/usr/lib/python2.7/subprocess.py", line 711, in __init__
errread, errwrite)
File "/usr/lib/python2.7/subprocess.py", line 1343, in _execute_child
raise child_exception
OSError: [Errno 8] Exec format error
Python version :
# /usr/bin/python -V
Python 2.7.12+
I'm not sure why you have this line:
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
but it's completely unnecessary and likely related to your ultimate problem - a wrong version of the Firefox webdriver module.
Try Chrome and see how it reacts with that.