I want to get the order ID from the API response. When I click the Create Order button, the application sends a POST request whose response contains the unique ID I want.
This is my order creation code.
from datetime import date
import time
from seleniumwire import webdriver
from openpyxl import load_workbook
from Locators.PracticeLocators import PracticeLocators
from pageObjects.LoginPage import LoginScreen
today = date.today()
currentDate = today.strftime("%m/%d/%Y")
FilePath = "C:/Users/Administrator/PycharmProject/LegrandePython/TestData/Data.xlsx"
datafile = load_workbook(FilePath)
testData = datafile['Test Data']
loginData = datafile["Login Credentials"]
scriptData = datafile["Script Data"]
driver = webdriver.Chrome(executable_path="C:/Users/Administrator/Downloads/chromedriver.exe")
driver.maximize_window()
driver.scopes = [
    '.*https://ibis-dev.droicelabs.us/api/dispenser/orders/.*'
]
driver.get(loginData.cell(4, 2).value)
driver.implicitly_wait(5)
login = LoginScreen(driver)
login.SetUsername(loginData.cell(4, 3).value)
login.SetPassword(loginData.cell(4, 4).value)
login.SignIn()
driver.implicitly_wait(20)
driver.find_element_by_class_name(PracticeLocators.Add_RX).click()
PatientSearch = driver.find_element_by_xpath(PracticeLocators.Patient_search_textbox)
PatientSearch.click()
PatientSearch.send_keys(testData.cell(2, 1).value)
driver.find_element_by_xpath("(//*[text()='" + testData.cell(2, 2).value + "'])[1]").click()
DoctorSearch = driver.find_element_by_xpath(PracticeLocators.doctor_search_textbox)
DoctorSearch.click()
time.sleep(1)
DoctorSearch.send_keys(scriptData.cell(2, 8).value)
time.sleep(1)
driver.find_element_by_xpath(
    "(//*[text()='" + scriptData.cell(2, 8).value + " " + "Practice'])[2]").click()
driver.find_element_by_xpath(PracticeLocators.NextButton).click()
driver.find_element_by_xpath(PracticeLocators.CreateOnetimeRXButton).click()
driver.find_element_by_name(PracticeLocators.OnetimeSearchMedicine).send_keys(scriptData.cell(2, 1).value)
time.sleep(2)
driver.find_element_by_xpath("//*[text()='" + scriptData.cell(2, 1).value + "']").click()
driver.find_element_by_xpath(PracticeLocators.AddButton).click()
driver.find_element_by_xpath(PracticeLocators.ProductQuantity).click()
time.sleep(1)
driver.find_element_by_xpath(PracticeLocators.Quantity).click()
driver.find_element_by_xpath(PracticeLocators.ProductRefilles).click()
time.sleep(1)
driver.find_element_by_xpath(PracticeLocators.Quantity).click()
time.sleep(2)
driver.find_element_by_xpath(PracticeLocators.DAWCheckbox).click()
time.sleep(2)
instruction = driver.find_element_by_xpath(PracticeLocators.productInstruction)
instruction.click()
instruction.send_keys(testData.cell(2, 3).value)
driver.find_element_by_xpath(PracticeLocators.allergiesButton).click()
allergies = driver.find_element_by_xpath(PracticeLocators.allergiesTextbox)
allergies.clear()
allergies.send_keys(testData.cell(2, 4).value)
driver.find_element_by_xpath(PracticeLocators.doneButton).click()
driver.find_element_by_xpath(PracticeLocators.addDropchartButton).click()
time.sleep(2)
element = driver.find_element_by_xpath(PracticeLocators.selectDocuments)
driver.execute_script("arguments[0].click()", element)
driver.find_element_by_xpath(PracticeLocators.selectButton).click()
driver.find_element_by_xpath(PracticeLocators.skipPayment).click()
driver.find_element_by_xpath(PracticeLocators.surgeryDate).send_keys(currentDate)
createOrderButton = driver.find_element_by_xpath(PracticeLocators.submit_CreateOrderButton)
driver.execute_script("arguments[0].click()", createOrderButton)
for request in driver.requests:
    if request.response:
        print(
            request.url,
            request.response.status_code,
            request.response.headers['Content-Type'])
time.sleep(7)
When the line driver.execute_script("arguments[0].click()", createOrderButton) executes, it sends a POST request to https://ibis-dev.droicelabs.us/api/dispenser/orders/ and the response contains the unique order ID I want.
I have set a scope, but the loop still prints many other API calls:
driver.scopes = [
    '.*https://ibis-dev.droicelabs.us/api/dispenser/orders/.*'
]
The https://ibis-dev.droicelabs.us/api/dispenser/orders/ endpoint is called when I click the Create Order button and returns the order ID, yet the printed requests never include the API I listed in the scope. How do I get the response of the API I put in the scope?
I am new to Selenium with Python. I have done this in Cypress, but I don't know how to do it here.
Here is how to capture HTTP requests with Selenium, using the selenium-wire package:
1. Install the package:
pip install selenium-wire
2. Use driver.requests to read the captured requests:
from seleniumwire import webdriver

driver = webdriver.Chrome()
# do your actions with driver
for request in driver.requests:
    if request.response:
        print(
            request.url,
            request.response.status_code,
            request.response.headers['Content-Type'])
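Note that driver.requests holds everything captured since the driver started, which is why unrelated calls show up; if needed, you can empty the store between steps with del driver.requests (a selenium-wire feature).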
3. You might want to limit request capture to specific hosts:
https://pypi.org/project/selenium-wire/#limiting-request-capture
driver.scopes = [
    '.*ibis-dev.droicelabs.us/api/dispenser/orders/.*'
]
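The scopes entries are regular expressions matched against the full request URL; set them before triggering the calls you care about, since requests that don't match are not captured at all.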
4. How to read the response body:
import json
...
for request in driver.requests:
    if request.response:
        data = json.loads(request.response.body)
        print(data['foo'])
Answering this question: getting the order ID.
The final solution for getting order_id across multiple portals, which handles UnicodeDecodeError and JSONDecodeError and prints some debug details:
# define scopes to capture the required requests
driver.scopes = [
    '.*/api/dispenser/orders/.*',
    '.*/api/practice/orders/.*',
    '.*/api/practice/subscriptions/.*'
]
# UI steps performed...
# The last UI action
createOrderButton = driver.find_element_by_xpath(PracticeLocators.submit_CreateOrderButton)
driver.execute_script("arguments[0].click()", createOrderButton)
time.sleep(5)
import json
from gzip import decompress  # response bodies may arrive gzip-compressed

order_id = None
# read captured requests looking for order_id
for request in driver.requests:
    if request.response:
        if request.method == 'POST':
            print(request.method + ' ' + request.url)
            try:
                data = json.loads(request.response.body)
                print('parsed as json')
                if '_id' in data:
                    order_id = data['_id']
            except UnicodeDecodeError:
                try:
                    data = json.loads(decompress(request.response.body))
                    print('decompressed and parsed as json')
                    if '_id' in data:
                        order_id = data['_id']
                except json.decoder.JSONDecodeError:
                    data = request.response.body
                    print('decompressed and parsed as string')
                    print(data)
print(order_id)
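As a side note, instead of pausing with a fixed time.sleep before reading driver.requests, selenium-wire also offers driver.wait_for_request, which blocks until a request matching the given pattern has been captured. A minimal sketch, assuming the same orders endpoint as above:
# Wait up to 30s for the order-creation POST to be captured, then read it
request = driver.wait_for_request('/api/dispenser/orders/', timeout=30)
if request.response:
    order_id = json.loads(request.response.body).get('_id')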
Limitations
Selenium Wire will currently work with tests that run on the same machine as the browser. A distributed setup using Selenium Grid is not yet supported.
Sites that use NTLM authentication (Windows authentication) cannot currently be tested with Selenium Wire. NTLM authentication is not supported.
Reference
https://pypi.org/project/selenium-wire/
Related
I am trying to automatically access a website that has a Geetest V3 captcha.
I wrote a Python script with Selenium that gets the required "gt" and "challenge" tokens and sends them to the workers to solve.
Once I get back the solution, which is a JSON structured like this:
{
    "challenge": "xxxxxxxxx",
    "validate": "xxxxxxxxxxx",
    "seccode": "xxxxxxxxxxxxx|jordan"
}
I don't know how to send these parameters back to the webpage so I can get to the actual homepage.
It's worth mentioning that I've blocked the external URL that would make the tokens expire once the page loads, using two lines of code I found in another post:
driver.execute_cdp_cmd('Network.setBlockedURLs', {"urls": ["api.geetest.com/get.php"]})
driver.execute_cdp_cmd('Network.enable', {})
I have found that these two lines also block the loading of the Geetest HTML form, so I can't fill it with a find_element call, but they seem necessary to keep the website from refreshing the "challenge" token.
from anticaptchaofficial.geetestproxyless import *
from selenium import webdriver
from selenium.webdriver.common.by import By
import telegram
import re
CLEANR = re.compile('<.*?>')

def cleanhtml(raw_html):
    cleantext = re.sub(CLEANR, '', raw_html)
    return cleantext
anticaptcha_api_key = "xxxxxxxxxxxxxxxx"
driver = webdriver.Chrome('/usr/lib/chromium-browser/chromedriver')
url = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
driver.execute_cdp_cmd('Network.setBlockedURLs', {"urls": ["api.geetest.com/get.php"]})
driver.execute_cdp_cmd('Network.enable', {})
page = driver.get(url)
html_key = driver.find_element(By.XPATH, "//script[contains(text(), 'gt:')]").get_attribute('outerHTML')
clean_key = cleanhtml(html_key)
print(clean_key)
clean_key = clean_key.split()
list_of_keys = []
for i in clean_key:
    if len(i) == 35:
        i = i.replace('"', "")
        i = i.replace(',', "")
        list_of_keys.append(i)
print(list_of_keys)
GT_KEY = list_of_keys[0]
CHALLENGE_KEY = list_of_keys[1]
print(CHALLENGE_KEY)
solver = geetestProxyless()
solver.set_verbose(1)
solver.set_key(anticaptcha_api_key)
solver.set_website_url(url)
solver.set_gt_key(GT_KEY)
solver.set_challenge_key(CHALLENGE_KEY)
g_response = solver.solve_and_return_solution()
if g_response != 0:
    print("g_response", g_response)
else:
    print("task finished with error " + solver.error_code)
challenge = g_response["challenge"]
validate = g_response["validate"]
seccode = g_response["seccode"]
print(challenge, validate, seccode)
# WHAT NOW? HOW TO PASS challenge, validate and seccode to the website?
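A common integration pattern (an assumption here, since the question doesn't show the page's markup) is that the Geetest widget fills three hidden inputs named geetest_challenge, geetest_validate and geetest_seccode; if this page follows that convention, the solved tokens can be injected with JavaScript, roughly like this:
# Sketch under the assumption that the page exposes the standard hidden
# Geetest inputs; the element names below are the conventional ones, not
# taken from the actual site.
driver.execute_script(
    "document.getElementsByName('geetest_challenge')[0].value = arguments[0];"
    "document.getElementsByName('geetest_validate')[0].value = arguments[1];"
    "document.getElementsByName('geetest_seccode')[0].value = arguments[2];",
    challenge, validate, seccode)
# How the site consumes the values is site-specific: some pages submit a
# form, others call a JS callback registered with initGeetest.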
I'm trying to upload an image captcha to the 2captcha API, but I'm receiving the error ERROR_ZERO_CAPTCHA_FILESIZE even though the file in my directory is above 60KB. Why am I still receiving this error?
Am I missing something?
I've tried sending only the captcha image (as originally downloaded from the source), but I received the same error, or TOO_MANY_BAD_IMAGES. Please help me.
CODE:
from selenium import webdriver
from time import sleep  # needed for the polling loop below
import requests
driverop = webdriver.ChromeOptions()
driverop.add_argument("--start-maximized")
proxy = "118.174.233.45:44061"
driverop = webdriver.ChromeOptions()
driverop.add_argument('--proxy-server=%s' % proxy)
driver = webdriver.Chrome("chromedriver/chromedriver",options=driverop)
driver.get("https://accounts.google.com/o/oauth2/auth/oauthchooseaccount?client_id=717762328687-iludtf96g1hinl76e4lc1b9a82g457nn.apps.googleusercontent.com&scope=profile%20email&redirect_uri=https%3A%2F%2Fstackauth.com%2Fauth%2Foauth2%2Fgoogle&state=%7B%22sid%22%3A1%2C%22st%22%3A%2259%3A3%3Abbc%2C16%3Af9ef16faad8743e2%2C10%3A1609613474%2C16%3Aa2c13dc2511eb0d0%2Ccb47135ca2a3bc9ca4ee712429ddf5c0935588f518c964242057bb74b818d4de%22%2C%22cdl%22%3Anull%2C%22cid%22%3A%22717762328687-iludtf96g1hinl76e4lc1b9a82g457nn.apps.googleusercontent.com%22%2C%22k%22%3A%22Google%22%2C%22ses%22%3A%22e15efb6754f4498991cd1d37a967f325%22%7D&response_type=code&flowName=GeneralOAuthFlow")
driver.find_element_by_id("identifierId").send_keys(EMAIL)
driver.find_element_by_css_selector('.VfPpkd-LgbsSe-OWXEXe-k8QpJ > div:nth-child(3)').click()
driver.save_screenshot("sample.png")
url = 'http://2captcha.com/in.php'
API_KEY = "---"
files = {'file': open('sample.png',"rb")}
data = {'key': API_KEY, 'method': 'post'}
r = requests.post(url, files=files, data=data)
if r.ok:
    print(r)
    url = "http://2captcha.com/in.php?key=" + API_KEY + "&action=get&id=" + r.text[3:]
    for xr in range(1, 10):
        sleep(1.5)  # poll every 1.5 sec.
        resp = requests.get(url)
        if resp.text[0:2] == 'OK':
            break
        else:
            print(resp)
If you use driver.save_screenshot, you save the current window, according to the documentation (WebDriver.save_screenshot). Try returning the element and using the webelement.screenshot method instead (WebElement.screenshot):
from selenium import webdriver
from time import sleep  # needed for the polling loop below
import requests
driverop = webdriver.ChromeOptions()
driverop.add_argument("--start-maximized")
proxy = "118.174.233.45:44061"
driverop = webdriver.ChromeOptions()
driverop.add_argument('--proxy-server=%s' % proxy)
driver = webdriver.Chrome("chromedriver/chromedriver",options=driverop)
driver.get("https://accounts.google.com/o/oauth2/auth/oauthchooseaccount?client_id=717762328687-iludtf96g1hinl76e4lc1b9a82g457nn.apps.googleusercontent.com&scope=profile%20email&redirect_uri=https%3A%2F%2Fstackauth.com%2Fauth%2Foauth2%2Fgoogle&state=%7B%22sid%22%3A1%2C%22st%22%3A%2259%3A3%3Abbc%2C16%3Af9ef16faad8743e2%2C10%3A1609613474%2C16%3Aa2c13dc2511eb0d0%2Ccb47135ca2a3bc9ca4ee712429ddf5c0935588f518c964242057bb74b818d4de%22%2C%22cdl%22%3Anull%2C%22cid%22%3A%22717762328687-iludtf96g1hinl76e4lc1b9a82g457nn.apps.googleusercontent.com%22%2C%22k%22%3A%22Google%22%2C%22ses%22%3A%22e15efb6754f4498991cd1d37a967f325%22%7D&response_type=code&flowName=GeneralOAuthFlow")
driver.find_element_by_id("identifierId").send_keys(EMAIL)
# find the element first; .click() returns None, so don't assign its result
element = driver.find_element_by_css_selector('.VfPpkd-LgbsSe-OWXEXe-k8QpJ > div:nth-child(3)')
element.click()
element.screenshot("sample.png")
url = 'http://2captcha.com/in.php'
API_KEY = "---"
files = {'file': open('sample.png',"rb")}
data = {'key': API_KEY, 'method': 'post'}
r = requests.post(url, files=files, data=data)
if r.ok:
    print(r)
    url = "http://2captcha.com/in.php?key=" + API_KEY + "&action=get&id=" + r.text[3:]
    for xr in range(1, 10):
        sleep(1.5)  # poll every 1.5 sec.
        resp = requests.get(url)
        if resp.text[0:2] == 'OK':
            break
        else:
            print(resp)
Regarding your comment: I think the problem is with how you are calling the 2captcha API.
Instead of using the requests module directly, try their in-house client, TwoCaptcha.
Install it by: pip3 install 2captcha-python
I have a snippet here that you can try to upload your sample:
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))

from twocaptcha import TwoCaptcha

api_key = os.getenv('APIKEY_2CAPTCHA', 'YOUR_API_KEY')
solver = TwoCaptcha(api_key)

try:
    result = solver.normal('path/to/captcha.jpg')
except Exception as e:
    sys.exit(e)
else:
    sys.exit('solved: ' + str(result))
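On success, result holds the solved answer (the 2captcha-python client returns the captcha id together with the recognized text), which you can then type into the form with Selenium.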
No need to use Selenium and many more lines of code to solve the captcha. Just use the short snippet below; it will solve the challenge and return the response token.
Note: it handles image selection as well (car, bus, ship, truck and so on).
- sitekey: inspect the element and find the data-sitekey attribute
- url: your website URL
from twocaptcha import TwoCaptcha

twoCaptcha = TwoCaptcha('xxxxxxxxxxxxxxx')  # Your 2captcha API key
captcha_token = twoCaptcha.recaptcha(sitekey='xxxxxxxxxxxxxx',
                                     url='website url')
print(captcha_token)
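If you then need to feed the token back into a browser session (using a Selenium driver, which this answer otherwise avoids), a minimal sketch, assuming a standard reCAPTCHA v2 widget whose element id is the conventional one, not confirmed for any particular site:
# captcha_token['code'] is the solved token in the client's usual return
# shape; how the page consumes it afterwards is site-specific.
driver.execute_script(
    "document.getElementById('g-recaptcha-response').innerHTML = arguments[0];",
    captcha_token['code'])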
I am trying to learn how to make a WhatsApp bot, so I took someone's code from the internet and tried to change it to fit my WhatsApp. The problem is that when I run it, it can't find the unread messages and always clicks the second chat in my chat list.
Error line: list index out of range
Here is the code; I hope you will be able to help me with this :)
source code: https://blog.usejournal.com/build-a-basic-news-fetching-whatsapp-bot-in-python-under-60-lines-of-code-2d992faf7f79
from logging import root
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
import time
from urllib3.util import url
browser = webdriver.Firefox(executable_path='realPathIsHere')
browser.get('https://web.whatsapp.com')
def getNews():
    text_box = browser.find_element_by_class_name("_3uMse")
    response = "Let me fetch and send top 5 latest news:\n"
    text_box.send_keys(response)
    soup = BeautifulSoup(requests.get(url).content, "html5lib")
    articles = soup.find_all('article',
                             class_="MQsxIb xTewfe R7GTQ keNKEd j7vNaf Cc0Z5d YKEnGe EyNMab t6ttFe Fm1jeb EjqUne")
    news = [i.find_all('a', class_="ipQwMb Q7tWef")[0].text for i in articles[:5]]
    links = [root + i.find('a')['href'][1:] for i in articles[:5]]
    links = [requests.get("http://thelink.la/api-shorten.php?url=" + link).content.decode() for link in links]
    for i in range(5):
        text_box.send_keys(news[i] + "==>" + links[i] + "\n")
bot_users = {}  # A dictionary that stores all the users that sent activate bot
while True:
    unread = browser.find_elements_by_class_name("ZKn2B")
    name, message = '', ''
    if len(unread) > 0:
        ele = unread[-1]
        action = webdriver.common.action_chains.ActionChains(browser)
        action.move_to_element_with_offset(ele, 0, -20)  # move a bit to the left from the green dot
        # Clicking a couple of times because sometimes WhatsApp Web responds after two clicks
        try:
            action.click()
            action.perform()
            action.click()
            action.perform()
        except Exception as e:
            pass
        try:
            name = browser.find_element_by_class_name("Pv-sE").text  # Contact name
            message = browser.find_elements_by_class_name("vW7d1")[-1]
            if 'activate bot' in message.text.lower():
                if name not in bot_users:
                    bot_users[name] = True
                    text_box = browser.find_element_by_class_name("_3uMse")
                    response = "Hi " + name + ". Tal's Bot here :). Now I am activated for you\n"
                    text_box.send_keys(response)
            if name in bot_users:
                if 'show' in message.text.lower() and 'news' in message.text.lower():
                    getNews()
                if 'deactivate' in message.text.lower():
                    if name in bot_users:
                        text_box = browser.find_element_by_class_name("_3uMse")
                        response = "Bye " + name + ".\n"
                        text_box.send_keys(response)
                        del bot_users[name]
        except Exception as e:
            print(e)
            pass
    time.sleep(2)  # A 2-second pause so that the program doesn't run too fast
I don't know why, but this is working now :)
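If it breaks again: the list index out of range error usually means a find_elements_* call returned an empty list before being indexed. A small defensive guard, sketched with the question's class names (which WhatsApp Web changes frequently), keeps the loop alive while the page is still loading:
# Only index the result when at least one element was found; the class name
# comes from the question and is liable to change with WhatsApp updates.
messages = browser.find_elements_by_class_name("vW7d1")
if messages:
    message = messages[-1]
else:
    continue  # nothing rendered yet; try again on the next loop iteration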
I am new to web scraping, so please forgive my ignorance.
I built a program to scrape Zillow, and everything has worked fine for the most part. My problem is that I am using a proxy service called proxycrawl, which lets me integrate proxies by placing https://api.proxycrawl.com/?token=xxx&url= before my actual URL. What I have noticed is that when the program clicks on an "a" tag, the URL changes as in the example below:
Before: (screenshot of the URL before the click)
After: (screenshot of the URL after the click)
Every 11 clicks, whether through the program or manually, the site changes to the proxycrawl URL, where I get a 404 error. Any ideas?
import csv
import time
import urllib.parse
from selenium import webdriver

# Browser open
print(".....Opening Browser.....")
Browser = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver')
Browser.maximize_window()
#browser page
url = urllib.parse.quote_plus('https://www.zillow.com/homes/for_sale/Bakersfield-CA-93312/house,mobile,land,townhouse_type/97227_rid/35.4606,-119.037467,35.317856,-119.200888_rect/12_zm/0_mmm/')
Browser.get('https://api.proxycrawl.com/?token=xxx&url=' + url)
print("Opening Zillow")
time.sleep(10)
last_page = int(Browser.find_element_by_xpath("""//ol[@class="zsg-pagination"]//li[last()-1]""").text)
#print last_page
page = 0
count = 0
csv_file = open('listings.csv','w')
fieldnames = ['address', 'price', 'zestimate', 'beds', 'baths', 'feet', 'desc', 'Type', 'year_built', 'heating', 'cooling', 'parking', 'lot',
'days_on_market', 'pricepsqr', 'saves', 'interior', 'spaces_amenities', 'construction', 'exterior', 'parking1', 'mls', 'other']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
for i in range(last_page):
    page = page + 1
    n = 0
    listings = Browser.find_elements_by_xpath("""//*[@id="search-results"]/ul/li""")
    for i in range(len(listings)):
        n = i + 1
        listing_dict = {}
        print("Scraping the listing number {0} on page {1}, the count is {2}".format(n, page, count))
        if (count) % 11 == 0:
            listings = Browser.find_elements_by_xpath('//*[@id="search-results"]/ul/li')
            time.sleep(2)
        try:
            # Finds Listings
            listings = Browser.find_elements_by_xpath("""//*[@id="search-results"]/ul/li""")
            print("Looking Up listings")
            # Opens Listing
            listings[i].find_elements_by_tag_name('a')[0].click()
            print("Opening Listing")
            time.sleep(2)
            # Opens "See More Tab"
            Browser.find_element_by_partial_link_text('See More').click()
            # Prepare for Scrape
            time.sleep(2)
I did speak with proxycrawl, and they stated that the URL had to be encoded, which I did, with no luck. After encoding, I replied and got the following statement:
"You are sending your requests double encoded and your get a response of pc_status: 602. Those requests are failing and you should fix them. Please only encode the URLs once, encoding the URLs more than once will result in a failing request."
It looks like the page is trying to redirect you relatively.
In this specific use case, you could hack your way around the encoding issue by doing something similar to the following:
# current url, e.g.: https://api.proxycrawl.com/homes/for_sale/Test/one,two
x = driver.current_url
# keep only the relative part: /homes/for_sale/Test/one,two
r = x[26:]
# base_url = 'https://api.proxycrawl.com/?token=xxx&url='
u = base_url + r
driver.get(u)
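A slightly sturdier variant of the same idea, sketched with urllib.parse so the prefix length isn't hard-coded (the Zillow host below is an assumption based on the question):
from urllib.parse import urlparse, quote_plus

base_url = 'https://api.proxycrawl.com/?token=xxx&url='  # token elided, as in the question
# extract the relative path the site redirected to, e.g. /homes/for_sale/...
relative = urlparse(driver.current_url).path
# re-wrap it in the proxied, single-encoded target URL (assumed host)
driver.get(base_url + quote_plus('https://www.zillow.com' + relative))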
Case: I am trying to extract the number of result pages from a site. I create a filter on the page with the code below:
import time
import urllib2
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

fp = webdriver.FirefoxProfile()
fp.set_preference("javascript.enabled", True)
b = webdriver.Firefox(firefox_profile=fp)
b.get(url)
time.sleep(10)
search = b.find_element_by_name("rb")
search.clear()
search.send_keys('dove')
search.send_keys(Keys.ESCAPE)
search.submit()
shampoo_sel = b.find_element_by_id('flt-46')
shampoo_sel.click()
conditioner_sel = b.find_element_by_id('flt-47')
conditioner_sel.click()
time.sleep(5)
search_url = b.current_url
dp = urllib2.urlopen(search_url).read()
dp_soup = BeautifulSoup(dp)
search_page_num = dp_soup.find("li", { "id" : "pagContinue" })
print search_page_num
When I read the page back via the current URL, the URLs before and after the filter are identical, so I can't get the number of pages for the filtered results.
What should I do in this case?
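A likely fix, assuming the filter is applied client-side with JavaScript (which would explain the unchanged URL): parse the DOM Selenium already holds instead of re-downloading current_url, which returns the unfiltered page.
# Sketch: read the filtered page straight from the driver instead of urllib2
dp_soup = BeautifulSoup(b.page_source)
search_page_num = dp_soup.find("li", {"id": "pagContinue"})
print search_page_num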