Requests repeating issue in Python

I'm using the Python requests library to make a series of requests, i.e.
Req1, then Req2, then Req3.
The issue is that Req1 keeps repeating itself and never moves on to Req2.
Any help please?
Code
While true:
Try:
session = requests.session()
r = session.get('Url').text #req1
postdata = 'the post data'
myheader = {'the headers'}
n = session.post('Myurl ', data=postdata, headers=myheaders).text #req2
The script keeps repeating the GET request (req1).

Your indentation could be the problem, since only the code indented under the while True loop is repeated.
This in turn would cause the rest of the code to never run, because the loop never ends.
Some other errors I noticed:
After the try: there is no except:
The T in Try: should be lowercase
The t in true: should be uppercase (True:)
The W in While should be lowercase
A proper example would be:
while True:
    try:
        session = requests.session()
        r = session.get('https://example.com').text  # req1
        postdata = {'data': 'data'}
        myheaders = {'header': 'header'}
        n = session.post('https://example.com', data=postdata, headers=myheaders).text  # req2
    except:
        # Some logic for after an error occurs
        # ex:
        print("An error has occurred")
One last nitpick that isn't all that relevant: requests.session() returns a Session object whose real purpose is to persist cookies and reuse connections across requests, so if you don't need either of those you can simply call requests.get() and requests.post() directly.
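For illustration, here is a minimal sketch of the two approaches side by side; the URLs, payload, and headers are placeholders, not taken from the question:

import requests

# Without a session: each call opens its own connection and keeps no cookies.
r1 = requests.get('https://example.com/login')
r2 = requests.post('https://example.com/submit', data={'data': 'data'})

# With a session: cookies set by req1 are sent automatically with req2,
# and the underlying connection can be reused.
session = requests.Session()
r1 = session.get('https://example.com/login')    # req1
r2 = session.post('https://example.com/submit',  # req2
                  data={'data': 'data'},
                  headers={'header': 'header'})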

Related

Python API script

I am making a Python script using the API of a free test automation website called TestProject.
Link to their API: https://api.testproject.io/docs/v2/
Basically, what I want to do is grab the PDF reports of all tests and save them somewhere.
But to make the GET request for that I first need a projectID and jobID, for which I already wrote functions that fetch them and save them in lists.
But now I have a problem where the script loops through both lists and does not use the correct projectID with the correct jobID, so it throws errors because the combination does not exist.
So what I need is something to check which jobIDs belong to which projectID, so that I can make the GET request to get all the executionIDs and then the PDF of the report.
I am kinda new to programming, so I would love any help I can get. If anyone has a better solution, please feel free to let me know.
My script:
import requests
import json
import csv
from datetime import datetime
from jsonpath_ng import jsonpath, parse

API_key = 'api_key'
headers = {'Authorization': '{}'.format(API_key)}

list_projectId = []
list_jobId = []
list_executionId = []

ParseData_projectId = parse('$..id')
ParseData_jobId = parse('$..id')
ParseData_executionId = parse('$..id')

def parsing(response, ParseData, list_data):
    # parses data and appends it to the list
    Data = json.loads(response)
    Parsaj = ParseData
    Podatki = Parsaj.find(Data)
    for i in range(0, len(Podatki)):
        vrednost = Podatki[i].value
        list_data.append(vrednost)

def projectId():
    # gets all projectId's and saves them in list_projectId
    url = 'https://api.testproject.io/v2/projects?_start=0'
    response = requests.get(url, headers=headers)
    response_json = response.json()
    converted = json.dumps(response_json)
    parsing(converted, ParseData_projectId, list_projectId)

def jobId():
    # gets all jobId's and saves them in list_jobId
    for i in range(0, len(list_projectId)):
        id = list_projectId[i]
        url = 'https://api.testproject.io/v2/projects/{}'.format(id) + '/jobs?onlyScheduled=false&_start=0'
        response = requests.get(url, headers=headers)
        response_json = response.json()
        converted = json.dumps(response_json)
        parsing(converted, ParseData_jobId, list_jobId)

def executionId():
    # Their API link:
    # https://api.testproject.io/v2/projects/{projectId}/jobs/{jobId}/reports?_start=0
    # the for loop below does not work, here is where I need the help:
    for i in range(0, len(list_projectId)):
        project_id = list_projectId[i]
        job_id = list_jobId[i]
        url = 'https://api.testproject.io/v2/projects/{}'.format(project_id) + '/jobs/{}'.format(job_id) + '/reports?_start=0'
        response = requests.get(url, headers=headers)
        response_json = response.json()
        converted = json.dumps(response_json)
        parsing(converted, ParseData_executionId, list_executionId)

projectId()
print("----------LIST PROJECT ID: ----------")
print(list_projectId)
print("")
jobId()
print("----------LIST JOB ID: ----------")
print(list_jobId)
executionId()
print("----------LIST EXECUTION ID: ----------")
print(list_executionId)
You have to use the in operator to check whether a value exists in a list data structure.
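That check alone won't restore the pairing, though, because the flat lists lose which job belongs to which project. As a rough sketch building on the question's code (the dict-based grouping is my suggestion, not something from the original answer), you could collect the job IDs per project and then iterate over the pairs:

jobs_by_project = {}  # maps each projectId to the list of its jobIds

def jobId():
    # gets all jobId's, grouped by the project they belong to
    for project_id in list_projectId:
        url = ('https://api.testproject.io/v2/projects/{}'
               '/jobs?onlyScheduled=false&_start=0').format(project_id)
        response = requests.get(url, headers=headers)
        matches = ParseData_jobId.find(response.json())
        jobs_by_project[project_id] = [m.value for m in matches]

def executionId():
    # only requests reports for jobs that really belong to the project
    for project_id, job_ids in jobs_by_project.items():
        for job_id in job_ids:
            url = ('https://api.testproject.io/v2/projects/{}/jobs/{}'
                   '/reports?_start=0').format(project_id, job_id)
            response = requests.get(url, headers=headers)
            converted = json.dumps(response.json())
            parsing(converted, ParseData_executionId, list_executionId)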

Requests: maximum number of attempts with a wait time and, in case of failure, a message, in Python

The situation is that sometimes a request does not load or gets stuck in Python. If that happens, or any error occurs, I would like to retry it n times, waiting up to a maximum of 3 seconds for each attempt, and once the attempts are exhausted show the message f"Could not process {type_1} and {type_2}". Everything runs in parallel with concurrent.futures. Could you help me with that?
import requests
import concurrent.futures
import json

data = [['PEN', 'USD'], ['USD', 'EUR']]

def currency(element):
    type_1 = element[0]
    type_2 = element[1]
    s = requests.Session()
    url = f'https://usa.visa.com/cmsapi/fx/rates?amount=1&fee=0&utcConvertedDate=07%2F26%2F2022&exchangedate=07%2F26%2F2022&fromCurr={type_1}&toCurr={type_2}'
    a = s.get(url)
    response = json.loads(a)
    value = response["convertedAmount"]
    return value

with concurrent.futures.ProcessPoolExecutor() as executor:
    results = executor.map(currency, data)
    for value in results:
        print(value)
Your code is almost there. Here, I modified a few things:
from concurrent.futures import ThreadPoolExecutor
import time

import requests

def convert_currency(tup):
    from_currency, to_currency = tup
    url = (
        "https://usa.visa.com/cmsapi/fx/rates?amount=1&fee=0"
        "&utcConvertedDate=07%2F26%2F2022&exchangedate=07%2F26%2F2022&"
        f"fromCurr={from_currency}&toCurr={to_currency}"
    )
    session = requests.Session()
    for _ in range(3):
        try:
            response = session.get(url, timeout=3)
            if response.ok:
                return response.json()["convertedAmount"]
        except requests.exceptions.ConnectTimeout:
            time.sleep(3)
    return f"Could not process {from_currency} and {to_currency}"

data = [["VND", "XYZ"], ['PEN', 'USD'], ["ABC", "XYZ"], ['USD', 'EUR'], ["USD", "XXX"]]

with ThreadPoolExecutor() as executor:
    results = executor.map(convert_currency, data)
    for value in results:
        print(value)
Notes
I retried 3 times (see the for loop).
Use timeout= to specify the timeout (in seconds).
The .ok attribute tells you whether the call was successful.
No need to import json, as the response object can decode JSON via its .json() method.
You might experiment with ThreadPoolExecutor and ProcessPoolExecutor to see which one performs better.
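If you would rather let the library handle the retries, an alternative (not part of the original answer) is to mount an HTTPAdapter with a urllib3 Retry policy on the session; the retry counts, status codes, and URL below are illustrative values:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
retry = Retry(
    total=3,                                     # up to 3 retries per request
    backoff_factor=1,                            # exponential back-off between attempts
    status_forcelist=[429, 500, 502, 503, 504],  # also retry on these HTTP status codes
)
session.mount('https://', HTTPAdapter(max_retries=retry))

# timeout= still applies to each individual attempt; if every retry fails,
# the final exception propagates and can be caught by the caller.
response = session.get('https://example.com', timeout=3)  # placeholder URL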

Why is the JSON output so small?

This output should be way longer than it is here.
I start with a GET request, parse a JSON list and extract the id, which I then use in the second function; that gives me a second ID, which I then use in the third function. But I am only getting one entry, whereas I should be getting way more entries.
The code is the following:
from requests.auth import HTTPBasicAuth
import requests
import json
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def countries():
    data = requests.get("https://localhost:8543/api/netim/v1/countries/", verify=False, auth=HTTPBasicAuth("admin", "admin"))
    rep = data.json()
    return [elem.get("id", "") for elem in rep['items']]

def regions():
    for c in countries():
        url = requests.get("https://localhost:8543/api/netim/v1/countries/{}/regions".format(c), verify=False, auth=HTTPBasicAuth("admin", "admin"))
        response = url.json()
        return [cid.get("id", "") for cid in response['items']]

def city():
    for r in regions():
        api = requests.get("https://localhost:8543/api/netim/v1/regions/{}/cities".format(r), verify=False, auth=HTTPBasicAuth("admin", "admin"))
        resolt = api.json()
        return json.dumps([{"name": r.get("name", ""), "id": r.get("id", "")} for r in resolt['items']], indent=4)

city()
print(city())
The output is the following:
[
    {
        "name": "Herat",
        "id": "AF~HER~Herat"
    }
]
I should have a huge list, so I am not sure what I am missing.
You need to go through all the iterations of your loop and collect the results, then JSON-encode them and return them.
data = []
for r in regions():
    api = requests.get("https://localhost:8543/api/netim/v1/regions/{}/cities".format(r), verify=False, auth=HTTPBasicAuth("admin", "admin"))
    resolt = api.json()
    data.extend([{"name": r.get("name", ""), "id": r.get("id", "")} for r in resolt['items']])
return json.dumps(data, indent=4)
This would be a fix for city(), but you have the same problem in all your functions. return immediately exits the function and does nothing else, so effectively all your for loops do a single iteration.
I'll update my example here to give you a better idea of what's occurring.
Your functions are basically this:
def test_fn():
    for i in [1, 2, 3, 4]:
        return i

# output:
1
# We never see 2 or 3 or 4 because we return before looping on them.
What you want:
def test_fn():
    results = []
    for i in [1, 2, 3, 4]:
        results.append(i)
    return results

# output
[1, 2, 3, 4]
It seems like you understand that the for loop will take some action once for each element in the list. What you're not understanding is that return ends the function immediately: no more for loop, no more actions. In your code you return inside the for loop, which stops any further iterations.
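A side note that is not part of the original answer: instead of accumulating a list, you could also yield each result, which turns the function into a generator and keeps the loop running. A minimal sketch based on the regions() function above:

def regions():
    # yields every region id instead of returning after the first country
    for c in countries():
        url = requests.get(
            "https://localhost:8543/api/netim/v1/countries/{}/regions".format(c),
            verify=False,
            auth=HTTPBasicAuth("admin", "admin"),
        )
        for cid in url.json()['items']:
            yield cid.get("id", "")

# city() can then iterate over all regions with: for r in regions(): ...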

Python Navigation Timeout Exceeded: 8000 ms exceeded

I keep getting this error in multiple scripts. I'm doing a lot of scraping, with a loop that scrapes through hundreds of pages, and at some point the script just stops because of this error.
Here's an example of a script.
Example 2:
def scrape(urls):
    for url in urls:
        session = HTMLSession()
        resp = session.get(url)
        resp.html.render()
        try:
            phone = resp.html.find('span.phone')[0].text
        except IndexError:
            phone = None
        biz_name = resp.html.find('h1')[0].text
        try:
            biz_desc = resp.html.find('p.biz-description-text')[0].text
        except IndexError:
            biz_desc = None
        biz_location = resp.html.find('span.title-address-text')[0].text
        city = biz_location.split(',')[-1]
        print(
            f'phone is: {phone}\nthe business name is: {biz_name}\nthe description is: {biz_desc}\nthe city is: {city}')
        import_data(biz_name, phone, biz_desc, city)

def import_data(name, phone, desc, city):
    global keyword
    wp_title_box = driver.find_element_by_xpath('//*[@id="title"]')
    wp_title_box.send_keys(name)
    time.sleep(1)
    wp_desc_box = driver.find_element_by_xpath('//*[@id="content_ifr"]')
    wp_desc_box.send_keys(desc)
    time.sleep(1)
    new_field_button = driver.find_element_by_xpath('//*[@id="newmeta-submit"]')
    select_box = Select(driver.find_element_by_xpath('//*[@id="metakeyselect"]'))
    select_box.select_by_value("ad_city")
    wp_city_fill = driver.find_element_by_xpath('//*[@id="metavalue"]')
    wp_city_fill.send_keys(city)
    new_field_button.click()
    time.sleep(2)
    select_box.select_by_value("ad_phone")
    wp_city_fill = driver.find_element_by_xpath('//*[@id="metavalue"]')
    wp_city_fill.send_keys(phone)
    new_field_button.click()
    time.sleep(2)
    select_box.select_by_value("ad_promote")
    wp_city_fill = driver.find_element_by_xpath('//*[@id="metavalue"]')
    wp_city_fill.send_keys('1')
    new_field_button.click()
    time.sleep(2)
    save_btn = driver.find_element_by_xpath('//*[@id="save-post"]')
    driver.execute_script("window.scrollTo(0,0);")
    time.sleep(1)
    save_btn.click()
    time.sleep(2)
    driver.find_element_by_xpath('//*[@id="menu-posts"]/ul/li[3]/a').click()
    time.sleep(2)
I've added example 2, as example 1 was solved by the loop provided below.
In the second example the script should end, since I'm using a for loop: once it has finished going through all of the URLs and importing them, it should be done. Am I missing something?
Your program never terminates: number calls scrape, which calls number, which calls scrape, and so on. If you are going to use recursion you need a terminating or base case.
One suggestion is to use a counter to track the depth of your recursion and increment it at each step until it reaches a specified depth.
For what you are doing, though, I don't think you need recursion at all; it is expensive due to the overhead of the function calls. A simple loop would be fine:
import random
import urllib3
from requests_html import HTMLSession

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def scrape(rand_num):
    session = HTMLSession()
    resp = session.get("https://www.example.com/prize/?d=" + '92' + str(rand_num))
    resp.html.render()
    print(f'trying coupon code 92{rand_num}')
    prize = resp.html.find(containing="You've won a prize")
    print(prize)
    if prize:
        print("https://www.example.com/prize/?d=" + '92' + str(rand_num))

def number():
    for i in range(99999999):
        x = random.randint(00000000, 99999999)
        scrape(x)

number()
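As for the "Navigation Timeout Exceeded: 8000 ms exceeded" message itself, it comes from the headless browser that resp.html.render() drives, which gives up after 8 seconds by default. A hedged sketch, separate from the answer above and assuming your requests_html version accepts the timeout and sleep arguments on render(), is to raise that limit and retry a page a few times before giving up:

from requests_html import HTMLSession

def render_with_retries(url, attempts=3):
    # Fetch and render a page, retrying if the headless browser times out.
    session = HTMLSession()
    for attempt in range(attempts):
        try:
            resp = session.get(url, timeout=10)
            resp.html.render(timeout=30, sleep=1)  # allow more than the 8 s default
            return resp
        except Exception as exc:  # pyppeteer raises its own timeout error
            print(f'render attempt {attempt + 1} failed for {url}: {exc}')
    return None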

TypeError: a bytes-like object is required, not 'str' when submitting credentials to https website

Yahoo Finance will finally be deprecated this year, and I am in the process of finding an alternate, reliable data provider. I am doing some web scraping to download historical stock data.
In a nutshell:
I have signed up for their service. =)
I am using fake_useragent and select one header at random to use at login.
I am trying to pass the credentials in the following line: payload = {'redirect_url': base64.b64encode('https://website.com'), 'site': 'US', 'login_username': login_username, 'login_password': login_password}
Retrieve the cookie.
Download the data and handle exceptions as required.
In theory it should work, but I am getting the following error: TypeError: a bytes-like object is required, not 'str'
I have tried converting str to bytes as:
login_username = b'username'
or
login_username = username.encode('utf-8')
import base64
import time
import requests
from fake_useragent import UserAgent

tickers = ['AAPL', 'AMZN', 'FB']

# Credentials to login
login_username = b'username'
login_password = b'password'

def get_magic_cookies():
    __urls__ = {'url_login': 'https://secure.website.com/login/secure'}
    payload = {'redirect_url': base64.b64encode('https://website.com'), 'site': 'MX', 'login_username': login_username,
               'login_password': login_password}
    headers = {UserAgent().random}
    _req = requests.post(__urls__['url_login'], data=payload, headers=headers)
    account_login = _req.history[0].headers['location']
    req_gsv = requests.get(account_login)  # Request for get SID values in content
    cookies_l = req_gsv.history[0].cookies
    if req_gsv.history:
        SID_USER = req_gsv.history[0].cookies['SID']
        USER_COOKIES = [_ for _ in req_gsv.history[0].cookies]
        USER_REQUEST = payload['login_username']
    return cookies_l

# get_magic_cookies()

def main():
    stocks_failed = []
    stocks_timeout = []
    for ticker in tickers:
        try:
            req = requests.get('https://website.com/p.php?pid=data&daily=1&symbol={0}%5E{1}%2A'.format(ex, ticker))
            with open('path\\to\file\\{}.csv'.format(ticker), 'wb') as itd:
                itd.write(req.content)
                itd.close()
        except 'There were no results for your query' in req.content:
            print('[ {0} ] Stock Failed\n'.format(ticker))
            stocks_failed.append(ticker)
        except 'You have made too many requests. Please wait before trying to download again.' in req.content:
            print('{} Stock timeout'.format(ticker))
            stocks_timeout.append(ticker)
            time.sleep(80)  # Time of 80 seconds
            pass

if __name__ == '__main__':
    main()
A couple of questions:
Why is it complaining even after I converted the str to bytes?
What is the correct way to submit credentials to an https website?
Is there another way to submit credentials without having them hardcoded in the code, making it more secure?
Thanks in advance.
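For reference, base64.b64encode() itself only accepts bytes-like input, so the plain 'https://website.com' string passed to it in the payload is a likely source of this exact TypeError, independently of the username and password conversion. A minimal sketch of the usual encode-then-decode pattern (the URL is just the question's placeholder):

import base64

redirect_url = 'https://website.com'

# b64encode() needs bytes in and returns bytes out...
encoded = base64.b64encode(redirect_url.encode('utf-8'))

# ...so decode back to str if the form field expects text.
payload_value = encoded.decode('ascii')
print(payload_value)  # aHR0cHM6Ly93ZWJzaXRlLmNvbQ==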
