Error when using Requests with python 3.5 recursively (GooglePlaces API) - python

I have been having a problem where I send a GET request and, if there is a next page token in the result, the code takes that token and executes another request recursively until no next page token remains in the result.
The first request works fine, but when there is a next page token in the response and the code executes the new request, the result is an invalid response. However, if I take the link produced by the code and use it in Postman or in my browser, everything is fine.
I'm assuming it has something to do with requests running on different threads at the same time.
The second response from the request in Python:
{'html_attributions': [], 'status': 'INVALID_REQUEST', 'results': []}
Here is what I have:
import requests

def getPlaces(location, radius, type, APIKEY):
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?location="+location+"&radius="+radius+"&type="+type+"&key="+APIKEY
    print('Getting results for type ' + type + '...')
    r = requests.get(url)
    response = r.json()
    results = []
    if response['status'] == 'ZERO_RESULTS':
        print("Did not find results for the type "+type)
    else:
        print("Results for type "+type)
        for result in response['results']:
            results.append(result)
            print(result)
        print('Printing results')
        print(results)
        if 'next_page_token' in response:
            print("There is a next page")
            page_token = response['next_page_token']
            print(page_token)
            next_results = getNextPlace(page_token, APIKEY)
            print(next_results)
            results.append(next_results)
    return results

# Get the rest of the results
def getNextPlace(page_token, APIKEY):
    print('...')
    next_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location='+location+'&radius='+radius+'&type='+type+'&pagetoken=' + page_token + '&key=' + APIKEY
    print(next_url)
    r = requests.get(next_url)
    response = r.json()
    results = []
    print(response)
    if response['status'] == 'ZERO_RESULTS':
        print("Did not find results")
    elif response['status'] == 'INVALID_REQUEST':
        print('Invalid response')
    else:
        for next_result in response['results']:
            results.append(next_result)
            print(next_result)
        if 'next_page_token' in response:
            new_page_token = response['next_page_token']
            getNext = getNextPlace(new_page_token, APIKEY)
            results.append(getNext)
    return results

Figured out the issue!
The Google API doesn't allow consecutive requests if the last request was made within roughly 2 seconds.
What I did was have the program sleep for 3 seconds and then send the request.
Now everything is working fine.
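A minimal sketch of that change inside getNextPlace (assuming next_url is built as in the original code; 3 seconds is just the delay that worked here, not a documented limit):

import time

# ...inside getNextPlace, just before the paged request:
time.sleep(3)  # give Google time to activate the next_page_token
r = requests.get(next_url)
response = r.json()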

What you are trying to do can be done in one function, like:
import time
import requests

def getPlaces(location, radius, API, i, type):
    # i is the index of the next page token to use (start with 0)
    url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?location="+location+"&radius="+radius+"&key="+API+"&types="+type
    r = requests.get(url)
    response = r.json()
    results = []
    for result in response['results']:
        results.append(result)
    l = []
    while True:
        if 'next_page_token' in response:
            page_token = response['next_page_token']
            l.append(page_token)
            next_url = url+'&pagetoken='+l[i]
            i = i+1
            time.sleep(3)
            r = requests.get(next_url)
            response = r.json()
            for next_result in response['results']:
                results.append(next_result)
        else:
            break
    return results

Your code prints "Invalid response" because response['status'] == 'INVALID_REQUEST', i.e. the Google API service thinks your URL request is invalid.
As this document says, the parameters location, radius, type, and key are required, and pagetoken is optional. So your second request URL is invalid because it does not include all the required keys.
Maybe you should try changing the URL to:
next_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location='+location+"&radius="+radius+"&type="+type+"&key="+APIKEY + "&pagetoken=" + page_token
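To avoid dropping one of the required parameters when hand-building the query string, you could also let requests assemble it from a dictionary (a sketch using the same variable names as the question, not part of the original answer):

r = requests.get(
    'https://maps.googleapis.com/maps/api/place/nearbysearch/json',
    params={
        'location': location,     # required
        'radius': radius,         # required
        'type': type,             # required
        'key': APIKEY,            # required
        'pagetoken': page_token,  # only for follow-up pages
    },
)
response = r.json()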

Related

Is there a way to detect url changes with Python

I made a phishing-site searcher using Python. Here is the code that I use:
output = []
for i in range(100):
    for subdomain_count in [1, 2, 3, 4]:
        webtypo = random.choice(typo) + '.odoo.com'
        http = random.choice(HTTP)
        data = random.sample(web, k=subdomain_count) + [webtypo]
        delims = random.choices(delimiters, k=subdomain_count)
        address = ''.join([a+b for a, b in zip(data, delims)])
        weburl = http + address
        output.append(weburl)

exist = []
for c in output:
    try:
        request = requests.get(c)
        if request.status_code == 200:
            exist.append(c)
            print('Exist')
        elif request.status_code == 204:
            print('user does not exist')
    except:
        print('Not Exist')
When I check the Request URL, the link changes to https://www.odoo.com/typo?domain=minecraftnet.odoo.com&autodbname=edcandroidbni123&hosting=eu142a. Is there a way to detect when a link changes like this, so the script prints that the site does not exist, but still prints that it exists when a site genuinely uses an odoo.com subdomain?
Yes, you can use the response.url attribute to get the final URL after any redirects.
response = requests.get(c)
final_url = response.url
Note that this only handles 3xx redirects, not JavaScript redirects; Requests will never execute JavaScript.
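As a rough sketch of the check you describe (the site_exists helper and the www.odoo.com comparison are assumptions built from the redirect you observed, not documented Odoo behaviour):

from urllib.parse import urlparse
import requests

def site_exists(url):
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return False
    final_host = urlparse(response.url).hostname or ''
    # The question reports that unknown subdomains are redirected to
    # https://www.odoo.com/typo?..., so treat that hop as "does not exist".
    if final_host == 'www.odoo.com':
        return False
    return response.status_code == 200

You could then build the list with: exist = [c for c in output if site_exists(c)]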

Python ThreadPoolExecutor: future.result randomly returns None

So I have the following code for threading:
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = []
    for url in total_links:
        futures.append(executor.submit(process_url, input_url=url))
    for future in concurrent.futures.as_completed(futures):
        print('RESULT INSIDE')
        result = future.result()  # Returns None randomly
        print(result)
        records.append(result)
At times future.result() returns None. Below is the process_url function:
def process_url(input_url):
    res = None
    sleep(0.07)
    r = session.get(input_url, headers=headers, cookies=c, cert=cert, timeout=20)
    if r.status_code == 200:
        soup = BeautifulSoup(r.text, 'lxml')
        res = get_status(soup)
    print('Inside Process URL')
    print(res)
    print('======================')
    return res
res will always have data available, but that data is not being fetched under the thread. I should also add that it happens randomly: if I run the script 5 times, at least once it returns None.
There may be several issues, but most notably: if you don't receive a status code of 200, the future will return None. For example, say one thread retrieves a URL successfully, but a following thread is unable to reach its host, the request times out, etc.; that thread will report back None.
You could validate this behavior by:
- Making sure each request is actually working, e.g. making sure res = None changes to the success state in your thread. What happens if the request times out? Are you returning those details? If not, res stays None.
- Adding logic to surface errors, e.g. making sure res ends up referencing a response.
def process_url(input_url):
    res = None
    sleep(0.07)
    r = session.get(input_url, headers=headers, cookies=c, cert=cert, timeout=20)
    if r.status_code == 200:
        soup = BeautifulSoup(r.text, 'lxml')
        res = get_status(soup)
    print('Inside Process URL')
    print(res)
    print('======================')
    if r.status_code != 200:
        res = r.text
    return res
I would potentially use a dictionary instead of a list for the futures so you can reference each thread by its URL. Right now it's by index. This way you can know which URL is the culprit.
# Wrapped in a function so the early returns below are valid
def load_all(URLS):
    # We can use a with statement to ensure threads are cleaned up promptly
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # Start the load operations and mark each future with its URL
        future_to_url = {executor.submit(load_url, url, 20): url for url in URLS}
        for future in concurrent.futures.as_completed(future_to_url):
            url = future_to_url[future]
            try:
                data = future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (url, exc))
                return "Something went wrong!"
            else:
                print('%r page is %d bytes' % (url, len(data)))
                return "Success!"
But the process you're running is not "randomly" returning None; the res object is actually None during that execution. The only other time you would receive None is if the thread isn't collecting a result. Here are the docs for reference:
https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Future.result
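One way to make those failures visible instead of silently collecting None is to raise inside the worker and catch around future.result() (a sketch that reuses the session, headers, futures, and records objects from the question; raise_for_status is a standard Requests method):

def process_url(input_url):
    r = session.get(input_url, headers=headers, cookies=c, cert=cert, timeout=20)
    # Raise for any non-2xx status so the worker never silently returns None.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')
    return get_status(soup)

for future in concurrent.futures.as_completed(futures):
    try:
        # An exception raised in the worker is re-raised here.
        records.append(future.result())
    except Exception as exc:
        print('Request failed: %s' % exc)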

Skip exceptions in python

I have a newbie question:
Let's say I have this list of stocks in Python:
import requests
list = ["AMZN", "APPL", "BAC"]
try:
    for x in list:
        url = 'https://financialmodelingprep.com/api/v3/quote-short/'+x+'?apikey=demo'
        response = requests.request('GET', url)
        result = response.json()
        print(result[0]["price"])
except:
    pass
The second ticker will throw an exception. How do I make Python continue to the third ticker no matter what happens with the second ticker's request?
Use try-except inside the for loop, like below:
import requests
list = ["AMZN", "APPL", "BAC"]
for x in list:
    try:
        url = 'https://financialmodelingprep.com/api/v3/quote-short/'+x+'?apikey=demo'
        response = requests.request('GET', url)
        result = response.json()
        print(result[0]["price"])
    except:
        pass
You can use continue
import requests
list = ["AMZN", "APPL", "BAC"]
for x in list:
    try:
        url = 'https://financialmodelingprep.com/api/v3/quote-short/'+x+'?apikey=demo'
        response = requests.request('GET', url)
        result = response.json()
        print(result[0]["price"])
    except:
        continue
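If you want to keep looping but still see why a ticker failed, you can catch narrower exception types instead of a bare except (a sketch; the exception classes are standard Requests and built-in errors):

import requests

tickers = ["AMZN", "APPL", "BAC"]
for x in tickers:
    url = 'https://financialmodelingprep.com/api/v3/quote-short/' + x + '?apikey=demo'
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        result = response.json()
        print(result[0]["price"])
    except (requests.RequestException, ValueError, IndexError, KeyError) as exc:
        # Log the failure for this ticker and move on to the next one.
        print('{}: skipped ({})'.format(x, exc))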

concatenate JSON paginated response with python requests

I want to concatenate the paginated JSON responses of the WordPress API using Python's requests library. Here is my code:
import requests
results = []
pagination = 1
url = 'https://example.com/wp-json/wp/v2/posts?after=2019-01-01T00:00:00&before=2019-02-01T00:00:00&per_page=5&page={}'.format(pagination)
r = requests.get(url)
data = r.json()
for i in data:
    results.append(i)
while r.status_code == 200:
    pagination += 1
    r = requests.get(url)
    data = r.json()
    for i in data:
        results.append(i)
else:
    break
print(results)
I end up with the 1st page only.
You are binding the page value into the URL when you format it initially; it is never updated again.
It is probably easier to pass some of your params to requests as a dictionary instead of building them into the URL, and to update the pagination on each iteration, for example:
import requests
results = []
pagination = 1
url = 'https://example.com/wp-json/wp/v2/posts?after=2019-01-01T00:00:00&before=2019-02-01T00:00:00'
params = {'per_page': 5, 'page': pagination}
r = requests.get(url, params=params)
data = r.json()
for i in data:
    results.append(i)
while r.status_code == 200:
    pagination += 1
    params['page'] = pagination
    r = requests.get(url, params=params)
    if r.status_code != 200:
        break
    data = r.json()
    for i in data:
        results.append(i)
print(results)

Google URL Shortener API always returns same page with pagetoken

I am attempting to use the Google UrlShortener API to retrieve history with OAuth2 and an API key. I am getting a 200 OK response, but when I try to get subsequent pages using pagetoken or pageToken as a query parameter, I always get the same nextPageToken and the same page of results. Oddly, the browser-based Google API interaction uses start-token, not pagetoken or pageToken, but when I use start-token I don't get a 200 OK.
How do I get pagination to work with the UrlShortener API?
Here is my code:
import requests
import json
import time
import settings
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.tools import run_flow
from oauth2client.file import Storage

def history():
    """Look up a user's history"""
    flow = OAuth2WebServerFlow(client_id=settings.OAUTH2_CLIENT_ID,
                               client_secret=settings.CLIENT_SECRET,
                               scope='https://www.googleapis.com/auth/urlshortener',
                               redirect_uri='http://127.0.0.1:5000/callback')
    storage = Storage('creds.data')
    credentials = run_flow(flow, storage)
    print("access_token: {}".format(credentials.access_token))
    headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer {}'.format(credentials.access_token)}
    raw_url = 'https://www.googleapis.com/urlshortener/v1/url/history'
    url = raw_url + '?key={}'.format(settings.API_KEY)
    r = requests.get(url=url, headers=headers)
    if r.ok:
        output = "The history is {}.".format(r.json())
        print(output)
        if 'nextPageToken' in r.json().keys():
            morePages = True
            npt = r.json()['nextPageToken']
            r_paged = None
            while morePages:
                time.sleep(2)
                url = raw_url + '?pagetoken={}&key={}'.format(npt, settings.API_KEY)
                r_paged = requests.get(url=url, headers=headers)
                if r_paged.ok:
                    if 'nextPageToken' in r_paged.json().keys():
                        npt = r_paged.json()['nextPageToken']
                        morePages = True
                    else:
                        morePages = False
                        break
                    output = "The history is {}.".format(r_paged.json())
                    print(output)
                else:
                    output = "Invalid request. Status code = {}, json = {}".format(r_paged.status_code, r_paged.json())
                    print(output)
    else:
        output = "Invalid request. Status code = {}, json = {}".format(r.status_code, r.json())
        print(output)
Fixed code follows:
# New import:
import urllib.parse
# // snip
time.sleep(2)
f = {'start-token':npt, 'key': settings.API_KEY}
formatted = '?' + urllib.parse.urlencode(f)
url = raw_url + formatted
r_paged = requests.get(url=url, headers=headers)
# // snip
Basically, ignore the documentation: do NOT use pageToken, use start-token. Furthermore, you need to use the URL-encoding helper suitable for Python 3 (urllib.parse) to build the query string.
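For illustration, urllib.parse.urlencode simply turns the dictionary into a safely escaped query string (the token value below is made up and the key is a placeholder):

from urllib.parse import urlencode

params = {'start-token': 'CAkQ1A8', 'key': 'YOUR_API_KEY'}
print('?' + urlencode(params))
# -> ?start-token=CAkQ1A8&key=YOUR_API_KEY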
