concatenate JSON paginated response with python requests - python

I want to concatenate the JSON response of WordPress API with Python's library requests. Here is my code:
# Collects posts from a paginated WordPress REST endpoint into `results`.
import requests
results = []
pagination = 1
# NOTE: the page number is substituted into the URL once, here. `url` is a
# plain string afterwards and does not follow later changes to `pagination`.
url = 'https://example.com/wp-json/wp/v2/posts?after=2019-01-01T00:00:00&before=2019-02-01T00:00:00&per_page=5&page={}'.format(pagination)
r = requests.get(url)
data = r.json()
for i in data:
results.append(i)
while r.status_code == 200:
pagination += 1
# Requests the same `url` string that was built above (still page=1).
r = requests.get(url)
data = r.json()
for i in data:
results.append(i)
else:
break
print(results)
I end up with the 1st page only.

You bind the page number into the URL only once, when you first format it; the URL is never updated again, so every request fetches the same page.
Probably easier to pass some of your params as a dictionary to requests instead of on the URL, and have the pagination update on each iteration, for example:
import requests

# Collect every post in the date range by walking the paginated endpoint.
# The date filters stay in the URL; requests merges `params` into the query
# string, so only the page number has to change between requests.
url = 'https://example.com/wp-json/wp/v2/posts?after=2019-01-01T00:00:00&before=2019-02-01T00:00:00'
pagination = 1
params = {'per_page': 5, 'page': pagination}
results = []

while True:
    r = requests.get(url, params=params)
    # Check the status BEFORE touching the body: a request past the last page
    # is expected to come back as a non-200 error whose JSON is an error
    # object, not a list of posts, and must not be appended to the results.
    if r.status_code != 200:
        break
    data = r.json()
    # An empty page also means there is nothing left to fetch.
    if not data:
        break
    results.extend(data)
    pagination += 1
    params['page'] = pagination

print(results)

Related

trying to make an Yelp API call with a list of business IDs

When I run the code, it raises this error:
requests.exceptions.MissingSchema: Invalid URL 'h': No scheme supplied. Perhaps you meant http://h?
Here is the code I am working with:
# Requests Yelp reviews and prints the text of each review.
from yelp_api_key import YELP_KEY
from yelp_api_location import loc_ids
MY_API_KEY = YELP_KEY
# NOTE: this is a single str (loc_ids is formatted into one f-string),
# not a list of URLs.
BUSINESS_PATH = f'https://api.yelp.com/v3/businesses/{loc_ids}/reviews'
HEADERS = {'Authorization': 'bearer %s' % MY_API_KEY}
PARAMETERS = {'locale': 'en_US'
}
# Iterating over a str yields one character at a time, so the first value of
# `links` is 'h' -- which matches the "Invalid URL 'h'" in the reported error.
for links in BUSINESS_PATH:
response = requests.get (url=links,
params=PARAMETERS,
headers=HEADERS)
business_data = response.json()
data = business_data['reviews']
print(data)
for x in data:
quotes = (x['text'])
print(quotes)
Below is the code that is working for me. I just want to be able to call multiple APIs without having to list the endpoints every time. Any suggestions would be great, TIA!
MY_API_KEY = YELP_KEY

# One reviews endpoint per business, listed explicitly.
BUSINESS_PATH = [
    'https://api.yelp.com/v3/businesses/eL4d1tHv1mFoepoS_3rGbw/reviews',
    'https://api.yelp.com/v3/businesses/RzS-wNTycqB5WA34JfgW0g/reviews',
    'https://api.yelp.com/v3/businesses/PyV1e_OebaWm1cGUwtDvHA/reviews',
    'https://api.yelp.com/v3/businesses/dcbALMl6oyv_fdJ6dZGxzA/reviews',
    'https://api.yelp.com/v3/businesses/4uRA53NIl82a3QeZX-PcRw/reviews',
]
HEADERS = {'Authorization': 'bearer %s' % MY_API_KEY}
PARAMETERS = {'locale': 'en_US'}

# Gather the text of every review of every listed business, in request order.
reviews = []
for endpoint in BUSINESS_PATH:
    payload = requests.get(url=endpoint, params=PARAMETERS, headers=HEADERS).json()
    reviews.extend(review['text'] for review in payload['reviews'])

Python: how to extract data from an OData API that returns pages linked by @odata.nextLink

I need to pull data from an Odata API. With code below I do receive data, but only 250 rows.
The JSON contains a key called @odata.nextLink that holds a single value: the URL of the next page, i.e. BASE_URL + endpoint + ?$skip=250
How can I loop through the next pages?
import requests
import pandas as pd
import json
BASE_URL = "base_url"
def session_token():
    """Request an OAuth access token and return it as an Authorization value.

    Posts a password-grant form to ``BASE_URL + '/api/oauth/token'`` and
    returns the string ``"Bearer <access_token>"``.

    Raises:
        requests.HTTPError: if the token endpoint answers with an error status.
        KeyError: if the response JSON has no ``"access_token"`` entry.
    """
    url = BASE_URL + '/api/oauth/token'
    headers = {
        # Media types are written with a forward slash; "application\json"
        # (backslash) is not a valid Accept value.
        "Accept": "application/json",
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
    }
    body = {
        "username": "user",
        "password": "pwd",
        "grant_type": "password",
    }
    response = requests.post(url, headers=headers, data=body)
    # Fail with the HTTP error itself rather than a KeyError on the error body.
    response.raise_for_status()
    return "Bearer " + response.json()["access_token"]
def make_request(endpoint, token=None):
    """GET ``BASE_URL + endpoint`` and return the decoded JSON body.

    Args:
        endpoint: Path appended to ``BASE_URL``.
        token: Value for the ``Authorization`` header. When omitted, a token
            is requested via ``session_token()`` at call time. (A default of
            ``session_token()`` in the signature would run once, when the
            function is defined, and reuse that token forever.)

    Returns:
        The parsed JSON document, or ``None`` when the response status is
        not 200.
    """
    if token is None:
        token = session_token()
    headers = {"Authorization": token}
    response = requests.get(BASE_URL + endpoint, headers=headers)
    if response.status_code != 200:
        return None
    return response.json()
make_request("/odata/endpoint")
Following @Marek Piotrowski's advice, I modified my code and arrived at this solution:
def main():
    """Yield the ``'value'`` list of every page of the OData endpoint.

    Starts at ``BASE_URL + "endpoint"`` and then follows the next-page link
    of each response until a page carries none.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    headers = {"Authorization": session_token()}
    url = BASE_URL + "endpoint"
    while url:
        # Request the CURRENT url: the next link is already a full URL, so it
        # must not be prefixed with BASE_URL again.
        response = requests.get(url, headers=headers)
        # Raise instead of silently retrying the same URL forever.
        response.raise_for_status()
        json_data = response.json()
        yield json_data['value']
        # OData names the continuation key "@odata.nextLink"; the last page
        # omits it, so .get() returns None and the loop ends.
        url = json_data.get("@odata.nextLink")


# Each yielded page is a list of records; normalise each page and stack them.
result = pd.concat(pd.json_normalize(page) for page in main())
print(result)  # Final dataframe, works like a charm :)
Something like this should retrieve all records, I believe (assuming json_data does indeed contain @odata.nextLink):
def retrieve_all_records(endpoint, token=None):
    """Return the records of every page of an OData collection as one list.

    Args:
        endpoint: Path appended to ``BASE_URL`` for the first request.
        token: Value for the ``Authorization`` header. When omitted, a token
            is requested via ``session_token()`` at call time rather than
            once when the function is defined.

    Returns:
        A list with the entries of each page's ``'value'`` array, in page
        order.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    if token is None:
        token = session_token()
    headers = {"Authorization": token}
    all_records = []
    url = BASE_URL + endpoint
    while url:
        response = requests.get(url, headers=headers)
        # A failed request must stop the loop; otherwise `url` never changes
        # and the same request is repeated forever.
        response.raise_for_status()
        json_data = response.json()
        # OData collections carry their rows under "value"; extend() avoids
        # rebuilding the whole list for every page.
        all_records.extend(json_data['value'])
        # The last page has no "@odata.nextLink"; .get() then ends the loop.
        url = json_data.get('@odata.nextLink')
    return all_records
The code is untested, though. Let me know if it works. Alternatively, you could make some recursive call to make_request, I believe, but you'd have to store results somewhere above the function itself then.
I know this is late, but you could look at the article by Ephram Mwai on Towards Data Science.
He pretty much solved the problem with a good script.

I can't get data from JSON with python

I can't seem to get the last recorded price from a website API using JSON. I tried finding the error but it seems okay to me. The code is in python
This is the Url that I have to GET: https://api.independentreserve.com/Public/GetMarketSummary?primaryCurrencyCode=xbt&secondaryCurrencyCode=aud
Python 3.7
# Requests the market summary and prints its 'LastPrice' field.
import requests
URL = "https://api.independentreserve.com/Public/GetMarketSummary?"
CurrencyCode = "xbt"
SecondaryCode = "aud"
# NOTE: the second key literally starts with '&'. The dict keys are used as
# the parameter names, so this sends a parameter named
# '&secondaryCurrencyCode' rather than 'secondaryCurrencyCode'.
PARAMS = {'primaryCurrencyCode': CurrencyCode, '&secondaryCurrencyCode': SecondaryCode}
r = requests.get(url=URL, params=PARAMS)
data = r.json()
lastprice = data['LastPrice']
print("Last Price:%s" % lastprice)
here is the fixed code
import requests

# Endpoint and query arguments for the market-summary lookup.
URL = "https://api.independentreserve.com/Public/GetMarketSummary?"
PARAMS = {
    'primaryCurrencyCode': "xbt",
    'SecondaryCurrencyCode': "aud",
}

# requests turns PARAMS into the query string; no '&' is written by hand.
lastprice = requests.get(url=URL, params=PARAMS).json()['LastPrice']
print("Last Price:%s" % lastprice)
the problem is in the PARAMS dict. you need to change "&secondaryCurrencyCode" to "SecondaryCurrencyCode".
if you had printed the data dict, you would see this:
{'Message': 'Secondary Currency Code is required'}
Removing & in "&secondaryCurrencyCode" will fix the issue.
Fixed code below:
import requests

# requests builds the query string from PARAMS, so the URL needs neither a
# trailing '?' nor hand-written '&' separators.
URL = "https://api.independentreserve.com/Public/GetMarketSummary"
CurrencyCode = "xbt"
SecondaryCode = "aud"
PARAMS = {'primaryCurrencyCode': CurrencyCode, 'secondaryCurrencyCode': SecondaryCode}

r = requests.get(url=URL, params=PARAMS)
data = r.json()

# When the API rejects the request, the body holds a 'Message' entry instead
# of 'LastPrice'. Show that message rather than failing with a KeyError.
if 'LastPrice' in data:
    lastprice = data['LastPrice']
    print("Last Price:%s" % lastprice)
else:
    print("Request failed: %s" % data.get('Message', data))
API is expecting secondaryCurrencyCode not &secondaryCurrencyCode.
You don't need & sign when you use params.

Urllib2 grab web-page element then reverse it

I need to visit http://www.chiquitooenterprise.com/ reverse the string and access the website using this URL: http://www.chiquitooenterprise.com/password?code=REVERSEDSTRING
How can i do this using urllib2 and Python?
# Download the page, reverse its text, and submit it as the `code` parameter.
link = "http://www.chiquitooenterprise.com/password"
request = urllib2.Request(link)
contents = urllib2.urlopen(request).read()
# Reverse the downloaded text; the Request object itself cannot be sliced.
revString = contents[::-1]
answer = link + "?code=" + revString
response = urllib2.urlopen(answer)
response = response.read()
print(response)
# Download the page, reverse its text, and submit it as the `code` parameter.
link = "http://www.chiquitooenterprise.com/password"
result = requests.get(link)
contents = result.text
# Drop surrounding whitespace (e.g. a trailing newline) first, so it does not
# end up at the front of the reversed code.
revString = contents.strip()[::-1]
# Pass the code via `params` so requests URL-encodes it; interpolating raw
# page text into the URL breaks on characters such as '&', '#' or spaces.
response = requests.get(link, params={"code": revString})
response = response.text
print(response)

Error when using Requests with python 3.5 recursively (GooglePlaces API)

I have been having a problem where I try to send a get request and if there is a next page token in the result it will then take that link and execute another request recursively until there is no next page token in the result.
The first request works fine, but when there is a next page token in the response and the code tries to execute the new request, the result is an invalid response. If I take the link from that result and use it in Postman or in my browser, everything is fine.
I'm assuming it has something to do with requests running on different threads at the same time.
The second response from request using Python:
{'html_attributions': [], 'status': 'INVALID_REQUEST', 'results': []}
Here is what I have:
import requests
# Runs one nearby-search request for the given type, prints what it finds and
# returns the collected results; follows the next page token via getNextPlace.
def getPlaces(location,radius,type, APIKEY):
url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json?location="+location+"&radius="+radius+"&type="+type+"&key="+APIKEY
print('Getting results for type ' + type + '...')
r = requests.get(url)
response = r.json()
results = []
if response['status'] == 'ZERO_RESULTS':
print("Did not find results for the type "+type)
else:
print("Results for type "+type)
for result in response['results']:
results.append(result)
print(result)
print('Printing results')
print(results)
if 'next_page_token' in response:
print("There is a next page")
page_token = response['next_page_token']
print(page_token)
next_results = getNextPlace(page_token,APIKEY)
print(next_results)
# NOTE: append() adds the list returned by getNextPlace as ONE nested
# element instead of adding its items individually.
results.append(next_results)
return results
# Get the rest of the results
# Requests the page identified by page_token and returns its results; calls
# itself again while the response carries another next_page_token.
def getNextPlace(page_token,APIKEY):
print('...')
# NOTE: location, radius and type are not parameters of this function; they
# have to exist as globals when this line runs.
next_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location='+location+'&radius='+radius+'&type='+type+'&pagetoken=' + page_token + '&key=' + APIKEY
print(next_url)
r = requests.get(next_url)
response = r.json()
results = []
print(response)
if response['status'] == 'ZERO_RESULTS':
print("Did not find results")
elif response['status'] == 'INVALID_REQUEST':
print('Invalid response')
else:
for next_result in response['results']:
results.append(next_result)
print(next_result)
if 'next_page_token' in response:
new_page_token = response['next_page_token']
getNext = getNextPlace(new_page_token,APIKEY)
# NOTE: as in getPlaces, the returned list is appended as one nested element.
results.append(getNext)
return results
Figured out the issue!
The Google API rejects a follow-up request if it is sent within about 2 seconds of the previous one.
What I did was simply have the program sleep for 3 seconds and then send the request.
Now everything is working fine.
What you are trying to do can be seen in one function like:
def getPlaces(location, radius, API, i, type):
    """Return all nearby-search results, following every next_page_token.

    Args:
        location: Value for the ``location`` query parameter (str).
        radius: Value for the ``radius`` query parameter (str).
        API: API key sent as the ``key`` query parameter.
        i: Unused; kept so existing calls keep working. Earlier it was the
            start index into an internal token list and only worked when 0.
        type: Place type to filter on (str).

    Returns:
        A flat list with the ``results`` entries of every page.
    """
    # Imported here because the surrounding snippet never imports `time`.
    import time

    # `type` is the filter parameter the API documents; `types` is deprecated.
    url = ("https://maps.googleapis.com/maps/api/place/nearbysearch/json?location="
           + location + "&radius=" + radius + "&key=" + API + "&type=" + type)
    response = requests.get(url).json()
    results = list(response.get('results', []))

    while 'next_page_token' in response:
        # A fresh page token is not usable immediately; requesting it too
        # early is answered with INVALID_REQUEST, so wait before each follow-up.
        time.sleep(3)
        next_url = url + '&pagetoken=' + response['next_page_token']
        response = requests.get(next_url).json()
        results.extend(response.get('results', []))
    return results
Your code prints "Invalid response" because response['status'] == 'INVALID_REQUEST', which means the Google API service considers your request URL invalid.
As the documentation says, the parameters location, radius, type and key are required, and pagetoken is optional. So your second request URL is invalid because it does not contain all the required parameters.
Maybe you should try changing the URL to:
# Same search parameters as the first request, with the page token added last.
next_url = "".join([
    'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=', location,
    "&radius=", radius,
    "&type=", type,
    "&key=", APIKEY,
    "&pagetoken=", page_token,
])

Categories