So, I am testing this piece of code :
import requests
import json
searchTerm = 'parrot'
startIndex = '0'
searchUrl = "http://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=" + \
searchTerm + "&start=" + startIndex
r = requests.get(searchUrl)
response = r.content.decode('utf-8')
result = json.loads(response)
print(r)
print(result)
And the response is :
<Response [200]>
{'responseData': None, 'responseStatus': 403, 'responseDetails': 'This API is no longer available.'}
Seems that I am trying to use the old API and it is deprecated now. When I check on the Google Custom Search API I don't see any way to search straight on google images, is this even possible with the new API ?
It is possible, here is new API reference:
https://developers.google.com/custom-search/json-api/v1/reference/cse/list
import requests
import json
searchTerm = 'parrot'
startIndex = '1'
key = ' Your API key here. '
cx = ' Your CSE ID:USER here. '
searchUrl = "https://www.googleapis.com/customsearch/v1?q=" + \
searchTerm + "&start=" + startIndex + "&key=" + key + "&cx=" + cx + \
"&searchType=image"
r = requests.get(searchUrl)
response = r.content.decode('utf-8')
result = json.loads(response)
print(searchUrl)
print(r)
print(result)
That works fine, I just tried.
Related
I should preface this with I am not a programmer and most of this code was not written by me. I unfortunately have a need and am trying to hack my way through this.
What I am trying to do is chain a few API calls together to ultimately get a list of IPs. What this script does is queries the API and pulls (and prints) a list of device IDs. The device IDs look like this:
akdjlfijoaidjfod
g9jkidfjlskdjf44
3jdhfj4hf9dfiiu4
The device IDs then need to be passed as a parameter in the next API call like this:
https://api.example.com/devices/entities/devices/v1?ids=akdjlfijoaidjfod&ids=g9jkidfjlskdjf44&ids=3jdhfj4hf9dfiiu4 and so on.
I dont know where to begin. Instead of printing the asset ids, I assume they should be stored as a parameter (or variable) and then appended to the URL. I tried doing that with "ID_LIST" but that didnt seem to work. Can you guys point me in the right direction?
import requests
import json
# Define API REST paths
BASE_URL = "https://api.example.com/"
OAUTH_URL_PART = "oauth2/token"
DEVICE_SEARCH = "devices/queries/devices/v1"
DEVICE_DETAILS = "devices/entities/devices/v1"
# Empty auth token to hold value for subsequent request
auth_Token = ""
# Section 1 - Authenticate to Example OAUTH
# Build a dictionary to hold the headers
headers = {
'Content-type': 'application/x-www-form-urlencoded',
'accept': 'application/json'
}
# Build a dictionary to holds the authentication data to be posted to get a token
auth_creds = {}
auth_creds['client_id'] = "<client_id>"
auth_creds['client_secret'] = "<client_secret>"
auth_creds['grant_type'] = "client_credentials"
# Call the API to get a Authentication token - NOTE the authentication creds
print("Requesting token from " + BASE_URL + OAUTH_URL_PART)
auth_response = requests.post(BASE_URL + OAUTH_URL_PART,data=auth_creds, headers=headers)
# Check if successful
if auth_response.status_code != 201:
# Output debug information
print("\n Return Code: " + str(auth_response.status_code) + " " + auth_response.reason)
print("Path: " + auth_response.request.path_url)
print("Headers: ")
print(auth_response.request.headers)
print("Body: " + auth_response.request.body)
print("\n")
print("Trace_ID: " + auth_response.json()['meta']['trace_id'])
else:
# Section 2 - Capture OAUTH token and store in headers for later use
print("Token Created")
# Capture the auth token for reuse in subsequent calls, by pulling it from the response
# Note this token can be reused multiple times until it expires after 30 mins
auth_Token = auth_response.json()['access_token']
headers = {
'authorization':'bearer ' + auth_Token,
'accept': 'application/json'
}
# Section 3 - Reuse authentication token to call other Example OAUTH2 APIs
# Build parameter dictionary
call_params = {}
call_params['offset'] ="" # Non-mandatory param
call_params['limit'] ="5000" # The number of results
call_params['sort'] ="" #
call_params['filter'] ="" # To exclude devices
# Call devices API
print("Searching Asset ID by getting from " + BASE_URL + DEVICE_SEARCH)
DEVICE_search_response = requests.get(BASE_URL + DEVICE_SEARCH,params=call_params,headers=headers)
#DEVICE_DETAILS_response = request.get(BASE_URL + DEVICE_DETAILS,headers=headers)
# Check for errors
if DEVICE_search_response.status_code != 200:
# Output debug information
print("\n Return Code: " + str(DEVICE_search_response.status_code) + " " + DEVICE_search_response.reason)
print("Path: " + DEVICE_search_response.request.path_url)
print("Headers: ")
print(DEVICE_search_response.request.headers)
print("Body: " + DEVICE_search_response.request.body)
print("\n")
print("Trace_ID: " + DEVICE_search_response.json()['meta']['trace_id'])
else:
# Iterate the results and print
result = DEVICE_search_response.json()
print("DEVICE found on " + str(len(result['resources'])) + " the following device id:")
for devices in result['resources']:
print(devices)
###########Part that is not working###########
DEVICE_DETAILS_response = requests.get(BASE_URL + DEVICE_DETAILS,headers=headers)
#ID_LIST = str(len(result['resources']).replace(",", "&ids=")
if DEVICE_DETAILS_response.status_code != 200:
# Output debug information
print("\n Return Code: " + str(DEVICE_DETAILS_response.status_code) + " " + DEVICE_DETAILS_response.reason)
print("Path: " + DEVICE_DETAILS_response.request.path_url)
print("Headers: ")
print(DEVICE_DETAILS_response.request.headers)
print("Body: " + DEVICE_DETAILS_response.request.body)
print("\n")
print("Trace_ID: " + DEVICE_DETAILS_response.json()['meta']['trace_id'])
else:
result = DEVICE_DETAILS_response.json()
print("Device Details Found")
for details in result['resources']:
print(details)
Hi to convert the strings in result['resources']:
['akdjlfijoaidjfod',
'g9jkidfjlskdjf44',
'3jdhfj4hf9dfiiu4']
to : https://api.example.com/devices/entities/devices/v1?ids=akdjlfijoaidjfod&ids=g9jkidfjlskdjf44&ids=3jdhfj4hf9dfiiu4
try this funciton:
def get_modified_url(mylist, myurl):
url = myurl + '?'
for idx, b in enumerate(mylist): # enumerate list to get index and element in the list
if idx > 0:
url += '&ids=' + b # append &ids= to url if not first device id
else:
url += 'ids=' + b # append ids= to url if first device id
return url
print(get_modified_url(result['resources'], BASE_URL + DEVICE_DETAILS ))
full code would be:
def get_modified_url(mylist, myurl):
url = myurl + '?'
for idx, b in enumerate(mylist): # enumerate list to get index and element in the list
if idx > 0:
url += '&ids=' + b # append &ids= to url if not first device id
else:
url += 'ids=' + b # append ids= to url if first device id
return url
device_list = []
DEVICE_search_response = requests.get(BASE_URL + DEVICE_SEARCH,params=call_params,headers=headers)
# Check for errors
if DEVICE_search_response.status_code != 200:
# Output debug information
print("\n Return Code: " + str(DEVICE_search_response.status_code) + " " + DEVICE_search_response.reason)
print("Path: " + DEVICE_search_response.request.path_url)
print("Headers: ")
print(DEVICE_search_response.request.headers)
print("Body: " + DEVICE_search_response.request.body)
print("\n")
print("Trace_ID: " + DEVICE_search_response.json()['meta']['trace_id'])
else:
# Iterate the results and print
result = DEVICE_search_response.json()
print("DEVICE found on " + str(len(result['resources'])) + " the following device id:")
for devices in result['resources']:
print(devices)
device_list.append(devices)
new_url = get_modified_url(device_list, BASE_URL + DEVICE_DETAILS )
DEVICE_DETAILS_response = requests.get(new_url, headers=headers)
if DEVICE_DETAILS_response.status_code != 200:
# Output debug information
print("\n Return Code: " + str(DEVICE_DETAILS_response.status_code) + " " + DEVICE_DETAILS_response.reason)
print("Path: " + DEVICE_DETAILS_response.request.path_url)
print("Headers: ")
print(DEVICE_DETAILS_response.request.headers)
print("Body: " + DEVICE_DETAILS_response.request.body)
print("\n")
print("Trace_ID: " + DEVICE_DETAILS_response.json()['meta']['trace_id'])
else:
result = DEVICE_DETAILS_response.json()
print("Device Details Found")
for details in result['resources']:
print(details)
I am calling an API to get a list of properties. I obtain the jsons in the 'listings' file (https://api.nestoria.es/show_example?name=search_listings_es&syntax=1).
I am using zip codes to find each property and I need to add to each property its zip code. At the end I transform the json to a csv. I don't know how to add to each property the zip code (so I would need a new key-value to each result from the API call)...
Thanks!!!!!!!!!
Here it is the code:
from requests import get
import json
import pandas as pd
import time
import datetime
import csv
def get_nestoria(type):
#call the api
api = 'http://api.nestoria.es/api?action=search_listings'
place = '&place_name=' + area_name
listing_type = '&listing_type=' + type
json_es = '&encoding=json&pretty=1&country=es'
page = '&page='
api_input = api + place + listing_type + json_es
response = get(api_input)
# Check if the API has worked
if response.status_code == 200:
print("API called successfully")
elif response.status_code == 400:
print("Wrong request for" + area_name + ". Chechk this area is searchable")
elif response.status_code == 403:
print("Forbidden API call. Maximum number of calls reached.")
else:
print("Wrong code", response.status_code)
content_as_string = response.content.decode()
# Decode JSON
content = json.loads(content_as_string)
content_response = content['response']
# Number of total web pages neded for the area
web_pages = content_response['total_pages']
print('Number of pages in that area: ', web_pages)
print("Numer of total properties " + area_name, content_response['total_results'])
#2nd call to the API
homes = pd.DataFrame()
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0'}
for i in range(1, web_pages+1):
api_input = api + place + listing_type + json_es + page + str(i)
response = get(api_input, headers=headers)
content_as_string = response.content.decode()
content = json.loads(content_as_string)
content_response = content['response']
listings = content_response['listings']
listings = pd.DataFrame(listings)
if i==1:
homes = listings
else:
homes = homes.append(listings, sort=True)
time.sleep(3)
if homes.empty:
homes = homes
else:
homes = homes[['bathroom_number','bedroom_number','car_spaces','commission','construction_year','datasource_name', 'img_height','img_url','img_width', 'floor',
'keywords','latitude','lister_url','listing_type','location_accuracy','longitude','price','price_currency','price_formatted','price_high','price_low',
'property_type','room_number','size','size_type','size_unit','summary','thumb_height','thumb_url','thumb_width','title','updated_in_days','updated_in_days_formatted']]
return homes
homes = pd.DataFrame()
codigos_postales = ['01008']
today=datetime.date.today() #to change the name of the file
for i in codigos_postales:
area_name = i
temp = get_nestoria('buy')
if i == 0:
homes =temp
else:
homes = homes.append(temp,sort=True)
print('Number of extracted properties ', len(homes))
print(homes.head())
homes.to_csv('D:\\a000Master Big Data\\Prácticas\\Web scrapping\\Nestoria\\GranadaVenta'+str(today)+'.csv')
data = response.json()
Here data is the parsed response you get from sending the request.
You can then update the listings like this:
for i in data['listings']:
i['ZipCode'] = zipcode
zipcode being the one you want to assign i.e the one you sent in the request.
You can either convert the data object into dataframe and then call pd.to_csv or use python in-built csv.writer
If you don't have the ZipCode you can use the google maps API to get that
http://maps.googleapis.com/maps/api/geocode/json?address=valencia&sensor=true_or_false&key=YOUR_API_KEY
You will have to Sign Up to get your API key and then you will get zip code from the JSON response.
UPDATE:
Here is an example on how to use it.
api = 'http://api.nestoria.es/api?action=search_listings'
place = '&place_name=' + area_name
listing_type = '&listing_type=' + type
json_es = '&encoding=json&pretty=1&country=es'
page = '&page='
api_input = api + place + listing_type + json_es
response = get(api_input)
update = response.json()['response']['listings']
for i in update:
i['Zipcode'] = zipcode
I am using a weather API to design a slack bot service using python.
My source code is-
import requests
import re
import json
from bs4 import BeautifulSoup
def weather(cityname):
cityid = extractid(cityname)
url = "http://api.openweathermap.org/data/2.5/forecast?id=" + str(cityid) + "&APPID=c72f730d08a4ea1d121c8e25da7e4411"
while True:
r = requests.get(url, timeout=5)
while r.status_code is not requests.codes.ok:
r = requests.get(url, timeout=5)
soup = BeautifulSoup(r.text)
data = ("City: " + soup.city["name"] + ", Country: " + soup.country.text + "\nTemperature: " + soup.temperature["value"] +
" Celsius\nWind: " + soup.speed["name"] + ", Direction: " + soup.direction["name"] + "\n\n" + soup.weather["value"])
# print data
return data
def extractid(cname):
with open('/home/sourav/Git-Github/fabulous/fabulous/services/city.list.json') as data_file:
data = json.load(data_file)
for item in data:
if item["name"] == cname:
return item["id"]
def on_message(msg, server):
text = msg.get("text", "")
match = re.findall(r"~weather (.*)", text)
if not match:
return
searchterm = match[0]
return weather(searchterm.encode("utf8"))
on_bot_message = on_message
But executing the code gives the following error-
File "/usr/local/lib/python2.7/dist-packages/fabulous-0.0.1-py2.7.egg/fabulous/services/weather.py", line 19, in weather
" Celsius\nWind: " + soup.speed["name"] + ", Direction: " + soup.direction["name"] + "\n\n" + soup.weather["value"])
TypeError: 'NoneType' object has no attribute '__getitem__'
I can't figure out what's the error. Please help!
__getitem__ is called when you ask for dictionary key like a['abc'] translates to a.__getitem__('abc')
so in this case one attribute of soup is None (speed, direction or weather)
ensure that your r.text contains data you want, simply print it:
print(r.text)
list structure in parsed data:
for child in soup.findChildren():
print child
always assume your entry data might be wrong, instead doing soup.city do soup.find('city'), it might be empty so:
city = soup.find('city')
if len(city):
city_name = city[0]['name']
else:
city_name = 'Error' # or empty, or sth
I've been trying to use the API of the website Idealista (https://www.idealista.com/) to retrieve information of real estate data.
Since I'm not familiarized with OAuth2 I haven't been able to obtain the token so far. I have just been provided with the api key, the secret and some basic info of how to mount the http request.
I would appreciate an example (preferably in Python) of the functioning of this API, or else some more generic info about dealing with OAuth2 and Python.
After some days of research I came up with a basic python code to retrieve real estate data from the Idealista API.
def get_oauth_token():
http_obj = Http()
url = "https://api.idealista.com/oauth/token"
apikey= urllib.parse.quote_plus('Provided_API_key')
secret= urllib.parse.quote_plus('Provided_API_secret')
auth = base64.encode(apikey + ':' + secret)
body = {'grant_type':'client_credentials'}
headers = {'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8','Authorization' : 'Basic ' + auth}
resp, content = http_obj.request(url,method='POST',headers=headers, body=urllib.parse.urlencode(body))
return content
This function would return a JSON with the OAuth2 token and the session time in seconds. Afterwards, to query the API, it would be as simple as:
def search_api(token):
http_obj = Http()
url = "http://api.idealista.com/3.5/es/search?center=40.42938099999995,-3.7097526269835726&country=es&maxItems=50&numPage=1&distance=452&propertyType=bedrooms&operation=rent"
headers = {'Authorization' : 'Bearer ' + token}
resp, content = http_obj.request(url,method='POST',headers=headers)
return content
This time the we would find in the content var the data we were looking for, again as a JSON.
That can't be marked as correct answer since
auth = base64.encode(apikey + ':' + secret)
body = {'grant_type':'client_credentials'}
headers = {'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8','Authorization' : 'Basic ' + auth}
Will give you TypeError:
can only concatenate str (not "bytes") to str
Since base64encode returns a byte type object...
It's true Idealista API is very limited about documentation, but I think this is a better approach since I don't use unnecesary libs (Only native):
#first request
message = API_KEY + ":" + SECRET
auth = "Basic " + base64.b64encode(message.encode("ascii")).decode("ascii")
headers_dic = {"Authorization" : auth,
"Content-Type" : "application/x-www-form-urlencoded;charset=UTF-8"}
params_dic = {"grant_type" : "client_credentials",
"scope" : "read"}
r = requests.post("https://api.idealista.com/oauth/token",
headers = headers_dic,
params = params_dic)
This works flawless with only python requests and base64 module...
regards
This is my code, improving #3... this run ok! for me!!!!
only put your apikey and your password (secret)...
import pandas as pd
import json
import urllib
import requests as rq
import base64
def get_oauth_token():
url = "https://api.idealista.com/oauth/token"
apikey= 'your_api_key' #sent by idealista
secret= 'your_password' #sent by idealista
auth = base64.b64encode(apikey + ':' + secret)
headers = {'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8' ,'Authorization' : 'Basic ' + auth}
params = urllib.urlencode({'grant_type':'client_credentials'})
content = rq.post(url,headers = headers, params=params)
bearer_token = json.loads(content.text)['access_token']
return bearer_token
def search_api(token, url):
headers = {'Content-Type': 'Content-Type: multipart/form-data;', 'Authorization' : 'Bearer ' + token}
content = rq.post(url, headers = headers)
result = json.loads(content.text)['access_token']
return result
country = 'es' #values: es, it, pt
locale = 'es' #values: es, it, pt, en, ca
language = 'es' #
max_items = '50'
operation = 'sale'
property_type = 'homes'
order = 'priceDown'
center = '40.4167,-3.70325'
distance = '60000'
sort = 'desc'
bankOffer = 'false'
df_tot = pd.DataFrame()
limit = 10
for i in range(1,limit):
url = ('https://api.idealista.com/3.5/'+country+'/search?operation='+operation+#"&locale="+locale+
'&maxItems='+max_items+
'&order='+order+
'¢er='+center+
'&distance='+distance+
'&propertyType='+property_type+
'&sort='+sort+
'&numPage=%s'+
'&language='+language) %(i)
a = search_api(get_oauth_token(), url)
df = pd.DataFrame.from_dict(a['elementList'])
df_tot = pd.concat([df_tot,df])
df_tot = df_tot.reset_index()
I found some mistakes. At least, I cannot run it.
I believe, I improved with this:
import pandas as pd
import json
import urllib
import requests as rq
import base64
def get_oauth_token():
url = "https://api.idealista.com/oauth/token"
apikey= 'your_api_key' #sent by idealist
secret= 'your_password' #sent by idealista
apikey_secret = apikey + ':' + secret
auth = str(base64.b64encode(bytes(apikey_secret, 'utf-8')))[2:][:-1]
headers = {'Authorization' : 'Basic ' + auth,'Content-Type': 'application/x-www-form-
urlencoded;charset=UTF-8'}
params = urllib.parse.urlencode({'grant_type':'client_credentials'}) #,'scope':'read'
content = rq.post(url,headers = headers, params=params)
bearer_token = json.loads(content.text)['access_token']
return bearer_token
def search_api(token, URL):
headers = {'Content-Type': 'Content-Type: multipart/form-data;', 'Authorization' : 'Bearer ' + token}
content = rq.post(url, headers = headers)
result = json.loads(content.text)
return result
I have spent hours in frustration and now I have this:
import requests, json, urllib
import time
import string, random
from hashlib import sha1
import hmac, binascii
def twitterIDGenerator(length):
toRet = ""
for i in range(0, length):
toRet = toRet + random.choice(string.hexdigits)
return toRet
def twitterSignatureGenerator(baseString, keyString):
hashed = hmac.new(keyString, baseString, sha1)
return binascii.b2a_base64(hashed.digest()).rstrip('\n')
OAUTH_CONSUMER_KEY = ''
OAUTH_NONCE = twitterIDGenerator(32)
OAUTH_SIGNATURE_METHOD = 'HMAC-SHA1'
OAUTH_TIMESTAMP = str(int(time.time()))
OAUTH_VERSION = '1.0'
# Get request token from Twitter
request_tokenURL = 'https://api.twitter.com/oauth/request_token'
request_tokenParameterString = ("oauth_consumer_key=" + OAUTH_CONSUMER_KEY +
"&oauth_nonce=" + OAUTH_NONCE + "&oauth_signature_method=" +
OAUTH_SIGNATURE_METHOD + "&oauth_timestamp=" + OAUTH_TIMESTAMP +
"&oauth_version=" + OAUTH_VERSION)
request_tokenSigBaseString = ("POST&https%3A%2F%2Fapi.twitter.com%2Foauth%2Frequest_token&" +
urllib.quote(request_tokenParameterString))
request_tokenSignature = twitterSignatureGenerator(request_tokenSigBaseString,
'[REDACTED consumer secret key]')
request_tokenHeaders = {'oauth_nonce': OAUTH_NONCE,
'oauth_callback': 'oob',
'oauth_signature_method': OAUTH_SIGNATURE_METHOD,
'oauth_timestamp': OAUTH_TIMESTAMP,
'oauth_consumer_key': OAUTH_CONSUMER_KEY,
'oauth_signature': urllib.quote(request_tokenSignature),
'oauth_version': OAUTH_VERSION}
request_tokenResponse = requests.post(request_tokenURL, headers=request_tokenHeaders)
print request_tokenResponse.text
So far, it is supposed to return a request_token so I can have my user go to the PIN website so I can get the access_token. But I just get "Failed to validate oauth signature and token" from Twitter.
A possible reason for this is wrong URL encoding. I see that Twitter needs RFC3986 encoding. Is there a way to do this in Python? If yes, should I do it only at the two locations I am currently using urllib.quote? Is my oauth_signature generated correctly?
The documentation is annoyingly convoluted.