Twitter Search API using urllib2 - python

I am a beginner at making API calls with Python (or API calls in general). I am trying a basic call with the Twitter API.
My Code for generating oauth_signature is as follows :
def getSignature(query):
    """Compute the OAuth 1.0 HMAC-SHA1 signature for a search query.

    Percent-encodes ``query`` into ``key_dict['q']``, signs the resulting
    signature base string and stores the percent-encoded result in
    ``key_dict['oauth_signature']``. Nothing is returned.
    """
    key_dict['q'] = urllib.quote(query, '')
    # Parameter string: "key=value" pairs in sorted key order, joined by "&".
    finKey = ""
    for key in sorted(key_dict.keys()):
        finKey += key + "="+key_dict[key]+"&"
    finKey = finKey[:-1]  # drop the trailing "&"
    # Signature base string: METHOD & encoded URL & encoded parameter string.
    finKey = HTTP_METHOD + "&" + urllib.quote(BASE_URL, '') + "&" + urllib.quote(finKey, '')
    # Signing key: encoded consumer secret and token secret joined by "&".
    key = urllib.quote(CONSUMER_SECRET_KEY, '')+"&"+urllib.quote(ACCESS_TOKEN_SECRET, '')
    hashed = hmac.new(key, finKey, sha1)
    finKey = binascii.b2a_base64(hashed.digest())
    key_dict['oauth_signature'] = urllib.quote(finKey, '')
where key_dict stores all the keys :
key_dict = dict()
key_dict['oauth_consumer_key'] = urllib.quote(CONSUMER_KEY, '')
key_dict['oauth_nonce'] = urllib.quote("9ab59691142584g739134971f75aa986", '')
key_dict['oauth_signature_method'] = urllib.quote("HMAC-SHA1", '')
key_dict['oauth_timestamp'] = urllib.quote(str(int(time.time())), '')
key_dict['oauth_token'] = urllib.quote(ACCESS_TOKEN, '')
key_dict['oauth_version'] = urllib.quote(OAUTH_VERSION, '')
BASE_URL = "https://api.twitter.com/1.1/search/tweets.json?" + urllib.quote("q=delhi+elections", '')
I generate the Base Header String using the following :
def getHeaderString():
    """Return the ``Authorization`` header value built from ``key_dict``.

    The result has the form ``OAuth name="value", name="value", ...`` and
    lists the OAuth parameters in the fixed order given by ``key_list``.
    """
    ret = "OAuth "
    key_list =['oauth_consumer_key', 'oauth_nonce', 'oauth_signature', 'oauth_signature_method', 'oauth_timestamp', 'oauth_token', 'oauth_version']
    for key in key_list:
        ret = ret+key+"=\""+key_dict[key]+"\", "
    ret = ret[:-2]  # drop the trailing ", "
    return ret
Although when I am making the call, I get :
urllib2.HTTPError: HTTP Error 401: Unauthorized
OR
urllib2.URLError: <urlopen error [Errno 60] Operation timed out>
My final request is made using the following :
getSignature("delhi+elections")
headers = { 'Authorization' : getHeaderString()}
req = urllib2.Request(BASE_URL, headers= headers)
response = urllib2.urlopen(req)
Where am I going wrong ?

A few points that should have been mentioned somewhere:
The call binascii.b2a_base64(hashed.digest()) appends a newline character to the end of the string. This caused the oauth_signature to fail authentication.
The query delhi+elections is actually supposed to be delhi elections. This mismatch also caused the SHA-1 signature comparison to fail.
Fixing both of them solved the problem.
The final Code :
key_dict = dict()
key_dict['oauth_consumer_key'] = urllib.quote(CONSUMER_KEY, '')
key_dict['oauth_nonce'] = urllib.quote("9aa39691142584s7df134971375aa986", '')
key_dict['oauth_signature_method'] = urllib.quote("HMAC-SHA1", '')
key_dict['oauth_timestamp'] = urllib.quote(str(int(time.time())), '')
key_dict['oauth_token'] = urllib.quote(ACCESS_TOKEN, '')
key_dict['oauth_version'] = urllib.quote(OAUTH_VERSION, '')
BASE_URL = "https://api.twitter.com/1.1/search/tweets.json"
def getSignature(query):
    """Compute the OAuth 1.0 HMAC-SHA1 signature for a search query.

    Percent-encodes ``query`` into ``key_dict['q']``, signs the signature
    base string and stores the percent-encoded signature in
    ``key_dict['oauth_signature']``. Nothing is returned.
    """
    key_dict['q'] = urllib.quote(query, '')
    # The signature itself is never part of the signed parameters. Skipping
    # it keeps a value left over from an earlier call out of the base string,
    # so the function can safely be called more than once.
    finKey = "&".join(
        key + "=" + key_dict[key]
        for key in sorted(key_dict)
        if key != 'oauth_signature'
    )
    # Signature base string: METHOD & encoded URL & encoded parameter string.
    finKey = HTTP_METHOD + "&" + urllib.quote(BASE_URL, '') + "&" + urllib.quote(finKey, '')
    # Signing key: encoded consumer secret and token secret joined by "&".
    key = urllib.quote(CONSUMER_SECRET_KEY, '') + "&" + urllib.quote(ACCESS_TOKEN_SECRET, '')
    hashed = hmac.new(key, finKey, sha1)
    # b2a_base64 appends a trailing newline; strip it before encoding.
    finKey = binascii.b2a_base64(hashed.digest())[:-1]
    key_dict['oauth_signature'] = urllib.quote(finKey, '')
def getHeaderString():
    """Return the ``Authorization`` header value built from ``key_dict``.

    The result has the form ``OAuth name="value", name="value", ...`` and
    lists the OAuth parameters in the fixed order given by ``key_list``.
    """
    key_list = ['oauth_consumer_key', 'oauth_nonce', 'oauth_signature', 'oauth_signature_method', 'oauth_timestamp', 'oauth_token', 'oauth_version']
    return "OAuth " + ", ".join(key + "=\"" + key_dict[key] + "\"" for key in key_list)
url = BASE_URL
getSignature("delhi elections")
headers = { 'Authorization' : getHeaderString()}
values = {'q':'delhi elections'}
data = urllib.urlencode(values)
req = urllib2.Request(url+"?"+data, headers= headers)
response = urllib2.urlopen(req)
the_page = response.read()
print the_page

Instead of coding your own client, have you tried using tweepy? For a reference implementation using this library, you can check twitCheck client.

Related

How to sign an OKEx POST API request?

The below is a result of this question How to sign an OKEx API request? and some of the answers:
import hmac
import base64
import requests
import datetime
import json
from config import KEY, SECRET, PASS, ROOT_URL
def get_time():
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.mmmZ``."""
    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so isoformat() emits no "+00:00" offset before the "Z".
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    return now.isoformat("T", "milliseconds") + "Z"
def signature(timestamp, request_type, endpoint, body, secret):
    """Return the Base64-encoded HMAC-SHA256 signature for a request.

    The signed message is ``timestamp + METHOD + endpoint + body``, where a
    non-empty ``body`` is first serialized with ``json.dumps``.

    Args:
        timestamp: Request timestamp; converted with ``str``.
        request_type: HTTP method name; upper-cased before signing.
        endpoint: Request path, appended to the message as-is.
        body: ``''`` for no body, otherwise a JSON-serializable object.
        secret: Secret key string used as the HMAC key.

    Returns:
        The signature as ``bytes`` (the output of ``base64.b64encode``).
    """
    if body != '':
        body = json.dumps(body)
    message = str(timestamp) + request_type.upper() + endpoint + body
    print(message)
    mac = hmac.new(secret.encode('utf-8'), message.encode('utf-8'), digestmod='sha256')
    return base64.b64encode(mac.digest())
def get_header(request_type, endpoint, body):
    """Build the authentication headers for one request.

    A single timestamp from ``get_time()`` is used both in the signature
    and in the ``OK-ACCESS-TIMESTAMP`` header so that the two agree.
    """
    # Named `timestamp` rather than `time` to avoid shadowing the stdlib module name.
    timestamp = get_time()
    return {
        'CONTENT-TYPE': 'application/json',
        'OK-ACCESS-KEY': KEY,
        'OK-ACCESS-SIGN': signature(timestamp, request_type, endpoint, body, SECRET),
        'OK-ACCESS-TIMESTAMP': str(timestamp),
        'OK-ACCESS-PASSPHRASE': PASS,
    }
def get(endpoint, body=''):
    """Send a signed GET request to ``ROOT_URL + endpoint`` and return the response."""
    url = ROOT_URL + endpoint
    header = get_header('GET', endpoint, body)
    return requests.get(url, headers=header)
def post(endpoint, body=''):
    """Send a signed POST request to ``ROOT_URL + endpoint`` and return the response.

    As written, ``body`` is only passed to ``get_header`` for signing; it is
    not attached to the outgoing request.
    """
    url = ROOT_URL + endpoint
    header = get_header('POST', endpoint, body)
    return requests.post(url, headers=header)
where KEY, SECRET, PASS are the API key, secret key, and pass phrase respectively; The ROOT_URL is 'https://www.okex.com'.
The Problem
GET requests work absolutely fine, so when I run the following, there are no issues:
ENDPOINT = '/api/v5/account/balance'
BODY = ''
response = get(ENDPOINT)
response.json()
However, when I try to place an order via a POST request, like so:
ENDPOINT = '/api/v5/trade/order'
BODY = {"instId":"BTC-USDT",
"tdMode":"cash",
"side":"buy",
"ordType":"market",
"sz":"1"}
response = post(ENDPOINT, body=BODY)
response.json()
I get the following output, i.e. it won't accept the signature:
{'msg': 'Invalid Sign', 'code': '50113'}
Related Questions
In this one Can't figure out how to send a signed POST request to OKEx an answer was provided, but it does not work for me as I was already using the suggested URL. More or less the same question was asked here Unable to send a post requests OKEX Invalid Signature, but no activity likely due to the format, so I thought I would repost and elaborate.
OKEX Docs
The docs simply specify that The API endpoints of Trade require authentication (https://www.okex.com/docs-v5/en/?python#rest-api-authentication-signature). But they make no reference to there being any difference between the two methods. Away from that, I am including all required parameters in the body of the post request as far as I can see.
I would appreciate any input on this.
Many thanks!
I ran into the same POST problem and figured it out. I used new domain name okex.com. Here is my code.
def set_userinfo(self, pos_mode="long_short_mode"):
    """Set the account position mode via ``/api/v5/account/set-position-mode``.

    Args:
        pos_mode: Value sent as ``posMode``. Defaults to the value the
            original request body used.

    Returns:
        The decoded JSON response, or ``None`` if the request failed (the
        error is logged).
    """
    position_path = "/api/v5/account/set-position-mode"
    # Sign exactly the payload that is sent: signing one posMode and posting
    # another makes the signature mismatch the request body.
    payload = {"posMode": pos_mode}
    try:
        self.get_header("POST", position_path, payload)
        return requests.post(url=self.base_url + position_path, headers=self.headers, json=payload).json()
    except Exception as e:
        log.error("OK set_userinfo error={} type={}".format(e, type(e)))
        return None
def get_header(self, request_type, endpoint, body=''):
    """Refresh the timestamp and signature entries of ``self.headers`` in place.

    The same timestamp is stored in ``OK-ACCESS-TIMESTAMP`` and passed to
    ``self.signature`` so the two stay consistent. Nothing is returned.
    """
    timestamp = self.get_time()
    self.headers["OK-ACCESS-TIMESTAMP"] = timestamp
    self.headers["OK-ACCESS-SIGN"] = self.signature(timestamp, request_type, endpoint, body)
def signature(self, timestamp, request_type, endpoint, body):
    """Return the Base64-encoded HMAC-SHA256 signature for a request.

    The signed message is ``timestamp + METHOD + endpoint + body``; a
    non-empty ``body`` is serialized with ``json.dumps`` first. The HMAC key
    is ``self.secret_key``. The result is ``bytes``.
    """
    if body != '':
        body = json.dumps(body)
    message = str(timestamp) + request_type.upper() + endpoint + body
    mac = hmac.new(self.secret_key.encode('utf-8'), message.encode('utf-8'), digestmod='sha256').digest()
    return base64.b64encode(mac)
I fixed the same problem.
The 'body' used in both signature() and get_header() should be JSON.
So you should add the following code:
# Treat an empty dict or None as "no body"; otherwise sign the JSON text.
if str(body) == '{}' or str(body) == 'None':
    body = ''
else:
    body = json.dumps(body)
I ran into the same problem and solved it using below code snippet, the idea is from https://stackoverflow.com/a/68115787/20497127, but I modified a little by adding POST functionality
APIKEY = "" # input key
APISECRET = "" #input secret
PASS = "" #input passphrase
BASE_URL = 'https://www.okx.com'
def send_signed_request(http_method, url_path, payload=None):
    """Send a signed GET or POST request to the OKX REST API.

    Args:
        http_method: ``'GET'`` or ``'POST'``.
        url_path: Request path such as ``'/api/v5/account/balance'``. It is
            appended to ``BASE_URL`` and is part of the signed message.
        payload: Request body. For POST, pass the already-serialized JSON
            string: the same text is both signed and sent. ``None`` or
            ``{}`` means "no body".

    Returns:
        The decoded JSON response.

    Raises:
        ValueError: If ``http_method`` is neither ``'GET'`` nor ``'POST'``.
    """
    if http_method not in ('GET', 'POST'):
        raise ValueError("unsupported http_method: {!r}".format(http_method))

    def get_time():
        # Ask isoformat() for milliseconds explicitly. Slicing the default
        # output breaks when the microsecond field happens to be 0, because
        # isoformat() then omits the fraction entirely.
        now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
        return now.isoformat(timespec='milliseconds') + 'Z'

    def signature(timestamp, method, request_path, body, secret_key):
        if str(body) == '{}' or str(body) == 'None':
            body = ''
        message = str(timestamp) + method.upper() + request_path + str(body)
        mac = hmac.new(secret_key.encode('utf-8'), message.encode('utf-8'), digestmod='sha256')
        return base64.b64encode(mac.digest())

    # set request header
    def get_header(request='GET', endpoint='', body=None):
        cur_time = get_time()
        return {
            'CONTENT-TYPE': 'application/json',
            'OK-ACCESS-KEY': APIKEY,
            'OK-ACCESS-SIGN': signature(cur_time, request, endpoint, body, APISECRET),
            'OK-ACCESS-TIMESTAMP': str(cur_time),
            'OK-ACCESS-PASSPHRASE': PASS,
            # demo trading: need to set x-simulated-trading=1, live trading is 0
            'x-simulated-trading': '1',
        }

    url = BASE_URL + url_path
    header = get_header(http_method, url_path, payload)
    # Only the URL is printed: the header holds the API key and passphrase.
    print(url)
    if http_method == 'GET':
        response = requests.get(url, headers=header)
    else:
        response = requests.post(url, headers=header, data=payload)
    return response.json()
# this will run get requests
res = send_signed_request("GET", "/api/v5/account/balance", payload={})
# this will run post requests
data = {
"instId": "BTC-USDT",
"tdMode": "cross",
"side": "sell",
"ccy":"USDT",
"ordType": "limit",
"px": "100000",
"sz": "0.01"
}
res = send_signed_request("POST", "/api/v5/trade/order", payload=json.dumps(data))

How to use nordvpn servers as proxy for python requests?

I am trying to create a list of proxies using NordVPN for Python requests, but I can't seem to figure out the correct format for specifying the server as a proxy.
From what I understand, the format is as follows, using this server as an example:
proxy = {
'http': "username:password#us6181.nordvpn.com",
'https': "username:password#us6181.nordvpn.com"
}
I have tried various combinations:
my login email and password
my nordvpn account username and password
I realize not all servers can be used as proxy so I made sure they are
I tried using udp/tcp instead of http/https
None of these attempts worked, and I really hope someone can tell me the proper way to do it.
Here is a simple script that i made:
import requests
from requests.auth import HTTPProxyAuth
import re
import random
# Some NordVPN proxy IPs to try:
#196.240.57.107
#37.120.217.219
up1 = ['username:password'
       ]
up2 = random.choice(up1)
# Split "user:pass" at the first colon. Unlike the regex/str-replace
# approach, this also works when the password contains non-word characters.
u3, _, p3 = up2.partition(':')
proxies = {"http": "http://217.138.202.147"}
auth = HTTPProxyAuth(u3, p3)
x = requests.get("http://ifconfig.me/ip")
print('Real ip: ' + x.text)
try:
    r = requests.get("http://ipv4.icanhazip.com", proxies=proxies, auth=auth)
    print(r.text)
except requests.exceptions.ProxyError:
    # NOTE(review): requests picks the proxy by URL scheme, so an "https"
    # entry is presumably not applied to this http:// URL - confirm that a
    # direct (unproxied) retry is what is intended here.
    proxies = {"https": "http://217.138.202.147"}
    r = requests.get("http://ipv4.icanhazip.com/", proxies=proxies, auth=auth)
    print(r.text)
Some proxies won't work, so you have to test them.
Here is a tester:
import requests
from requests.auth import HTTPProxyAuth
import re
import random
list1 = []
def main(max_attempts=20):
    """Append recommended NordVPN server IPs for a random country to ``list1``.

    A random country id in ``0..230`` is queried; if the response contains
    no server IPs, another random country is tried.

    Args:
        max_attempts: Upper bound on the number of countries tried, so that
            a persistent failure cannot retry forever.
    """
    headers = {
        'accept': "application/json/",
        'content-type': "application/json"
    }
    for _ in range(max_attempts):
        country_id = str(random.randint(0, 230))
        url = 'https://nordvpn.com/wp-admin/admin-ajax.php?action=servers_recommendations&filters={"country_id":' + country_id + '}'
        response = requests.request("GET", url, headers=headers)
        # Capture just the dotted-quad address; the dots are escaped so they
        # match literal "." characters only.
        ips = re.findall(r'"ip":"(\d+\.\d+\.\d+\.\d+)"', response.text)
        if ips:
            list1.extend(ips)
            return
main()
# Marker text of the proxy's error page, returned when authentication fails.
AUTH_ERROR_MARKER = '<p>The following error was encountered while trying to retrieve the URL: <a'
up1 = ['username:password'
       ]
for a in list1:
    prox = a
    up2 = random.choice(up1)
    # Split "user:pass" at the first colon.
    u3, _, p3 = up2.partition(':')
    proxies = {"http": "http://" + prox}
    auth = HTTPProxyAuth(u3, p3)
    try:
        r = requests.get("http://ipv4.icanhazip.com", proxies=proxies, auth=auth)
    except requests.exceptions.ProxyError:
        print(prox + ' Failed')
        continue
    # Test the response body (r.text). `in r` would iterate the Response's
    # raw byte chunks and never equal the marker string.
    if AUTH_ERROR_MARKER in r.text:
        print(prox + ' Auth failed')
    else:
        print(prox + ' Good')
        print('Your Ip: ' + r.text)
"Auth failed" means you have to try other usernames and passwords.
Hope you enjoy it

How to create testcase for those method

I have two methods, and below is the test I have written for feedback_selected_material_to_unsplash. I need to convert this test to use mock, and I'm also not sure how to properly write a test for download_image_material. I would appreciate any help.
Here is the testcase
def test_feedback_selected_material_to_unsplash_download_location_not_null(self, mock_post):
    """Post the download location and assert how ``mock_post`` was called."""
    URL = 'https://material.richka.co/track/unsplash'
    data = {
        'download_location': self.download_location
    }
    resp = requests.post("www.someurl.com", data=json.dumps(data), headers={'Content-Type': 'application/json'})
    mock_post.assert_called_with(URL, data=json.dumps(data), headers={'Content-Type': 'application/json'})
def feedback_selected_material_to_unsplash(download_location):
    """Report a selected material's download location to the tracking endpoint.

    Nothing is sent when ``download_location`` is the string ``'null'``.
    The call is best-effort: a failure is logged and never raised.
    """
    if download_location == 'null':
        return
    URL = 'https://test/track/unsplash'
    data = {
        'download_location': download_location
    }
    try:
        response = requests.post(URL, data)
        logger.info(response.json())
        logger.info('Done the feedback to unsplash.com')
    except Exception:
        # Still best-effort, but leave a trace instead of failing silently,
        # and do not swallow KeyboardInterrupt/SystemExit as a bare except does.
        logger.exception('Feedback to unsplash.com failed')
I don't know how to write a test case for this method:
# Generator: downloads up to 50 media items from `data`, stores each file under
# <BASE_DIR>/web/static/<hashed dir>/<save folder>/, appends a metadata record
# to a Material row, and yields one JSON line per stored item plus a final
# {'result': True, 'end': True, ...} line.
# `request` is not referenced anywhere in the visible body.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below has to be inferred - confirm against the real source.
def download_image_material(request, data, video, keyword, keyword_gui, material_type):
dname_tmp = settings.BASE_DIR + '/web/static'
hashedDname = get_path_stock_box()
saveFolder = gen_hash(gen_fname())
path = '%s/%s/%s' % (dname_tmp, hashedDname, saveFolder)
if not os.path.exists(path):
os.makedirs(path)
objs = []
# Create the Material row up front; its JSON payload is extended per item below.
material = Material(generated_video_data=video, keyword = keyword)
material.is_video = material_type
material.save()
for index,datum in enumerate(data):
# Hard cap: stop after the first 50 entries of `data`.
if index == 50:
break
obj = {}
obj['word'] = datum['word']
obj['link'] = datum['link']
obj['user'] = datum['user']
# download_location is only kept for links that contain 'unsplash.com'.
obj['download_location'] = datum['download_location'] if 'unsplash.com' in datum['link'] else None
imgUrl = datum['small']
# Any download failure skips this item rather than aborting the whole run.
try:
headers = {}
response, imgRaw = http_get(imgUrl, headers=headers)
except urllib.error.HTTPError as ex:
continue
except:
continue
# Derive the file extension from the URL with its query string removed.
imgUrlWithoutQuery = re.sub(r'\?.*','',imgUrl)
suffix = imgUrlWithoutQuery[imgUrlWithoutQuery.rfind('.') + 1 :]
suffix = suffix.lower()
if suffix in settings.IMG_EXTENSIONS or suffix in settings.VIDEO_EXTENSIONS or suffix.lower() == 'mp4':
pass
else:
# Unknown extension: fall back to the response's Content-Type.
mime = response.info()['Content-Type']
suffix = _mime2suffix(mime)
# NOTE(review): `and` binds tighter than `or`, so this reads as
# (not image and not video) or (suffix == 'mp4') - confirm that skipping
# 'mp4' here is intended.
if suffix not in settings.IMG_EXTENSIONS and suffix not in settings.VIDEO_EXTENSIONS or suffix.lower() == 'mp4':
continue
imgFname = '%s.%s' % (gen_hash(gen_fname()), suffix)
# Absolute path used for writing; the "Saved" variant is relative to the static root.
imgPathFname = '%s/%s/%s/%s' % (dname_tmp, hashedDname, saveFolder, imgFname)
imgPathFnameSaved = '%s/%s/%s' % (hashedDname, saveFolder, imgFname)
fout = open(imgPathFname, 'wb')
fout.write(imgRaw)
fout.close()
#process file
obj['media'] = imgPathFnameSaved
if suffix in settings.IMG_EXTENSIONS:
save_image_with_resize_and_rotate(imgPathFname)
obj['media'] = imgPathFnameSaved
elif suffix.lower() in settings.VIDEO_EXTENSIONS:
# convert videos to mp4 and delete original files
# NOTE(review): `path` is the save directory computed at the top of the
# function, not the file just written (imgPathFname) - confirm which is meant.
upload.conv2mp4(path)
os.remove(path)
hashed_name = imgPathFnameSaved[ : imgPathFnameSaved.rfind('.')] + '.mp4'
obj['media'] = hashed_name
if suffix == 'mp4':
# For mp4 files: record the duration, generate a thumbnail, and point
# obj['media'] at the same file name with a '.png' extension.
obj['video'] = {}
obj['video']['duration'] = gen.get_video_duration(settings.BASE_DIR + '/web/static/' + obj['media'])
gen_thumbnail(settings.BASE_DIR + '/web/static/' + obj['media'])
fname_mp4 = obj['media']
obj['media'] = fname_mp4[ : fname_mp4.rfind('.')] + '.png'
#process service name
url = urlparse(datum['link'])
obj['service'] = {}
obj['service']['url'] = f'{url.scheme}://{url.netloc}'
obj['service']['hostname'] = url.netloc
# get json from database
# Re-read the row, rebuild `objs` from the stored JSON plus this item, and save.
material = Material.objects.get(id=material.pk)
objDb = material.get_json_data()
objs = []
if objDb:
for objOld in objDb:
objs.append(objOld)
objs.append(obj)
material.set_json_data(objs)
material.save()
res_json = {'result': True, 'data':obj, 'keyword': keyword_gui, 'pk': material.pk}
yield json.dumps(res_json) + '\n'
# Final save and end-of-stream marker.
material.set_json_data(objs)
material.save()
yield json.dumps({'result': True, 'end': True, 'pk': material.pk}) + '\n'

Getting ValueError: unknown url type: ' '

I have the code below, which iterates through some tracks. For each track I want to use the musixmatch API to get and print the lyrics of the track, based on the artist name and track name.
Code that iterates through some tracks and prints the lyrics:
for i, v in tracks.items():
artist = tracks[i]['artist'].replace(" ", "+")
title = tracks[i]['title'].replace(" ", "+")
print(tracks)
print(song_lyric(title, artist))
The print(tracks) returns in this format:
{12: {'trackID': 12, 'title': 'Achtung Baby', 'number': '1', 'artist': 'U2', 'album': 'Achtung Baby', 'albumID': 2, 'duration': '291'}
When the code is executed, the lyrics for the first tracks are printed, but then an error appears:
Traceback (most recent call last):
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 239, in <module>
print(song_lyric(title, artist))
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 72, in song_lyric
lyrics_tracking(tracking_url)
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 79, in lyrics_tracking
request = urllib.request.Request(querystring)
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 329, in __init__
self.full_url = url
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 355, in full_url
self._parse()
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 384, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: ''
Do you know why this error is appearing?
The methods to get the lyrics from musixmatch are public available:
def song_lyric(song_name, artist_name):
    """Fetch a track's lyrics from the musixmatch ``matcher.lyrics.get`` endpoint.

    The tracking URL contained in the response is requested through
    ``lyrics_tracking`` before returning. Returns the lyrics body followed
    by a blank line and the copyright notice.
    """
    while True:
        querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
            song_name) + "&q_artist=" + urllib.parse.quote(
            artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
        # matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        while True:
            try:
                response = urllib.request.urlopen(request,
                                                  timeout=4)  # timeout set to 4 seconds; automatically retries if times out
                raw = response.read()
            except socket.timeout:
                print("Timeout raised and caught")
                continue
            break
        json_obj = json.loads(raw.decode('utf-8'))
        body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
        copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
        tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
        #print(tracking_url)
        lyrics_tracking(tracking_url)
        return (body + "\n\n" + copyright)
def lyrics_tracking(tracking_url):
    """Request ``tracking_url`` and print the raw response bytes.

    The request is retried for as long as it times out.
    """
    while True:
        querystring = tracking_url
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        try:
            response = urllib.request.urlopen(request,
                                              timeout=4)  # timeout set to 4 seconds; automatically retries if times out
            raw = response.read()
        except socket.timeout:
            print("Timeout raised and caught")
            continue
        break
    print(raw)
Full working example that reproduces the error:
import requests
import json
import urllib.request, urllib.error, urllib.parse
import socket
apikey_musixmatch = '0b4a363bbd71974c2634837d5b5d1d9a' #generated for the example
apiurl_musixmatch = 'http://api.musixmatch.com/ws/1.1/'
api_key = "b088cbedecd40b35dd89e90f55227ac2" #generated for the example
def song_lyric(song_name, artist_name):
    """Fetch a track's lyrics from the musixmatch ``matcher.lyrics.get`` endpoint.

    The tracking URL contained in the response is printed and then requested
    through ``lyrics_tracking`` before returning. Returns the lyrics body
    followed by a blank line and the copyright notice.
    """
    while True:
        querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
            song_name) + "&q_artist=" + urllib.parse.quote(
            artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
        # matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        while True:
            try:
                response = urllib.request.urlopen(request,
                                                  timeout=4)  # timeout set to 4 seconds; automatically retries if times out
                raw = response.read()
            except socket.timeout:
                print("Timeout raised and caught")
                continue
            break
        json_obj = json.loads(raw.decode('utf-8'))
        body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
        copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
        tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
        print("Tracking_url====================" +tracking_url + "==================================")
        lyrics_tracking(tracking_url)
        return (body + "\n\n" + copyright)
def lyrics_tracking(tracking_url):
    """Request ``tracking_url`` and print the raw response bytes.

    The request is retried for as long as it times out.
    """
    while True:
        querystring = tracking_url
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        try:
            response = urllib.request.urlopen(request,
                                              timeout=4)  # timeout set to 4 seconds; automatically retries if times out
            raw = response.read()
        except socket.timeout:
            print("Timeout raised and caught")
            continue
        break
    print(raw)
ID = 0
#get top artists from country
artists = {}
for i in range(2, 3):
artists_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=geo.gettopartists&country=spain&format=json&page=' + str(i) + '&api_key=' + api_key)
artists_data = artists_response.json()
for artist in artists_data["topartists"]["artist"]:
name = artist["name"]
url = artist["url"]
if ID > 1: continue
artists[ID] = {}
artists[ID]['ID'] = ID
artists[ID]['name'] = name
ID += 1
for i, v in artists.items():
chosen = artists[i]['name'].replace(" ", "+")
artist_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&format=json&artist=' + chosen + '&api_key=' + api_key)
artist_data = artist_response.json()
# get top albums of the artists
albums = {}
for i, v in artists.items():
chosen = artists[i]['name'].replace(" ", "+")
topalbums_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=artist.gettopalbums&format=json&artist=' + chosen + '&api_key=' + api_key + '&limit=5')
albums_data = topalbums_response.json()
for album in albums_data['topalbums']['album']:
name = album["name"]
url = album["url"]
albums[ID] = {}
albums[ID]['ID'] = ID
albums[ID]['artist'] = artists[i]['name']
albums[ID]['artistID'] = artists[i]['ID']
albums[ID]['name'] = name
ID += 1
# Get tracks of the album
tracks = {}
for i, v in albums.items():
artist = albums[i]['artist'].replace(" ", "+")
name = albums[i]['name'].replace(" ", "+")
album_response_data = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=album.getinfo&format=json&api_key=' + api_key + '&artist=' + artist + '&album=' + name)
album_response = album_response_data.json()
for album in album_response['album']['tracks']['track']:
title = album['name']
tracks[ID] = {}
tracks[ID]['trackID'] = ID
tracks[ID]['title'] = title
tracks[ID]['artist'] = albums[i]['artist']
tracks[ID]['album'] = albums[i]['name']
tracks[ID]['albumID'] = albums[i]['ID']
ID += 1
for i, v in tracks.items():
artist = tracks[i]['artist'].replace(" ", "+")
title = tracks[i]['title'].replace(" ", "+")
# print the lyric of each track
print(song_lyric(title, artist))
It seems like url is not correct. It happens here:
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
If you have ability to run that API locally and see what is returned into tracking_url, you can find out what is still wrong with it.
UPDATE:
I reproduced it: urllib.request cannot process an empty-string URL (""). That is why you need to check tracking_url first and only make the tracking request when it is neither an empty string nor None.

How to get real estate data with Idealista API?

I've been trying to use the API of the website Idealista (https://www.idealista.com/) to retrieve information of real estate data.
Since I'm not familiar with OAuth2, I haven't been able to obtain the token so far. I have only been provided with the API key, the secret, and some basic info on how to build the HTTP request.
I would appreciate an example (preferably in Python) of the functioning of this API, or else some more generic info about dealing with OAuth2 and Python.
After some days of research I came up with a basic python code to retrieve real estate data from the Idealista API.
def get_oauth_token():
    """POST a client-credentials grant to the Idealista token endpoint.

    Returns the raw response content of the token request.
    """
    http_obj = Http()
    url = "https://api.idealista.com/oauth/token"
    apikey= urllib.parse.quote_plus('Provided_API_key')
    secret= urllib.parse.quote_plus('Provided_API_secret')
    auth = base64.encode(apikey + ':' + secret)
    body = {'grant_type':'client_credentials'}
    headers = {'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8','Authorization' : 'Basic ' + auth}
    resp, content = http_obj.request(url,method='POST',headers=headers, body=urllib.parse.urlencode(body))
    return content
This function would return a JSON with the OAuth2 token and the session time in seconds. Afterwards, to query the API, it would be as simple as:
def search_api(token):
    """Run a fixed property search with the given bearer ``token``.

    Returns the raw response content of the search request.
    """
    http_obj = Http()
    url = "http://api.idealista.com/3.5/es/search?center=40.42938099999995,-3.7097526269835726&country=es&maxItems=50&numPage=1&distance=452&propertyType=bedrooms&operation=rent"
    headers = {'Authorization' : 'Bearer ' + token}
    resp, content = http_obj.request(url,method='POST',headers=headers)
    return content
This time we would find the data we were looking for in the content variable, again as JSON.
That can't be marked as correct answer since
auth = base64.encode(apikey + ':' + secret)
body = {'grant_type':'client_credentials'}
headers = {'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8','Authorization' : 'Basic ' + auth}
Will give you TypeError:
can only concatenate str (not "bytes") to str
Since base64encode returns a byte type object...
It's true Idealista API is very limited about documentation, but I think this is a better approach since I don't use unnecesary libs (Only native):
#first request
message = API_KEY + ":" + SECRET
auth = "Basic " + base64.b64encode(message.encode("ascii")).decode("ascii")
headers_dic = {"Authorization" : auth,
"Content-Type" : "application/x-www-form-urlencoded;charset=UTF-8"}
params_dic = {"grant_type" : "client_credentials",
"scope" : "read"}
r = requests.post("https://api.idealista.com/oauth/token",
headers = headers_dic,
params = params_dic)
This works flawlessly with only the Python requests and base64 modules...
regards
This is my code, improving on answer #3. It runs fine for me!
Just put in your API key and your password (secret)...
import pandas as pd
import json
import urllib
import requests as rq
import base64
def get_oauth_token():
    """Request a token from the Idealista OAuth endpoint.

    Returns the ``access_token`` field of the JSON response.
    """
    url = "https://api.idealista.com/oauth/token"
    apikey= 'your_api_key' #sent by idealista
    secret= 'your_password' #sent by idealista
    auth = base64.b64encode(apikey + ':' + secret)
    headers = {'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8' ,'Authorization' : 'Basic ' + auth}
    params = urllib.urlencode({'grant_type':'client_credentials'})
    content = rq.post(url,headers = headers, params=params)
    bearer_token = json.loads(content.text)['access_token']
    return bearer_token
def search_api(token, url):
    """POST to ``url`` with the bearer ``token`` and parse the JSON response."""
    headers = {'Content-Type': 'Content-Type: multipart/form-data;', 'Authorization' : 'Bearer ' + token}
    content = rq.post(url, headers = headers)
    result = json.loads(content.text)['access_token']
    return result
country = 'es' #values: es, it, pt
locale = 'es' #values: es, it, pt, en, ca
language = 'es' #
max_items = '50'
operation = 'sale'
property_type = 'homes'
order = 'priceDown'
center = '40.4167,-3.70325'
distance = '60000'
sort = 'desc'
bankOffer = 'false'
df_tot = pd.DataFrame()
limit = 10
for i in range(1,limit):
url = ('https://api.idealista.com/3.5/'+country+'/search?operation='+operation+#"&locale="+locale+
'&maxItems='+max_items+
'&order='+order+
'&center='+center+
'&distance='+distance+
'&propertyType='+property_type+
'&sort='+sort+
'&numPage=%s'+
'&language='+language) %(i)
a = search_api(get_oauth_token(), url)
df = pd.DataFrame.from_dict(a['elementList'])
df_tot = pd.concat([df_tot,df])
df_tot = df_tot.reset_index()
I found some mistakes; at least, I could not run it.
I believe I improved it with this:
import pandas as pd
import json
import urllib
import requests as rq
import base64
def get_oauth_token():
    """Request an OAuth2 bearer token from the Idealista token endpoint.

    Uses the client-credentials grant with HTTP Basic authentication.

    Returns:
        The ``access_token`` string from the JSON response.
    """
    url = "https://api.idealista.com/oauth/token"
    apikey = 'your_api_key'  # sent by idealista
    secret = 'your_password'  # sent by idealista
    apikey_secret = apikey + ':' + secret
    # b64encode returns bytes; decode to str instead of slicing the repr
    # of the bytes object.
    auth = base64.b64encode(apikey_secret.encode('utf-8')).decode('ascii')
    headers = {'Authorization': 'Basic ' + auth,
               'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'}
    params = urllib.parse.urlencode({'grant_type': 'client_credentials'})  # ,'scope':'read'
    content = rq.post(url, headers=headers, params=params)
    bearer_token = json.loads(content.text)['access_token']
    return bearer_token
def search_api(token, URL):
    """POST to ``URL`` with the bearer ``token`` and return the parsed JSON.

    Args:
        token: Bearer token obtained from the token endpoint.
        URL: Full search URL including its query string.
    """
    # NOTE(review): the Content-Type value repeats the header name and looks
    # malformed - confirm what the API actually expects.
    headers = {'Content-Type': 'Content-Type: multipart/form-data;', 'Authorization' : 'Bearer ' + token}
    # Use the URL argument; the lowercase name `url` is not a parameter here.
    content = rq.post(URL, headers=headers)
    result = json.loads(content.text)
    return result

Categories