Getting ValueError: unknown url type: ' '

Getting ValueError: unknown url type: ' ' - python

I have this code below that iterate through some tracks. And then for each track I want to use the musixmatch api to get and print the lyrics of the track based on the artist name and track name.
code that iterete trough some tracks and print the lyrics:
for i, v in tracks.items():
artist = tracks[i]['artist'].replace(" ", "+")
title = tracks[i]['title'].replace(" ", "+")
print(tracks)
print(song_lyric(title, artist))
The print(tracks) returns in this format:
{12: {'trackID': 12, 'title': 'Achtung Baby', 'number': '1', 'artist': 'U2', 'album': 'Achtung Baby', 'albumID': 2, 'duration': '291'}
When the code exuted the lyrics for the firsts tracks are printed, but then it appears an error:
Traceback (most recent call last):
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 239, in <module>
print(song_lyric(title, artist))
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 72, in song_lyric
lyrics_tracking(tracking_url)
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 79, in lyrics_tracking
request = urllib.request.Request(querystring)
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 329, in __init__
self.full_url = url
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 355, in full_url
self._parse()
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 384, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: ''
Do you know why this error is appearing?
The methods to get the lyrics from musixmatch are public available:
def song_lyric(song_name, artist_name):
while True:
querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
song_name) + "&q_artist=" + urllib.parse.quote(
artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
# matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
while True:
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
json_obj = json.loads(raw.decode('utf-8'))
body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
#print(tracking_url)
lyrics_tracking(tracking_url)
return (body + "\n\n" + copyright)
def lyrics_tracking(tracking_url):
while True:
querystring = tracking_url
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
print(raw)
Full working exemple that reproduces the error:
import requests
import json
import urllib.request, urllib.error, urllib.parse
import socket
apikey_musixmatch = '0b4a363bbd71974c2634837d5b5d1d9a' #generated for the example
apiurl_musixmatch = 'http://api.musixmatch.com/ws/1.1/'
api_key = "b088cbedecd40b35dd89e90f55227ac2" #generated for the example
def song_lyric(song_name, artist_name):
while True:
querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
song_name) + "&q_artist=" + urllib.parse.quote(
artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
# matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
while True:
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
json_obj = json.loads(raw.decode('utf-8'))
body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
print("Tracking_url====================" +tracking_url + "==================================")
lyrics_tracking(tracking_url)
return (body + "\n\n" + copyright)
def lyrics_tracking(tracking_url):
while True:
querystring = tracking_url
request = urllib.request.Request(querystring)
# request.add_header("Authorization", "Bearer " + client_access_token)
request.add_header("User-Agent",
"curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned
try:
response = urllib.request.urlopen(request,
timeout=4) # timeout set to 4 seconds; automatically retries if times out
raw = response.read()
except socket.timeout:
print("Timeout raised and caught")
continue
break
print(raw)
ID = 0
#get top artists from country
artists = {}
for i in range(2, 3):
artists_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=geo.gettopartists&country=spain&format=json&page=' + str(i) + '&api_key=' + api_key)
artists_data = artists_response.json()
for artist in artists_data["topartists"]["artist"]:
name = artist["name"]
url = artist["url"]
if ID > 1: continue
artists[ID] = {}
artists[ID]['ID'] = ID
artists[ID]['name'] = name
ID += 1
for i, v in artists.items():
chosen = artists[i]['name'].replace(" ", "+")
artist_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&format=json&artist=' + chosen + '&api_key=' + api_key)
artist_data = artist_response.json()
# get top albums of the artists
albums = {}
for i, v in artists.items():
chosen = artists[i]['name'].replace(" ", "+")
topalbums_response = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=artist.gettopalbums&format=json&artist=' + chosen + '&api_key=' + api_key + '&limit=5')
albums_data = topalbums_response.json()
for album in albums_data['topalbums']['album']:
name = album["name"]
url = album["url"]
albums[ID] = {}
albums[ID]['ID'] = ID
albums[ID]['artist'] = artists[i]['name']
albums[ID]['artistID'] = artists[i]['ID']
albums[ID]['name'] = name
ID += 1
# Get tracks of the album
tracks = {}
for i, v in albums.items():
artist = albums[i]['artist'].replace(" ", "+")
name = albums[i]['name'].replace(" ", "+")
album_response_data = requests.get(
'http://ws.audioscrobbler.com/2.0/?method=album.getinfo&format=json&api_key=' + api_key + '&artist=' + artist + '&album=' + name)
album_response = album_response_data.json()
for album in album_response['album']['tracks']['track']:
title = album['name']
tracks[ID] = {}
tracks[ID]['trackID'] = ID
tracks[ID]['title'] = title
tracks[ID]['artist'] = albums[i]['artist']
tracks[ID]['album'] = albums[i]['name']
tracks[ID]['albumID'] = albums[i]['ID']
ID += 1
for i, v in tracks.items():
artist = tracks[i]['artist'].replace(" ", "+")
title = tracks[i]['title'].replace(" ", "+")
# print the lyric of each track
print(song_lyric(title, artist))

It seems like url is not correct. It happens here:
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
If you have ability to run that API locally and see what is returned into tracking_url, you can find out what is still wrong with it.
UPDATE:
I reproduced it, so the urllib.request cannot process empty string URL: "", that is why you need to check if the tracking_url != "" and only if its not empty string or None you need to request for the song.

Related

client side caching be able to write responses to the disk and fetch them from the disk when I get a cache hit

I need to add client side caching functionality to my client, I don't need to implement any replacement or validation policies.Just be able to write responses to the disk (i.e., the cache) and fetch them from the disk when I get a cache hit. For this, I need to implement some internal data structure in my client to keep track of which objects are cached and where they are on the disk. I can keep this data structure in the main memory; there is no need to make it persist across shutdowns.
Here's the Code I tried to write the caching part but it isn't working I need your help please:
import socket
import selectors
import os
commands = []
requests = []
request_methods = []
filenames = []
host_names = []
port_numbers = []
cached_objects = {}
sel = selectors.DefaultSelector()
with open('commands.txt', encoding='UTF-8', mode='r') as f:
commands = f.readlines()
def parse_file():
for count in range(len(commands)):
request_method = commands[count].split(' ')[0]
request_methods.append(request_method)
filename = commands[count].split(' ')[1]
filenames.append(filename)
host_name = commands[count].split(' ')[2].strip('\n')
host_names.append(host_name)
try:
port_number = commands[count].split(' ')[3].strip('\n')
port_numbers.append(port_number)
except Exception as e:
port_number = 80
port_numbers.append(port_number)
requests.append(generate_request(request_method, filename, host_name))
def generate_request(request_method, filename, host_name):
request = ''
if request_method == "GET":
request += request_method + ' /' + filename + ' HTTP/1.0\r\n'
request += 'Host:' + host_name + '\r\n\r\n'
print(request)
elif request_method == "POST":
request += request_method + ' /' + filename + ' HTTP/1.0\r\n'
request += 'Host:' + host_name + '\r\n'
request += '\r\n'
#Reading POST File From ClientFiles
print(filename)
f = open(filename,"r")
request += f.read()
print(request)
return request
def start_connections(host, port, request, filename, request_method):
server_addr = (host, port)
events = selectors.EVENT_READ | selectors.EVENT_WRITE
# connid = count + 1
print(f"Starting connection to {server_addr}")
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect_ex(server_addr)
sock.sendall(bytes(request, 'UTF-8'))
data = sock.recv(4092)
response = data.decode()
cached_objects[request] = response # <<<<<<<<<<<<<<<<<<
# TODO: Fix Name of the file
fileReady = "Clientfiles/"
head, tail = os.path.split(filename)
fileReady += tail
print("\n RESPONSE " + response + "\n response end")
try:
if request_method == "GET":
payload = response.split('\r\n\r\n', 1)[1]
print("MyPayload " + payload)
f = open(fileReady, "w")
f.write(payload)
f.close()
except Exception as e:
print(e)
print("From Server :", response)
print("\n\n")
sel.register(sock, events)
def check_cache(request):
for i in range(len(commands)):
request = requests[i]
if request in cached_objects.keys():
response = cached_objects[request] # <<<<<<<<<<<<<<<<<<
print("\n RESPONSE From cache " + response + "\n response end") # <<<<<<<<<<<<<<<<<<
# i = i + 1 # <<<<<<<<<<<<<<<<<<
else:
start_connections(host_names[i], int(port_numbers[i]), requests[i], filenames[i], request_methods[i])
parse_file()
check_cache(generate_request.request)

Using data from API in subsequent API calls

I should preface this with I am not a programmer and most of this code was not written by me. I unfortunately have a need and am trying to hack my way through this.
What I am trying to do is chain a few API calls together to ultimately get a list of IPs. What this script does is queries the API and pulls (and prints) a list of device IDs. The device IDs look like this:
akdjlfijoaidjfod
g9jkidfjlskdjf44
3jdhfj4hf9dfiiu4
The device IDs then need to be passed as a parameter in the next API call like this:
https://api.example.com/devices/entities/devices/v1?ids=akdjlfijoaidjfod&ids=g9jkidfjlskdjf44&ids=3jdhfj4hf9dfiiu4 and so on.
I dont know where to begin. Instead of printing the asset ids, I assume they should be stored as a parameter (or variable) and then appended to the URL. I tried doing that with "ID_LIST" but that didnt seem to work. Can you guys point me in the right direction?
import requests
import json
# Define API REST paths
BASE_URL = "https://api.example.com/"
OAUTH_URL_PART = "oauth2/token"
DEVICE_SEARCH = "devices/queries/devices/v1"
DEVICE_DETAILS = "devices/entities/devices/v1"
# Empty auth token to hold value for subsequent request
auth_Token = ""
# Section 1 - Authenticate to Example OAUTH
# Build a dictionary to hold the headers
headers = {
'Content-type': 'application/x-www-form-urlencoded',
'accept': 'application/json'
}
# Build a dictionary to holds the authentication data to be posted to get a token
auth_creds = {}
auth_creds['client_id'] = "<client_id>"
auth_creds['client_secret'] = "<client_secret>"
auth_creds['grant_type'] = "client_credentials"
# Call the API to get a Authentication token - NOTE the authentication creds
print("Requesting token from " + BASE_URL + OAUTH_URL_PART)
auth_response = requests.post(BASE_URL + OAUTH_URL_PART,data=auth_creds, headers=headers)
# Check if successful
if auth_response.status_code != 201:
# Output debug information
print("\n Return Code: " + str(auth_response.status_code) + " " + auth_response.reason)
print("Path: " + auth_response.request.path_url)
print("Headers: ")
print(auth_response.request.headers)
print("Body: " + auth_response.request.body)
print("\n")
print("Trace_ID: " + auth_response.json()['meta']['trace_id'])
else:
# Section 2 - Capture OAUTH token and store in headers for later use
print("Token Created")
# Capture the auth token for reuse in subsequent calls, by pulling it from the response
# Note this token can be reused multiple times until it expires after 30 mins
auth_Token = auth_response.json()['access_token']
headers = {
'authorization':'bearer ' + auth_Token,
'accept': 'application/json'
}
# Section 3 - Reuse authentication token to call other Example OAUTH2 APIs
# Build parameter dictionary
call_params = {}
call_params['offset'] ="" # Non-mandatory param
call_params['limit'] ="5000" # The number of results
call_params['sort'] ="" #
call_params['filter'] ="" # To exclude devices
# Call devices API
print("Searching Asset ID by getting from " + BASE_URL + DEVICE_SEARCH)
DEVICE_search_response = requests.get(BASE_URL + DEVICE_SEARCH,params=call_params,headers=headers)
#DEVICE_DETAILS_response = request.get(BASE_URL + DEVICE_DETAILS,headers=headers)
# Check for errors
if DEVICE_search_response.status_code != 200:
# Output debug information
print("\n Return Code: " + str(DEVICE_search_response.status_code) + " " + DEVICE_search_response.reason)
print("Path: " + DEVICE_search_response.request.path_url)
print("Headers: ")
print(DEVICE_search_response.request.headers)
print("Body: " + DEVICE_search_response.request.body)
print("\n")
print("Trace_ID: " + DEVICE_search_response.json()['meta']['trace_id'])
else:
# Iterate the results and print
result = DEVICE_search_response.json()
print("DEVICE found on " + str(len(result['resources'])) + " the following device id:")
for devices in result['resources']:
print(devices)
###########Part that is not working###########
DEVICE_DETAILS_response = requests.get(BASE_URL + DEVICE_DETAILS,headers=headers)
#ID_LIST = str(len(result['resources']).replace(",", "&ids=")
if DEVICE_DETAILS_response.status_code != 200:
# Output debug information
print("\n Return Code: " + str(DEVICE_DETAILS_response.status_code) + " " + DEVICE_DETAILS_response.reason)
print("Path: " + DEVICE_DETAILS_response.request.path_url)
print("Headers: ")
print(DEVICE_DETAILS_response.request.headers)
print("Body: " + DEVICE_DETAILS_response.request.body)
print("\n")
print("Trace_ID: " + DEVICE_DETAILS_response.json()['meta']['trace_id'])
else:
result = DEVICE_DETAILS_response.json()
print("Device Details Found")
for details in result['resources']:
print(details)

Hi to convert the strings in result['resources']:
['akdjlfijoaidjfod',
'g9jkidfjlskdjf44',
'3jdhfj4hf9dfiiu4']
to : https://api.example.com/devices/entities/devices/v1?ids=akdjlfijoaidjfod&ids=g9jkidfjlskdjf44&ids=3jdhfj4hf9dfiiu4
try this funciton:
def get_modified_url(mylist, myurl):
url = myurl + '?'
for idx, b in enumerate(mylist): # enumerate list to get index and element in the list
if idx > 0:
url += '&ids=' + b # append &ids= to url if not first device id
else:
url += 'ids=' + b # append ids= to url if first device id
return url
print(get_modified_url(result['resources'], BASE_URL + DEVICE_DETAILS ))
full code would be:
def get_modified_url(mylist, myurl):
url = myurl + '?'
for idx, b in enumerate(mylist): # enumerate list to get index and element in the list
if idx > 0:
url += '&ids=' + b # append &ids= to url if not first device id
else:
url += 'ids=' + b # append ids= to url if first device id
return url
device_list = []
DEVICE_search_response = requests.get(BASE_URL + DEVICE_SEARCH,params=call_params,headers=headers)
# Check for errors
if DEVICE_search_response.status_code != 200:
# Output debug information
print("\n Return Code: " + str(DEVICE_search_response.status_code) + " " + DEVICE_search_response.reason)
print("Path: " + DEVICE_search_response.request.path_url)
print("Headers: ")
print(DEVICE_search_response.request.headers)
print("Body: " + DEVICE_search_response.request.body)
print("\n")
print("Trace_ID: " + DEVICE_search_response.json()['meta']['trace_id'])
else:
# Iterate the results and print
result = DEVICE_search_response.json()
print("DEVICE found on " + str(len(result['resources'])) + " the following device id:")
for devices in result['resources']:
print(devices)
device_list.append(devices)
new_url = get_modified_url(device_list, BASE_URL + DEVICE_DETAILS )
DEVICE_DETAILS_response = requests.get(new_url, headers=headers)
if DEVICE_DETAILS_response.status_code != 200:
# Output debug information
print("\n Return Code: " + str(DEVICE_DETAILS_response.status_code) + " " + DEVICE_DETAILS_response.reason)
print("Path: " + DEVICE_DETAILS_response.request.path_url)
print("Headers: ")
print(DEVICE_DETAILS_response.request.headers)
print("Body: " + DEVICE_DETAILS_response.request.body)
print("\n")
print("Trace_ID: " + DEVICE_DETAILS_response.json()['meta']['trace_id'])
else:
result = DEVICE_DETAILS_response.json()
print("Device Details Found")
for details in result['resources']:
print(details)

it says "RESTART: C:\python\python ex\facebook EX.py" nothing else

import sys
import urllib.request
import json
if __name__ == '_main_':
# [CODE1]
page_name = "jtbnews"
app_id = "[App ID]"
app_secret = "[App Secret Code]"
access_token = app_id + "I" +app_secret
# [CODE2]
# http://graph.facebook.com/v2.8/[page_id]/?access_token=[App_ID]I[Secret_Key]
# 형식의 문자열을 만들어 낸다.
base = "http://graph.facebook.com/v2.8"
node = "/" + page_name
parameters = "/?access_token=%s" % access_token
url = base + node + parameters
# [CODE3]
req = urllib.request.Request(url)
# [CODE4]
try:
response = urllib.request.urlopen(req)
if response.getcode() == 200:
data = json.loads(response.read().decode('utf-8'))
page_id = data['id']
print("%s Facebook numeric ID : %s" % (page_name, page_id))
except Exception as e: print(e)
I'm a 100% beginner of programming
I follow the code in a book.
I can't figure out what is the problem.
it just says:
"RESTART: C:\python\python ex\facebook EX.py"

Decode .ilearner from Azure

I would like to ask for solutions for the problem with decoding .ilearner file downloaded via API from Microsoft Azure ML. The error I am receiving looks like this:
File "<ipython-input-1-157aac1e9b7a>", line 162, in <module>
invokeBatchExecutionService()
File "<ipython-input-1-157aac1e9b7a>", line 157, in invokeBatchExecutionService
processResults(result)
File "<ipython-input-1-157aac1e9b7a>", line 51, in processResults
saveBlobToFile(url3, "The results for " + outputName)
File "<ipython-input-1-157aac1e9b7a>", line 28, in saveBlobToFile
f.write(response.read().decode("utf8", 'ignore'))
File "D:\XXXXXX\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0
" UnicodeEncodeError: 'charmap' codec can't encode character '\u053e' in position 1463: character maps to <undefined>"
My full code looks like this:
import urllib.request
import json
import time
from azure.storage.blob import *
def printHttpError(httpError):
print("The request failed with status code: " + str(httpError.code))
# Print the headers - they include the requert ID and the timestamp, which are useful for debugging the failure
print(httpError.info())
print(json.loads(httpError.read().decode("utf8", 'ignore')))
return
def saveBlobToFile(blobUrl, resultsLabel):
output_file = "C:/XXX/API/output.csv" # Replace this with the location you would like to use for your output file, and valid file extension (usually .csv for scoring results, or .ilearner for trained models)
print("Reading the result from " + blobUrl)
try:
response = urllib.request.urlopen(blobUrl)
except urllib.error.HTTPError as error:
printHttpError(error)
return
# print("Response: " + response)
with open(output_file, "w+") as f:
# print(response.read())
f.write(response.read().decode("utf8", 'ignore'))
print(resultsLabel + " have been written to the file " + output_file)
return
def processResults(result):
first = True
results = result["Results"]
# print(" Results : " + results)
for outputName in results:
result_blob_location = results[outputName]
sas_token = result_blob_location["SasBlobToken"]
base_url = result_blob_location["BaseLocation"]
relative_url = result_blob_location["RelativeLocation"]
print("The results for " + outputName + " are available at the following Azure Storage location:")
print("BaseLocation: " + base_url)
print("RelativeLocation: " + relative_url)
print("SasBlobToken: " + sas_token)
if (first):
first = False
url3 = base_url + relative_url + sas_token
saveBlobToFile(url3, "The results for " + outputName)
# first = True
return
def uploadFileToBlob(input_file, input_blob_name, storage_container_name, storage_account_name, storage_account_key):
blob_service = BlockBlobService(account_name=storage_account_name, account_key=storage_account_key)
print("Uploading the input to blob storage...")
blob_service.create_blob_from_path(storage_container_name, input_blob_name, input_file)
def invokeBatchExecutionService():
storage_account_name = "XXX" # Replace this with your Azure Storage Account name
storage_account_key = "XXX # Replace this with your Azure Storage Key
storage_container_name = "XXX" # Replace this with your Azure Storage Container name
connection_string = "DefaultEndpointsProtocol=https;AccountName=" + storage_account_name + ";AccountKey=" + storage_account_key
api_key = "XXX” # Replace this with the API key for the web service
url = "https://uswestcentral.services.azureml.net/subscriptions/75b6fa4e098c4ad88df85a3533530bd4/services/4ed857d52f3140e1b422e6e986437b6e/jobs"
uploadFileToBlob("C:/XXX/API/input.csv", # Replace this with the location of your input file, and valid file extension (usually .csv)
"input2.csv", # Replace this with the name you would like to use for your Azure blob; this needs to have the same extension as the input file
storage_container_name, storage_account_name, storage_account_key);
payload = {
"Inputs": {
"input1":
{
"ConnectionString": connection_string,
"RelativeLocation": "/" + storage_container_name + "/input.csv"
},
},
"Outputs": {
"output1":
{
"ConnectionString": connection_string,
"RelativeLocation": "/" + storage_container_name + "/output.csv" # Replace this with the location you would like to use for your output file, and valid file extension (usually .csv for scoring results, or .ilearner for trained models)
},
"output2":
{
"ConnectionString": connection_string,
"RelativeLocation": "/" + storage_container_name + "/output.ilearner" # Replace this with the location you would like to use for your output file, and valid file extension (usually .csv for scoring results, or .ilearner for trained models)
},
},
"GlobalParameters": {
}
}
body = str.encode(json.dumps(payload))
headers = { "Content-Type":"application/json", "Authorization":("Bearer " + api_key)}
print("Submitting the job...")
# submit the job
req = urllib.request.Request(url + "?api-version=2.0", body, headers)
try:
response = urllib.request.urlopen(req)
except urllib.error.HTTPError as error:
printHttpError(error)
return
result = response.read()
job_id = result.decode("utf8", 'ignore')[1:-1]
print("Job ID: " + job_id)
# start the job
print("Starting the job...")
body = str.encode(json.dumps({}))
req = urllib.request.Request(url + "/" + job_id + "/start?api-version=2.0", body, headers)
try:
response = urllib.request.urlopen(req)
except urllib.error.HTTPError as error:
printHttpError(error)
return
url2 = url + "/" + job_id + "?api-version=2.0"
while True:
print("Checking the job status...")
req = urllib.request.Request(url2, headers = { "Authorization":("Bearer " + api_key) })
try:
response = urllib.request.urlopen(req)
except urllib.error.HTTPError as error:
printHttpError(error)
return
result = json.loads(response.read().decode("utf8", 'ignore'))
status = result["StatusCode"]
if (status == 0 or status == "NotStarted"):
print("Job " + job_id + " not yet started...")
elif (status == 1 or status == "Running"):
print("Job " + job_id + " running...")
elif (status == 2 or status == "Failed"):
print("Job " + job_id + " failed!")
print("Error details: " + result["Details"])
break
elif (status == 3 or status == "Cancelled"):
print("Job " + job_id + " cancelled!")
break
elif (status == 4 or status == "Finished"):
print("Job " + job_id + " finished!")
# print("Results: " + results)
processResults(result)
break
time.sleep(1) # wait one second
return
invokeBatchExecutionService()
The code follows guideline described by https://learn.microsoft.com/en-us/azure/machine-learning/studio/retrain-models-programmatically I run the Python 3.5 script .

You are downloading a file encoded as UTF-8, then writing the data to your local filesystem using the filesystem's default encoding (probably cp1252 or similar). Writing to the local filesystem is failing because the data contains characters that cannot be encoded as cp1252.
>>> s = '\u053e'
>>> s
'Ծ'
>>> s.encode('cp1252')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/XXXXXX/data36/lib/python3.6/encodings/cp1252.py", line 12, in encode
return codecs.charmap_encode(input,errors,encoding_table)
UnicodeEncodeError: 'charmap' codec can't encode character '\u053e' in position 0: character maps to <undefined>
One solution is to encode your output file as UTF-8 (you will need to remember to open the file as UTF-8 if you open it elsewhere in your code)
with open(output_file, "wb+", encoding='utf-8') as f:
f.write(response.read())
Another approach might be to set the errors argument to open to handle encoding errors. For example:
with open(output_file, "w+", errors="replace") as f:
f.write(response.read().decode("utf8", 'ignore'))
will replace un-encodeable characters with a ? character.

Twitter Search API using urllib2

I am beginner to API calls using python (or even just API calls). I am trying a basic call with the Twitter API.
My Code for generating oauth_signature is as follows :
def getSignature(query):
key_dict['q'] = urllib.quote(query, '')
finKey = ""
for key in sorted(key_dict.keys()):
finKey += key + "="+key_dict[key]+"&"
finKey = finKey[:-1]
finKey = HTTP_METHOD + "&" + urllib.quote(BASE_URL, '') + "&" + urllib.quote(finKey, '')
key = urllib.quote(CONSUMER_SECRET_KEY, '')+"&"+urllib.quote(ACCESS_TOKEN_SECRET, '')
hashed = hmac.new(key, finKey, sha1)
finKey = binascii.b2a_base64(hashed.digest())
key_dict['oauth_signature'] = urllib.quote(finKey, '')
where key_dict stores all the keys :
key_dict = dict()
key_dict['oauth_consumer_key'] = urllib.quote(CONSUMER_KEY, '')
key_dict['oauth_nonce'] = urllib.quote("9ab59691142584g739134971f75aa986", '')
key_dict['oauth_signature_method'] = urllib.quote("HMAC-SHA1", '')
key_dict['oauth_timestamp'] = urllib.quote(str(int(time.time())), '')
key_dict['oauth_token'] = urllib.quote(ACCESS_TOKEN, '')
key_dict['oauth_version'] = urllib.quote(OAUTH_VERSION, '')
BASE_URL = "https://api.twitter.com/1.1/search/tweets.json?" + urllib.quote("q=delhi+elections", '')
I generate the Base Header String using the following :
def getHeaderString():
ret = "OAuth "
key_list =['oauth_consumer_key', 'oauth_nonce', 'oauth_signature', 'oauth_signature_method', 'oauth_timestamp', 'oauth_token', 'oauth_version']
for key in key_list:
ret = ret+key+"=\""+key_dict[key]+"\", "
ret = ret[:-2]
return ret
Although when I am making the call, I get :
urllib2.HTTPError: HTTP Error 401: Unauthorized
OR
urllib2.URLError: <urlopen error [Errno 60] Operation timed out>
My final request is made using the following :
getSignature("delhi+elections")
headers = { 'Authorization' : getHeaderString()}
req = urllib2.Request(BASE_URL, headers= headers)
response = urllib2.urlopen(req)
Where am I going wrong ?

Few Points that should have been mentioned somewhere :
The method : binascii.b2a_base64(hashed.digest()) appends a new line feed at the end of the string. This cause the oauth_signature to fail Authenticate.
The delhi+elections is actually supposed to be delhi elections. This mismatch again made the Hash Value match in sha1 to fail.
Removing both of them solved the problem.
The final Code :
key_dict = dict()
key_dict['oauth_consumer_key'] = urllib.quote(CONSUMER_KEY, '')
key_dict['oauth_nonce'] = urllib.quote("9aa39691142584s7df134971375aa986", '')
key_dict['oauth_signature_method'] = urllib.quote("HMAC-SHA1", '')
key_dict['oauth_timestamp'] = urllib.quote(str(int(time.time())), '')
key_dict['oauth_token'] = urllib.quote(ACCESS_TOKEN, '')
key_dict['oauth_version'] = urllib.quote(OAUTH_VERSION, '')
BASE_URL = "https://api.twitter.com/1.1/search/tweets.json"
def getSignature(query):
key_dict['q'] = urllib.quote(query, '')
finKey = ""
for key in sorted(key_dict.keys()):
finKey += key + "="+key_dict[key]+"&"
finKey = finKey[:-1]
finKey = HTTP_METHOD + "&" + urllib.quote(BASE_URL, '') + "&" + urllib.quote(finKey, '')
key = urllib.quote(CONSUMER_SECRET_KEY, '')+"&"+urllib.quote(ACCESS_TOKEN_SECRET, '')
hashed = hmac.new(key, finKey, sha1)
finKey = binascii.b2a_base64(hashed.digest())[:-1]
key_dict['oauth_signature'] = urllib.quote(finKey, '')
def getHeaderString():
ret = "OAuth "
key_list =['oauth_consumer_key', 'oauth_nonce', 'oauth_signature', 'oauth_signature_method', 'oauth_timestamp', 'oauth_token', 'oauth_version']
for key in key_list:
ret = ret+key+"=\""+key_dict[key]+"\", "
ret = ret[:-2]
return ret
url = BASE_URL
getSignature("delhi elections")
headers = { 'Authorization' : getHeaderString()}
values = {'q':'delhi elections'}
data = urllib.urlencode(values)
req = urllib2.Request(url+"?"+data, headers= headers)
response = urllib2.urlopen(req)
the_page = response.read()
print the_page

Instead of coding your own client, have you tried using tweepy? For a reference implementation using this library, you can check twitCheck client.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Getting ValueError: unknown url type: ' ' - python

Related

client side caching be able to write responses to the disk and fetch them from the disk when I get a cache hit

Using data from API in subsequent API calls

it says "RESTART: C:\python\python ex\facebook EX.py" nothing else

Decode .ilearner from Azure

Twitter Search API using urllib2

Categories

Resources