Using data from an API in subsequent API calls - Python

I should preface this by saying that I am not a programmer and most of this code was not written by me. Unfortunately I have a need and am trying to hack my way through it.
What I am trying to do is chain a few API calls together to ultimately get a list of IPs. This script queries the API and pulls (and prints) a list of device IDs, which look like this:
akdjlfijoaidjfod
g9jkidfjlskdjf44
3jdhfj4hf9dfiiu4
The device IDs then need to be passed as a parameter in the next API call like this:
https://api.example.com/devices/entities/devices/v1?ids=akdjlfijoaidjfod&ids=g9jkidfjlskdjf44&ids=3jdhfj4hf9dfiiu4 and so on.
I don't know where to begin. Instead of printing the device IDs, I assume they should be stored as a parameter (or variable) and then appended to the URL. I tried doing that with "ID_LIST" but that didn't seem to work. Can you point me in the right direction?
import requests
import json

# Define API REST paths
BASE_URL = "https://api.example.com/"
OAUTH_URL_PART = "oauth2/token"
DEVICE_SEARCH = "devices/queries/devices/v1"
DEVICE_DETAILS = "devices/entities/devices/v1"

# Empty auth token to hold value for subsequent request
auth_Token = ""

# Section 1 - Authenticate to Example OAUTH
# Build a dictionary to hold the headers
headers = {
    'Content-type': 'application/x-www-form-urlencoded',
    'accept': 'application/json'
}

# Build a dictionary to hold the authentication data to be posted to get a token
auth_creds = {}
auth_creds['client_id'] = "<client_id>"
auth_creds['client_secret'] = "<client_secret>"
auth_creds['grant_type'] = "client_credentials"

# Call the API to get an authentication token - NOTE the authentication creds
print("Requesting token from " + BASE_URL + OAUTH_URL_PART)
auth_response = requests.post(BASE_URL + OAUTH_URL_PART, data=auth_creds, headers=headers)

# Check if successful
if auth_response.status_code != 201:
    # Output debug information
    print("\n Return Code: " + str(auth_response.status_code) + " " + auth_response.reason)
    print("Path: " + auth_response.request.path_url)
    print("Headers: ")
    print(auth_response.request.headers)
    print("Body: " + auth_response.request.body)
    print("\n")
    print("Trace_ID: " + auth_response.json()['meta']['trace_id'])
else:
    # Section 2 - Capture OAUTH token and store in headers for later use
    print("Token Created")
    # Capture the auth token for reuse in subsequent calls, by pulling it from the response
    # Note this token can be reused multiple times until it expires after 30 mins
    auth_Token = auth_response.json()['access_token']
    headers = {
        'authorization': 'bearer ' + auth_Token,
        'accept': 'application/json'
    }

    # Section 3 - Reuse authentication token to call other Example OAUTH2 APIs
    # Build parameter dictionary
    call_params = {}
    call_params['offset'] = ""     # Non-mandatory param
    call_params['limit'] = "5000"  # The number of results
    call_params['sort'] = ""
    call_params['filter'] = ""     # To exclude devices

    # Call devices API
    print("Searching Asset ID by getting from " + BASE_URL + DEVICE_SEARCH)
    DEVICE_search_response = requests.get(BASE_URL + DEVICE_SEARCH, params=call_params, headers=headers)
    #DEVICE_DETAILS_response = request.get(BASE_URL + DEVICE_DETAILS,headers=headers)

    # Check for errors
    if DEVICE_search_response.status_code != 200:
        # Output debug information
        print("\n Return Code: " + str(DEVICE_search_response.status_code) + " " + DEVICE_search_response.reason)
        print("Path: " + DEVICE_search_response.request.path_url)
        print("Headers: ")
        print(DEVICE_search_response.request.headers)
        print("Body: " + DEVICE_search_response.request.body)
        print("\n")
        print("Trace_ID: " + DEVICE_search_response.json()['meta']['trace_id'])
    else:
        # Iterate the results and print
        result = DEVICE_search_response.json()
        print("DEVICE found on " + str(len(result['resources'])) + " the following device id:")
        for devices in result['resources']:
            print(devices)

        ########### Part that is not working ###########
        DEVICE_DETAILS_response = requests.get(BASE_URL + DEVICE_DETAILS, headers=headers)
        #ID_LIST = str(len(result['resources']).replace(",", "&ids=")
        if DEVICE_DETAILS_response.status_code != 200:
            # Output debug information
            print("\n Return Code: " + str(DEVICE_DETAILS_response.status_code) + " " + DEVICE_DETAILS_response.reason)
            print("Path: " + DEVICE_DETAILS_response.request.path_url)
            print("Headers: ")
            print(DEVICE_DETAILS_response.request.headers)
            print("Body: " + DEVICE_DETAILS_response.request.body)
            print("\n")
            print("Trace_ID: " + DEVICE_DETAILS_response.json()['meta']['trace_id'])
        else:
            result = DEVICE_DETAILS_response.json()
            print("Device Details Found")
            for details in result['resources']:
                print(details)

Hi, to convert the strings in result['resources']:
['akdjlfijoaidjfod',
'g9jkidfjlskdjf44',
'3jdhfj4hf9dfiiu4']
to: https://api.example.com/devices/entities/devices/v1?ids=akdjlfijoaidjfod&ids=g9jkidfjlskdjf44&ids=3jdhfj4hf9dfiiu4
try this function:
def get_modified_url(mylist, myurl):
    url = myurl + '?'
    for idx, b in enumerate(mylist):  # enumerate the list to get the index and the element
        if idx > 0:
            url += '&ids=' + b  # append &ids= to the url if not the first device id
        else:
            url += 'ids=' + b   # append ids= to the url if the first device id
    return url

print(get_modified_url(result['resources'], BASE_URL + DEVICE_DETAILS))
The full code would be:
def get_modified_url(mylist, myurl):
    url = myurl + '?'
    for idx, b in enumerate(mylist):  # enumerate the list to get the index and the element
        if idx > 0:
            url += '&ids=' + b  # append &ids= to the url if not the first device id
        else:
            url += 'ids=' + b   # append ids= to the url if the first device id
    return url

device_list = []
DEVICE_search_response = requests.get(BASE_URL + DEVICE_SEARCH, params=call_params, headers=headers)

# Check for errors
if DEVICE_search_response.status_code != 200:
    # Output debug information
    print("\n Return Code: " + str(DEVICE_search_response.status_code) + " " + DEVICE_search_response.reason)
    print("Path: " + DEVICE_search_response.request.path_url)
    print("Headers: ")
    print(DEVICE_search_response.request.headers)
    print("Body: " + DEVICE_search_response.request.body)
    print("\n")
    print("Trace_ID: " + DEVICE_search_response.json()['meta']['trace_id'])
else:
    # Iterate the results and print
    result = DEVICE_search_response.json()
    print("DEVICE found on " + str(len(result['resources'])) + " the following device id:")
    for devices in result['resources']:
        print(devices)
        device_list.append(devices)

    new_url = get_modified_url(device_list, BASE_URL + DEVICE_DETAILS)
    DEVICE_DETAILS_response = requests.get(new_url, headers=headers)
    if DEVICE_DETAILS_response.status_code != 200:
        # Output debug information
        print("\n Return Code: " + str(DEVICE_DETAILS_response.status_code) + " " + DEVICE_DETAILS_response.reason)
        print("Path: " + DEVICE_DETAILS_response.request.path_url)
        print("Headers: ")
        print(DEVICE_DETAILS_response.request.headers)
        print("Body: " + DEVICE_DETAILS_response.request.body)
        print("\n")
        print("Trace_ID: " + DEVICE_DETAILS_response.json()['meta']['trace_id'])
    else:
        result = DEVICE_DETAILS_response.json()
        print("Device Details Found")
        for details in result['resources']:
            print(details)
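Side note: unless I'm mistaken, requests will build the repeated ids query string for you if you pass a list as the parameter value, so the URL-building helper isn't strictly necessary. A minimal sketch (the example IDs and endpoint constants are taken from the question; the authorization header from above would still be needed):

import requests

BASE_URL = "https://api.example.com/"
DEVICE_DETAILS = "devices/entities/devices/v1"
headers = {'accept': 'application/json'}  # plus the 'authorization' header captured earlier

device_list = ['akdjlfijoaidjfod', 'g9jkidfjlskdjf44', '3jdhfj4hf9dfiiu4']

# A list value is serialized as a repeated key:
# ?ids=akdjlfijoaidjfod&ids=g9jkidfjlskdjf44&ids=3jdhfj4hf9dfiiu4
response = requests.get(BASE_URL + DEVICE_DETAILS, params={'ids': device_list}, headers=headers)
print(response.url)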

Related

Starting threads in a for cycle produces multiple results

EDIT - I found the problem! In the function SendMessage I was using UserID (capitalized) instead of userid (the actual parameter passed to each thread), so Python logged the UserID from the for loop instead of the individual userid passed to each function. It was only a logging problem; the program sent the messages correctly.
I have a for loop that iterates through the elements of a user list. On each iteration, I would like to start a separate background thread to send a message to that user. By "send a message" I mean a simple POST request made using the requests Python lib. At the end of the thread, a line is written to the console. Every 24 requests (so every 24 threads) the app needs to stop for about a second.
Success = 0
Bounces = 0

def SendMessage(botid, token, userid, messageid, tag):
    global Success
    global Bounces
    try:
        payload = {...}
        r = requests.post("...", params=payload, headers=head, timeout=2)
        # problem with request?
        pjson = json.loads(r.text)
        if r.status_code != 200:
            log(str(r.status_code) + " " + pjson["result"] + " UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
            Bounces += 1
            return
        Success += 1
        return
    except requests.exceptions.Timeout:
        # wait for connection to be available again!
        while not conn_available():
            print("... Waiting for a new connection...")
            time.sleep(10)
        log("Request timed out. UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
        Bounces += 1
    except requests.exceptions.ConnectionError:
        log("Unable to connect. UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
        Bounces += 1
    except requests.exceptions.HTTPError:
        log("Invalid request. UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
        Bounces += 1
    except requests.exceptions.RequestException:
        log("Invalid request. UserID: " + UserID + "; URL: " + "..." + BotID + "/users/" + UserID + "/send; Params: " + str(payload))
        Bounces += 1

while True:
    newMsgsReq = ""
    try:
        # Check for new messages
        newMsgsReq = requests.get("...", timeout=2)
        if newMsgsReq.text == "false":
            # exit sub
            time.sleep(2)
            continue
    except requests.exceptions.HTTPError as errh:
        log("Request has failed: There was an error in the request: [" + str(errh) + "]")
        time.sleep(2)
        continue
    except requests.exceptions.ConnectionError as errc:
        log("Request has failed: check internet connection & retry: [" + str(errc) + "]")
        time.sleep(2)
        continue
    except requests.exceptions.Timeout as errt:
        log("Request has failed: check internet connection & retry: [" + str(errt) + "]")
        time.sleep(2)
        continue
    except requests.exceptions.RequestException as err:
        log("Request has failed: There was an error in the request: [" + str(err) + "]")
        time.sleep(2)
        continue

    # we have a message!!!
    # Extract BotID, Token, MessageID
    msgInf = newMsgsReq.text.split("|")
    MessageID = msgInf[0]
    BotID = msgInf[1]
    Token = msgInf[2]
    Tag = msgInf[3]
    del msgInf[0:4]
    suc("New message found: " + str(MessageID))
    suc("Total recipients: " + str(len(msgInf)))

    # Begin send!
    Cycles = 0
    TotCycles = 0
    # Loop through msgInf
    for UserID in msgInf:
        # Create the thread.
        process = threading.Thread(target=SendMessage, args=[BotID, Token, UserID, MessageID, Tag])
        process.start()
        TotCycles += 1
        pb.print_progress_bar(TotCycles)
        Cycles += 1
        if Cycles == 24:
            time.sleep(1)
            Cycles = 0

    suc("Message " + str(MessageID) + " sent successfully (" + str(Success) + " success, " + str(Bounces) + " bounces")
    Success = 0
    Bounces = 0
    time.sleep(3)
Let's say my user list is:
{1, 2, 3, 4, ..., 24, 25, ...}. I expect my application to output:
1. Message 1 sent successfully...
2. Message 2 sent successfully...
...
24. Message 24 sent successfully.
Instead, I am getting this output:
1. Message 1 sent successfully.
2. Message 1 sent successfully.
...
24. Message 1 sent successfully.
So all the 24 outputs are related to the first of the 24 ids. It seems like the for loop does not proceed...
This prints the incremented counter without any trouble, so I think you may need to provide all of the code and some sample input.
import threading
import time

def SendMessage(userid):
    print(userid)

while True:
    cycles = 1
    for user_id in [1, 2, 3]:
        process = threading.Thread(target=SendMessage, args=[user_id])
        process.start()
        cycles += 1
        if cycles == 24:
            time.sleep(1)
            cycles = 0
    time.sleep(3)
Run it on repl.it
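For reference, the fix the asker eventually found amounts to logging the parameters that were actually passed to the thread rather than the loop variables from the enclosing scope. A minimal sketch of the corrected log call (assuming the same log helper and placeholder URL as the question):

def SendMessage(botid, token, userid, messageid, tag):
    # Log the local parameters (userid, botid) instead of the loop variables
    # (UserID, BotID), so each thread reports the recipient it was given.
    log("Request timed out. UserID: " + userid + "; URL: ..." + botid + "/users/" + userid + "/send")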

Nested for loop keeps repeating

I have a Python scraper whose main purpose is to:
Read a list of postcodes from a text file into an array
For each postcode in the array, search 10 pages
Pull out certain content
I seem to be getting results like:
page 1
page 2
page 2
page 3
page 3
page 3
page 4
page 4
page 4
page 4
etc
I have tried rearranging the code several times without much luck; everything works fine except this step.
from bs4 import BeautifulSoup
import time
from time import sleep
from datetime import datetime
import requests
import csv

print(" Initializing ...")
print(" Loading Keywords")

with open("pcodes.txt") as pcodes:
    postkeys = []
    for line in pcodes:
        postkeys.append(line.strip())

with open("pcodnum.txt") as pcodnum:
    postkeynum = []
    for line in pcodnum:
        postkeynum.append(line.strip())

print(" Welcome to YellScrape v1.0")
print(" You are searching yell.com ")
comtype = input(" Please enter a Company Type (e.g Newsagent, Barber): ")

pagesnum = 0
listinnum = 0
comloc = " "

f = csv.writer(open(datetime.today().strftime('%Y-%m-%d') + '-' + comtype + '-' + 'yelldata.csv', 'w'))
f.writerow(['Business Name', 'Business Type', 'Phone Number', 'Street Address', 'Locality', 'Region', 'Website'])

headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}

data_list = []
for x in postkeys:
    print(" Searching " + x + " for " + comtype + " companies")
    for y in postkeynum:
        url = 'https://www.yell.com/ucs/UcsSearchAction.do?keywords=' + comtype + '&pageNum=' + str(y) + '&location=' + x
        data_list.append(url)
        for item in data_list:
            site = requests.get(item, headers=headers)
            soup = BeautifulSoup(site.content, 'html.parser')
            questions = soup.select('.businessCapsule--mainContent')
            for question in questions:
                listinnum += 1
                busname = question.find(class_='businessCapsule--name').get_text()
                bustype = question.find(class_='businessCapsule--classification').get_text()
                busnumber = question.select_one('span.business--telephoneNumber')
                if busnumber is None:
                    busnumber = 'None'
                else:
                    busnumber = busnumber.text
                busadd = question.find('span', attrs={"itemprop": "streetAddress"})
                if busadd is None:
                    busadd = 'None'
                else:
                    busadd = busadd.text.replace(',', ' ')
                buslocal = question.find('span', attrs={"itemprop": "addressLocality"})
                if buslocal is None:
                    buslocal = 'None'
                else:
                    buslocal = buslocal.text
                buspost = question.find('span', attrs={"itemprop": "postalCode"})
                if buspost is None:
                    buspost = 'None'
                else:
                    buspost = buspost.text
                busweb = question.find('a', attrs={"rel": "nofollow noopener"})
                if busweb is None:
                    busweb = 'None'
                else:
                    busweb = busweb.attrs['href']
                print(busweb)
                f.writerow([busname, bustype, busnumber, busadd, buslocal, buspost, busweb])
            pagesnum += 1
            print(" Finished Page " + str(y) + ". For " + x + " . " + str(listinnum) + " listings so far. Moving To Next Page")
            print(" Waiting 30 seconds for security reasons.")
            sleep(30)

print(" Finished. \n Total: " + str(pagesnum) + " pages with " + str(listinnum) + " listings. \n Please look for file: " + datetime.today().strftime('%Y-%m-%d') + '-' + comtype + '-' + 'yelldata.csv')
Expected result:
finished page 1
finished page 2
finished page 3
etc
It's because you are appending to your data_list and then using a for loop to iterate through it after each new link is appended.
So it's going to do requests for page 1, then requests for pages 1 and 2, then pages 1, 2 and 3, then pages 1, 2, 3 and 4... etc.
There are two ways to fix that: 1) don't append to data_list and eliminate it altogether, or 2) append to data_list FIRST and then loop through it (i.e. separate the loop that appends to data_list from the loop that iterates through it).
I chose option 2):
from bs4 import BeautifulSoup
import time
from time import sleep
from datetime import datetime
import requests
import csv

print(" Initializing ...")
print(" Loading Keywords")

with open("C:/pcodes.txt") as pcodes:
    postkeys = []
    for line in pcodes:
        postkeys.append(line.strip())

with open("C:/pcodnum.txt") as pcodnum:
    postkeynum = []
    for line in pcodnum:
        postkeynum.append(line.strip())

print(" Welcome to YellScrape v1.0")
print(" You are searching yell.com ")
comtype = input(" Please enter a Company Type (e.g Newsagent, Barber): ")

pagesnum = 0
listinnum = 0
comloc = " "

f = csv.writer(open('C:/' + datetime.today().strftime('%Y-%m-%d') + '-' + comtype + '-' + 'yelldata.csv', 'w'))
f.writerow(['Business Name', 'Business Type', 'Phone Number', 'Street Address', 'Locality', 'Region', 'Website'])

headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
}

data_list = []
for x in postkeys:
    print(" Searching " + x + " for " + comtype + " companies")
    for y in postkeynum:
        url = 'https://www.yell.com/ucs/UcsSearchAction.do?keywords=' + comtype + '&pageNum=' + str(y) + '&location=' + x
        data_list.append(url)

# Now that you have created a list of the urls, you can loop through them
for item in data_list:
    page = item.split('pageNum=')[-1].split('&')[0]
    location = item[-5:]
    site = requests.get(item, headers=headers)
    soup = BeautifulSoup(site.content, 'html.parser')
    questions = soup.select('.businessCapsule--mainContent')
    for question in questions:
        listinnum += 1
        busname = question.find(class_='businessCapsule--name').get_text()
        bustype = question.find(class_='businessCapsule--classification').get_text()
        busnumber = question.select_one('span.business--telephoneNumber')
        if busnumber is None:
            busnumber = 'None'
        else:
            busnumber = busnumber.text
        busadd = question.find('span', attrs={"itemprop": "streetAddress"})
        if busadd is None:
            busadd = 'None'
        else:
            busadd = busadd.text.replace(',', ' ')
        buslocal = question.find('span', attrs={"itemprop": "addressLocality"})
        if buslocal is None:
            buslocal = 'None'
        else:
            buslocal = buslocal.text
        buspost = question.find('span', attrs={"itemprop": "postalCode"})
        if buspost is None:
            buspost = 'None'
        else:
            buspost = buspost.text
        busweb = question.find('a', attrs={"rel": "nofollow noopener"})
        if busweb is None:
            busweb = 'None'
        else:
            busweb = busweb.attrs['href']
        print(busweb)
        f.writerow([busname, bustype, busnumber, busadd, buslocal, buspost, busweb])
    pagesnum += 1
    print(" Finished Page " + page + ". For " + location + " . " + str(listinnum) + " listings so far. Moving To Next Page")
    if item != data_list[-1]:
        print(" Waiting 30 seconds for security reasons.")
        sleep(30)

print(" Finished. \n Total: " + str(pagesnum) + " pages with " + str(listinnum) + " listings. \n Please look for file: " + datetime.today().strftime('%Y-%m-%d') + '-' + comtype + '-' + 'yelldata.csv')
Initialize pageNum inside the for loop:

for x in postkeys:
    pageNum = 1

Increment pageNum inside the loop and format the URL:

for item in data_list:
    # format website url
    url = "https://www.yell.com/ucs/UcsSearchAction.do?keywords={}&pageNum={}&location={}".format(comtype, pageNum, x)
    site = requests.get(url, headers=headers)
    # check response status code:
    if site.status_code != 200:
        break
    pageNum += 1

You should remove this for loop:

for y in postkeynum:
    url = 'https://www.yell.com/ucs/UcsSearchAction.do?keywords=' + comtype + '&pageNum=' + str(y) + '&location=' + x
    data_list.append(url)
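Taken together, this second suggestion seems to amount to paging per postcode until the site stops returning 200, instead of reading page numbers from pcodnum.txt. A rough sketch under that assumption (the postcode list, company type and headers below are stand-ins for the question's variables):

import requests

postkeys = ['SW1A', 'M1']   # stand-in for the postcodes read from pcodes.txt
comtype = 'Barber'          # stand-in for the user-supplied company type
headers = {'user-agent': 'Mozilla/5.0'}

for x in postkeys:
    pageNum = 1  # initialize the page counter for each postcode
    while True:
        url = ("https://www.yell.com/ucs/UcsSearchAction.do?"
               "keywords={}&pageNum={}&location={}").format(comtype, pageNum, x)
        site = requests.get(url, headers=headers)
        if site.status_code != 200:
            break  # stop paging this postcode once the site stops answering
        # ... parse the page with BeautifulSoup here ...
        pageNum += 1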

Getting ValueError: unknown url type: ' '

I have this code below that iterates through some tracks. For each track I want to use the Musixmatch API to get and print the lyrics of the track, based on the artist name and track name.
Code that iterates through some tracks and prints the lyrics:
for i, v in tracks.items():
    artist = tracks[i]['artist'].replace(" ", "+")
    title = tracks[i]['title'].replace(" ", "+")
    print(tracks)
    print(song_lyric(title, artist))
print(tracks) returns data in this format:
{12: {'trackID': 12, 'title': 'Achtung Baby', 'number': '1', 'artist': 'U2', 'album': 'Achtung Baby', 'albumID': 2, 'duration': '291'}}
When the code is executed, the lyrics for the first tracks are printed, but then an error appears:
Traceback (most recent call last):
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 239, in <module>
print(song_lyric(title, artist))
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 72, in song_lyric
lyrics_tracking(tracking_url)
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 79, in lyrics_tracking
request = urllib.request.Request(querystring)
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 329, in __init__
self.full_url = url
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 355, in full_url
self._parse()
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 384, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: ''
Do you know why this error is appearing?
The methods used to get the lyrics from Musixmatch are publicly available:
def song_lyric(song_name, artist_name):
    while True:
        querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
            song_name) + "&q_artist=" + urllib.parse.quote(
            artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
        # matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        while True:
            try:
                response = urllib.request.urlopen(request,
                                                  timeout=4)  # timeout set to 4 seconds; automatically retries if times out
                raw = response.read()
            except socket.timeout:
                print("Timeout raised and caught")
                continue
            break
        json_obj = json.loads(raw.decode('utf-8'))
        body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
        copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
        tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
        #print(tracking_url)
        lyrics_tracking(tracking_url)
        return (body + "\n\n" + copyright)

def lyrics_tracking(tracking_url):
    while True:
        querystring = tracking_url
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        try:
            response = urllib.request.urlopen(request,
                                              timeout=4)  # timeout set to 4 seconds; automatically retries if times out
            raw = response.read()
        except socket.timeout:
            print("Timeout raised and caught")
            continue
        break
    print(raw)
Full working example that reproduces the error:
import requests
import json
import urllib.request, urllib.error, urllib.parse
import socket

apikey_musixmatch = '0b4a363bbd71974c2634837d5b5d1d9a'  # generated for the example
apiurl_musixmatch = 'http://api.musixmatch.com/ws/1.1/'
api_key = "b088cbedecd40b35dd89e90f55227ac2"  # generated for the example

def song_lyric(song_name, artist_name):
    while True:
        querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote(
            song_name) + "&q_artist=" + urllib.parse.quote(
            artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1"
        # matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        while True:
            try:
                response = urllib.request.urlopen(request,
                                                  timeout=4)  # timeout set to 4 seconds; automatically retries if times out
                raw = response.read()
            except socket.timeout:
                print("Timeout raised and caught")
                continue
            break
        json_obj = json.loads(raw.decode('utf-8'))
        body = json_obj["message"]["body"]["lyrics"]["lyrics_body"]
        copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"]
        tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
        print("Tracking_url====================" + tracking_url + "==================================")
        lyrics_tracking(tracking_url)
        return (body + "\n\n" + copyright)

def lyrics_tracking(tracking_url):
    while True:
        querystring = tracking_url
        request = urllib.request.Request(querystring)
        # request.add_header("Authorization", "Bearer " + client_access_token)
        request.add_header("User-Agent",
                           "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)")  # Must include user agent of some sort, otherwise 403 returned
        try:
            response = urllib.request.urlopen(request,
                                              timeout=4)  # timeout set to 4 seconds; automatically retries if times out
            raw = response.read()
        except socket.timeout:
            print("Timeout raised and caught")
            continue
        break
    print(raw)

ID = 0
# get top artists from country
artists = {}
for i in range(2, 3):
    artists_response = requests.get(
        'http://ws.audioscrobbler.com/2.0/?method=geo.gettopartists&country=spain&format=json&page=' + str(i) + '&api_key=' + api_key)
    artists_data = artists_response.json()
    for artist in artists_data["topartists"]["artist"]:
        name = artist["name"]
        url = artist["url"]
        if ID > 1: continue
        artists[ID] = {}
        artists[ID]['ID'] = ID
        artists[ID]['name'] = name
        ID += 1

for i, v in artists.items():
    chosen = artists[i]['name'].replace(" ", "+")
    artist_response = requests.get(
        'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&format=json&artist=' + chosen + '&api_key=' + api_key)
    artist_data = artist_response.json()

# get top albums of the artists
albums = {}
for i, v in artists.items():
    chosen = artists[i]['name'].replace(" ", "+")
    topalbums_response = requests.get(
        'http://ws.audioscrobbler.com/2.0/?method=artist.gettopalbums&format=json&artist=' + chosen + '&api_key=' + api_key + '&limit=5')
    albums_data = topalbums_response.json()
    for album in albums_data['topalbums']['album']:
        name = album["name"]
        url = album["url"]
        albums[ID] = {}
        albums[ID]['ID'] = ID
        albums[ID]['artist'] = artists[i]['name']
        albums[ID]['artistID'] = artists[i]['ID']
        albums[ID]['name'] = name
        ID += 1

# Get tracks of the album
tracks = {}
for i, v in albums.items():
    artist = albums[i]['artist'].replace(" ", "+")
    name = albums[i]['name'].replace(" ", "+")
    album_response_data = requests.get(
        'http://ws.audioscrobbler.com/2.0/?method=album.getinfo&format=json&api_key=' + api_key + '&artist=' + artist + '&album=' + name)
    album_response = album_response_data.json()
    for album in album_response['album']['tracks']['track']:
        title = album['name']
        tracks[ID] = {}
        tracks[ID]['trackID'] = ID
        tracks[ID]['title'] = title
        tracks[ID]['artist'] = albums[i]['artist']
        tracks[ID]['album'] = albums[i]['name']
        tracks[ID]['albumID'] = albums[i]['ID']
        ID += 1

for i, v in tracks.items():
    artist = tracks[i]['artist'].replace(" ", "+")
    title = tracks[i]['title'].replace(" ", "+")
    # print the lyrics of each track
    print(song_lyric(title, artist))
It seems like the URL is not correct. It happens here:
tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"]
If you are able to run that API locally and see what is returned in tracking_url, you can find out what is wrong with it.
UPDATE:
I reproduced it: urllib.request cannot process an empty-string URL (""). That is why you need to check that tracking_url != "", and only request the tracking page if it is not an empty string or None.
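A minimal sketch of that guard inside lyrics_tracking (same imports as above; the retry loop is omitted for brevity):

def lyrics_tracking(tracking_url):
    # Skip the request entirely when the API returns no tracking URL;
    # an empty string is what triggers "ValueError: unknown url type: ''".
    if not tracking_url:
        print("No tracking URL returned for this track, skipping.")
        return
    request = urllib.request.Request(tracking_url)
    request.add_header("User-Agent", "curl/7.9.8 (i686-pc-linux-gnu)")
    response = urllib.request.urlopen(request, timeout=4)
    print(response.read())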

Python TypeError on executing weather service code

I am using a weather API to design a Slack bot service in Python.
My source code is:
import requests
import re
import json
from bs4 import BeautifulSoup

def weather(cityname):
    cityid = extractid(cityname)
    url = "http://api.openweathermap.org/data/2.5/forecast?id=" + str(cityid) + "&APPID=c72f730d08a4ea1d121c8e25da7e4411"
    while True:
        r = requests.get(url, timeout=5)
        while r.status_code is not requests.codes.ok:
            r = requests.get(url, timeout=5)
        soup = BeautifulSoup(r.text)
        data = ("City: " + soup.city["name"] + ", Country: " + soup.country.text + "\nTemperature: " + soup.temperature["value"] +
                " Celsius\nWind: " + soup.speed["name"] + ", Direction: " + soup.direction["name"] + "\n\n" + soup.weather["value"])
        # print data
        return data

def extractid(cname):
    with open('/home/sourav/Git-Github/fabulous/fabulous/services/city.list.json') as data_file:
        data = json.load(data_file)
        for item in data:
            if item["name"] == cname:
                return item["id"]

def on_message(msg, server):
    text = msg.get("text", "")
    match = re.findall(r"~weather (.*)", text)
    if not match:
        return
    searchterm = match[0]
    return weather(searchterm.encode("utf8"))

on_bot_message = on_message
But executing the code gives the following error:
File "/usr/local/lib/python2.7/dist-packages/fabulous-0.0.1-py2.7.egg/fabulous/services/weather.py", line 19, in weather
" Celsius\nWind: " + soup.speed["name"] + ", Direction: " + soup.direction["name"] + "\n\n" + soup.weather["value"])
TypeError: 'NoneType' object has no attribute '__getitem__'
I can't figure out what the error is. Please help!
__getitem__ is called when you ask for a dictionary key, i.e. a['abc'] translates to a.__getitem__('abc'), so in this case one attribute of soup is None (speed, direction or weather).
Ensure that your r.text contains the data you want; simply print it:

print(r.text)

List the structure of the parsed data:

for child in soup.findChildren():
    print child

Always assume your input data might be wrong: instead of doing soup.city, do soup.find('city'), which might be empty, so:

city = soup.find('city')
if city is not None:
    city_name = city['name']
else:
    city_name = 'Error'  # or empty, or sth
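Folding that advice back into the question's weather() function might look roughly like the helper below. This is only a sketch: the tag and attribute names are simply the ones used in the original code, and build_weather_report is a hypothetical name.

def build_weather_report(soup):
    # Look each tag up with find() and return a readable message if any is
    # missing, instead of indexing a possible None.
    required = ['city', 'country', 'temperature', 'speed', 'direction', 'weather']
    tags = {name: soup.find(name) for name in required}
    missing = [name for name, tag in tags.items() if tag is None]
    if missing:
        return "Weather data incomplete, missing: " + ", ".join(missing)
    return ("City: " + tags['city']['name'] + ", Country: " + tags['country'].text +
            "\nTemperature: " + tags['temperature']['value'] + " Celsius\nWind: " +
            tags['speed']['name'] + ", Direction: " + tags['direction']['name'] +
            "\n\n" + tags['weather']['value'])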

Python: String indices must be integers

I'm working on some Python code to automate GitHub merge requests.
I found the following code below. When I run this, I get TypeError: string indices must be integers.
I've found several threads on here referencing this error, but I'm not quite sure how to implement the fixes in the code.
#!/usr/bin/env python
import json
import requests
import datetime

OAUTH_KEY = "xxxxxxxxxxxx"
repos = ['my_app']  # Add all repos you want automerged here
ignore_branches = ['master', 'release', 'staging', 'development']  # Add 'master' here if you don't want to automerge into master

# Print merge/no-merge message to logfile
def print_message(merging):
    if merging == True:
        message = "Merging: "
    else:
        message = "Not merging: "
    print message + str(pr_id) + " - " + user + " wants to merge " + head_ref + " into " + base_ref

# Merge the actual pull request
def merge_pr():
    r = requests.put("https://api.github.com/repos/:owner/%s/pulls/%d/merge" % (repo, pr_id,),
                     data=json.dumps({"commit_message": "Auto_Merge"}),
                     auth=('token', OAUTH_KEY))
    if "merged" in r.json() and r.json()["merged"] == True:
        print "Merged: " + r.json()['sha']
    else:
        print "Failed: " + r.json()['message']

# Main
print datetime.datetime.now()
for repo in repos:
    r = requests.get('https://api.github.com/repos/:owner/%s/pulls' % repo, auth=('token', OAUTH_KEY))
    data = r.json()
    for i in data:
        head_ref = i["head"]["ref"]
        base_ref = i["base"]["ref"]
        user = i["user"]["login"]
        pr_id = i["number"]
        if base_ref in ignore_branches:
            print_message(False)
        else:
            print_message(True)
            merge_pr()
Which line of code is showing the problem?
If it is this line:

else:
    message = "Not merging: "
print message + str(pr_id) + " - " + user + " wants to merge " + head_ref + " into " + base_ref

then try putting this code right below

if merging == True:
    message = "Merging: "

:

elif merging == False:
    message = "Not merging: "
print message + str(pr_id) + " - " + user + " wants to merge " + head_ref + " into " + base_ref
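As an aside, this TypeError typically means r.json() returned a dict (for example a GitHub API error object) rather than a list of pull requests, so the loop variable i is a string key and i["head"] fails. A small defensive check, sketched under that assumption and keeping the question's Python 2 style:

r = requests.get('https://api.github.com/repos/:owner/%s/pulls' % repo, auth=('token', OAUTH_KEY))
data = r.json()

# If the API rejected the request, data is a dict such as {"message": "Not Found", ...};
# iterating a dict yields its string keys, and indexing a string with "head"
# raises "TypeError: string indices must be integers".
if isinstance(data, dict):
    print "API error for %s: %s" % (repo, data.get('message', data))
else:
    for i in data:
        head_ref = i["head"]["ref"]
        # ... continue as before ...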
