I am trying to run some sample code that retrieves a list of ontology names from a website, and I get the error below. I'm not sure what is going on or what I should do to fix it. Any help would be greatly appreciated!
This is the code I am trying to run:
import urllib.request, urllib.error, urllib.parse
import json
import ssl
import requests
import os
from pprint import pprint
REST_URL = "http://data.bioontology.org"
API_KEY = ""
def get_json(url):
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ctx))
    opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
    return json.loads(opener.open(url).read())
# Get the available resources
resources = get_json(REST_URL + "/")
# Get the ontologies from the `ontologies` link
ontologies = get_json(resources["links"]["ontologies"])
# Get the name and ontology id from the returned list
ontology_output = []
for ontology in ontologies:
    ontology_output.append(f"{ontology['name']}\n{ontology['@id']}\n")
# Print the first ontology in the list
pprint(ontologies[0])
# Print the names and ids
print("\n\n")
for ont in ontology_output:
    print(ont)
This is the error message I am getting:
Traceback (most recent call last):
File "listOnt.py", line 23, in <module>
ontologies = get_json(resources["links"]["ontologies"])
File "listOnt.py", line 17, in get_json
return json.loads(opener.open(url).read())
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 531, in open
response = meth(req, response)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 640, in http_response
response = self.parent.error(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 569, in error
return self._call_chain(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 401: Unauthorized
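For what it's worth: HTTP 401 Unauthorized means the server rejected the request's credentials, and with API_KEY = "" above, the Authorization header goes out as apikey token= with nothing after it. Assuming you have a BioPortal account, the first thing to try is supplying a real key (the value below is just a placeholder):
API_KEY = "replace-with-your-bioportal-api-key"  # placeholder, not a real key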
Related
I'm trying to use the pastebin API (docs: https://pastebin.com/doc_api) with the urllib library (https://docs.python.org/3/library/urllib.html).
import urllib.request
import urllib.parse
def main():
    def pastebinner():
        site = 'https://pastebin.com/api/api_post.php'
        dev_key = ''  # actual key redacted from the post
        code = "12345678910, test"
        our_data = urllib.parse.urlencode({"api_dev_key": dev_key, "api_option": "paste", "api_paste_code": code})
        our_data = our_data.encode()
        resp = urllib.request.urlopen(site, our_data)
        print(resp.read())
    pastebinner()

if __name__ == "__main__":
    main()
Here's the error I get:
File "C:\Program
Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.1520.0_x64__qbz5n2kfra8p0\lib\urllib\request.py",
line 214, in urlopen
return opener.open(url, data, timeout) File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.1520.0_x64__qbz5n2kfra8p0\lib\urllib\request.py",
line 523, in open
response = meth(req, response) File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.1520.0_x64__qbz5n2kfra8p0\lib\urllib\request.py",
line 632, in http_response
response = self.parent.error( File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.1520.0_x64__qbz5n2kfra8p0\lib\urllib\request.py",
line 561, in error
return self._call_chain(*args) File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.1520.0_x64__qbz5n2kfra8p0\lib\urllib\request.py",
line 494, in _call_chain
result = func(*args) File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.1520.0_x64__qbz5n2kfra8p0\lib\urllib\request.py",
line 641, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp) urllib.error.HTTPError: HTTP Error 422: Unprocessable entity
Any ideas regarding the reason for getting this error?
You are using urllib.request.urlopen(site, our_data). Passing a data argument already makes urllib send an HTTP POST, so the method itself is probably not the issue; a 422 from pastebin usually means a required field was rejected, and dev_key is empty in your snippet. You can also make the POST explicit with a Request object:
Please note that the code below is untested
import urllib.request
import urllib.parse
def main():
    def pastebinner():
        site = 'https://pastebin.com/api/api_post.php'
        dev_key = 'APIKEYGOESHERE'
        code = "12345678910, test"
        our_data = urllib.parse.urlencode({"api_dev_key": dev_key, "api_option": "paste", "api_paste_code": code})
        our_data = our_data.encode()
        request = urllib.request.Request(site, method='POST')
        resp = urllib.request.urlopen(request, our_data)
        print(resp.read())
    pastebinner()

if __name__ == "__main__":
    main()
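For comparison, a minimal sketch of the same call using the requests library (untested; the endpoint and field names come from the pastebin docs linked above, and the key is still a placeholder):
import requests

site = 'https://pastebin.com/api/api_post.php'
payload = {
    'api_dev_key': 'APIKEYGOESHERE',  # placeholder key
    'api_option': 'paste',
    'api_paste_code': '12345678910, test',
}
resp = requests.post(site, data=payload)  # requests form-encodes the dict and sends a POST
print(resp.status_code, resp.text)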
The error is very unhelpful. I mean, why not return a teapot response instead?
Leaving this here in case anyone else runs into this issue. Not 100% sure about this, will test later: don't use urllib2, use httplib2. I believe that will fix your problem.
I am attempting to make a program that downloads a series of product pictures from a site using Python. The site stores its images under a certain URL format, https://www.sitename.com/XYZabcde, where XYZ are three letters that represent the brand of the product and abcde are a series of numbers between 00000 and 30000.
Here is my code:
import urllib.request
def down(i, inp):
    full_path = 'images/image-{}.jpg'.format(i)
    url = "https://www.sitename.com/{}{}.jpg".format(inp, i)
    urllib.request.urlretrieve(url, full_path)
    print("saved")
    return None

inp = input("brand: ")
i = 20100
while i <= 20105:
    x = str(i)
    y = x.zfill(5)
    z = "https://www.sitename.com/{}{}.jpg".format(inp, y)
    print(z)
    down(y, inp)
    i += 1
With the code I have written I can successfully download a series of pictures that I know exist; for example, brand RVL from 20100 to 20105 will successfully download those six pictures.
However, when I broaden the while loop to include links I don't know will give me an image, I get this error:
Traceback (most recent call last):
File "c:/Users/euan/Desktop/university/programming/Python/parser/test - Copy.py", line 20, in <module>
down(y, inp)
File "c:/Users/euan/Desktop/university/programming/Python/parser/test - Copy.py", line 6, in down
urllib.request.urlretrieve(url, full_path)
File "C:\Users\euan\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "C:\Users\euan\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\euan\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\euan\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 640, in http_response
response = self.parent.error(
File "C:\Users\euan\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\euan\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\euan\AppData\Local\Programs\Python\Python38\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
What can I do to check for and avoid any URL that would yield this result?
You cannot know in advance which URLs you don't have access to, but you can wrap the download in a try/except:
import urllib.request, urllib.error
...
def down(i, inp):
    full_path = 'images/image-{}.jpg'.format(i)
    url = "https://www.sitename.com/{}{}.jpg".format(inp, i)
    try:
        urllib.request.urlretrieve(url, full_path)
        print("saved")
    except urllib.error.HTTPError as e:
        print("failed:", e)
    return None
In that case it will just print e.g. "failed: HTTP Error 403: Forbidden" whenever a URL cannot be fetched, and the program will continue.
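If you would rather probe a URL before downloading, one option is a HEAD request, which transfers only the response headers. A minimal sketch, assuming the server answers HEAD the same way it answers GET (not every server does); url_exists is a hypothetical helper name:
import urllib.request, urllib.error

def url_exists(url):
    # HEAD asks for the headers only, so no image data is downloaded.
    req = urllib.request.Request(url, method='HEAD')
    try:
        with urllib.request.urlopen(req) as resp:
            return resp.status == 200
    except urllib.error.HTTPError:
        return False
Even with such a check, keeping the try/except around the actual download is the more robust approach, since the file can disappear between the two requests.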
My current program looks like this:
import os
import urllib.request
baseUrl = "https://website.com/wp-content/upload/xxx/yyy/zzz-%s.jpg"
for i in range(1,48):
    url = baseUrl % i
    urllib.request.urlretrieve(baseUrl, os.path.basename(url))
I haven't coded Python in a long time, but I wrote this using urllib2 back when I used Python 2.7.
It is supposed to substitute the %s in the URL, loop through 1 to 47, and download all the images to the directory the script is in. But I get a lot of errors.
Edit: here is the error that is thrown.
Traceback (most recent call last):
File "download.py", line 9, in <module>
urllib.request.urlretrieve(url, os.path.basename(url))
File "C:\Program Files\Python37\lib\urllib\request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "C:\Program Files\Python37\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\Python37\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Program Files\Python37\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Program Files\Python37\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Program Files\Python37\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Program Files\Python37\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
urllib.request only exists in Python 3, so you have to run the code with Python 3.
Try using the requests module:
import os
import requests

baseUrl = "https://website.com/wp-content/upload/xxx/yyy/zzz-%s.jpg"

for i in range(1,48):
    url = baseUrl % i
    response = requests.get(url)
    my_raw_data = response.content
    with open(os.path.basename(url), 'wb') as my_data:
        my_data.write(my_raw_data)  # the with block closes the file, no explicit close needed
Just to add: you must use url in the request, not baseUrl as shown in your code:
import os
import urllib.request
baseUrl = "https://website.com/wp-content/upload/xxx/yyy/zzz-%s.jpg"
for i in range(1,48):
    url = baseUrl % i
    # urllib.request.urlretrieve(baseUrl, os.path.basename(url))
    # Use this line instead:
    urllib.request.urlretrieve(url, os.path.basename(url))
Run this in Python 3
Simple fix, if you pass the correct string:
urllib.request.urlretrieve(url, os.path.basename(url))
The documentation says urlretrieve is part of a legacy interface that may become deprecated, so you might want to find a different way to do this.
I found this alternate approach modified from another SO answer:
import os
import requests
baseUrl = "https://website.com/wp-content/upload/xxx/yyy/zzz-%s.jpg"
for i in range(1,48):
    url = baseUrl % i
    r = requests.get(url)
    with open(os.path.basename(url), 'wb') as f:
        f.write(r.content)
I am learning Python networking. I have learnt sockets, and now I want to learn Python HTTP: connecting to an HTTP server, extracting cookies, and so on. I am facing a problem with cookie extraction. I tried Google but didn't find a solution. Here is the code:
import cookielib
import urllib
import urllib2
ID_USERNAME = 'id_username'
ID_PASSWORD = 'id_password'
USERNAME = 'you@email.com'
PASSWORD = 'mypassword'
LOGIN_URL = 'https://bitbucket.org/account/signin/?next=/'
NORMAL_URL = 'https://bitbucket.org/'

def extract_cookie_info():
    cj = cookielib.CookieJar()
    login_data = urllib.urlencode({ID_USERNAME: USERNAME, ID_PASSWORD: PASSWORD})
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    resp = opener.open(LOGIN_URL, login_data)
    for cookie in cj:
        print "First time cookie: %s ----> %s" % (cookie.name, cookie.value)
    print "Headers: %s" % resp.headers
    resp = opener.open(NORMAL_URL)
    for cookie in cj:
        print "Second time cookie: %s --> %s" % (cookie.name, cookie.value)
    print "Headers: %s" % resp.headers

if __name__ == '__main__':
    extract_cookie_info()
This is the error:
Traceback (most recent call last):
File "e.py",line 27,in <module>
extract_cookie_info()
File "e.py",line 16,in extract_cookie_info
resp=opener.open(LOGIN_URL,login_data)
File "C:\Python27\lib\urllib2.py",line 435, in open
response = meth(req,response)
File "C:\Python27\lib\urllib2.py", line 548, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 473, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 407, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 556, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 403: Forbidden
You are sending your login details as POST data rather than as part of the url.
>>> url = 'https://bitbucket.org/account/signin/'
>>> user = 'foo@example.com'
>>> pwd = 'secret'
>>> d = urlencode({'ID_USERNAME': user, 'ID_PASSWORD': pwd})
>>> cj = cookielib.CookieJar()
>>> opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
>>> resp = opener.open(url + '?' + d)
>>> resp.getcode()
200
>>> for cookie in cj: print cookie.name
...
csrftoken
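For anyone on Python 3: cookielib was renamed http.cookiejar and urllib2 was folded into urllib.request. A rough, untested translation of the same idea (note that bitbucket's real login flow also involves a CSRF token, so this only illustrates the cookie-jar API):
import http.cookiejar
import urllib.parse
import urllib.request

cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
params = urllib.parse.urlencode({'id_username': 'you@email.com', 'id_password': 'mypassword'})
resp = opener.open('https://bitbucket.org/account/signin/?' + params)
for cookie in cj:
    print('Cookie: %s ----> %s' % (cookie.name, cookie.value))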
I've written the following Python code that goes to each URL in an array and finds specific info about that page, a web scraper of sorts. This one takes an array of Reddit threads and outputs the score of each thread. The program almost never executes completely; usually I get through five or so iterations before receiving the error message below. Could someone please help me get to the bottom of this?
import urllib2
from bs4 import BeautifulSoup
urls = ['http://www.reddit.com/r/videos/comments/1i12o2/soap_precursor_to_a_lot_of_other_hilarious_shows/', 'http://www.reddit.com/r/videos/comments/1i12nx/kid_reporter_interviews_ryan_reynolds/', 'http://www.reddit.com/r/videos/comments/1i12ml/just_my_two_boys_going_full_derp_shocking_plot/']
for x in urls:
    f = urllib2.urlopen(x)
    data = f.read()
    soup = BeautifulSoup(data)
    span = soup.find('span', attrs={'class': 'number'})
    print '{}:{}'.format(x, span.text)
The error message I am getting is:
Traceback (most recent call last):
File "C:/Users/jlazarus/Documents/YouTubeparse2.py", line 7, in <module>
f = urllib2.urlopen(x)
File "C:\Python27\lib\urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 410, in open
response = meth(req, response)
File "C:\Python27\lib\urllib2.py", line 523, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 448, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 382, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 531, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
HTTPError: HTTP Error 429: Unknown
Ignore it with a try/except rule to catch the error; this is what you want if you just want to skip past failing URLs:
import urllib2
from bs4 import BeautifulSoup
urls = ['http://www.reddit.com/r/videos/comments/1i12o2/soap_precursor_to_a_lot_of_other_hilarious_shows/', 'http://www.reddit.com/r/videos/comments/1i12nx/kid_reporter_interviews_ryan_reynolds/', 'http://www.reddit.com/r/videos/comments/1i12ml/just_my_two_boys_going_full_derp_shocking_plot/']
for x in urls:
    try:
        f = urllib2.urlopen(x)
        data = f.read()
        soup = BeautifulSoup(data)
        span = soup.find('span', attrs={'class': 'number'})
        print '{}:{}'.format(x, span.text)
    except urllib2.HTTPError:  # the exception lives in urllib2; a bare HTTPError would be a NameError
        print "HTTP Error, continuing"
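It may also help to know what the status means: HTTP 429 is "Too Many Requests", i.e. reddit is rate-limiting the script. Besides catching the error, pausing between requests should make it trigger far less often. A sketch of the same loop with a delay (the two-second pause is an arbitrary assumption, not a documented limit):
import time
import urllib2
from bs4 import BeautifulSoup

for x in urls:
    try:
        soup = BeautifulSoup(urllib2.urlopen(x).read())
        span = soup.find('span', attrs={'class': 'number'})
        print '{}:{}'.format(x, span.text)
    except urllib2.HTTPError as e:
        print "HTTP Error, continuing: %s" % e
    time.sleep(2)  # wait between requests to stay under the rate limit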