How do you use a POST URI in Python?

I am trying to use a POST URI in my code to access NCBO's Annotator tool. My current code makes a GET request, but I don't know how to turn it into a POST request. My data is the text variable.
All the examples I've seen use requests.get(url) -> requests.post(url, data=data), but how can I do this with build_opener() and json.loads()?
Here is my code:
def get_json(url):
    # get annotations
    opener = urllib.request.build_opener()
    opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
    return json.loads(opener.open(url).read())

text = "random text with a lot of words"
annotations = get_json("http://data.bioontology.org/annotator?text=" + urllib.parse.quote(text))
Updated code:
def get_annotations(text, url):
    headers = [('Authorization', 'apikey token=' + API_KEY)]
    data = text
    response = requests.request("POST", url, headers=headers, data=data)
    return response.text.encode('utf-8')

annotations = get_annotations(text, "http://data.bioontology.org/annotator?text=" + urllib.parse.quote(text))
Error:
response = requests.request("POST",url,headers=headers,data=data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/sessions.py", line 516, in request
prep = self.prepare_request(req)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/sessions.py", line 449, in prepare_request
p.prepare(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/models.py", line 315, in prepare
self.prepare_headers(headers)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/models.py", line 447, in prepare_headers
for header in headers.items():
AttributeError: 'list' object has no attribute 'items'

I recommend importing the requests module; it makes choosing the request method easy.

import requests

def get_json(url):
    headers = {'Authorization': 'apikey token=' + API_KEY}  # add headers here; requests expects a dict, not a list of tuples
    data = {'text': text}  # params you want to send (here, the question's text variable)
    response = requests.request("POST", url, headers=headers, data=data)  # you can choose your method here
    return response.text.encode('utf-8')

For your reference, it's as simple as that.
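And to answer the build_opener() part of the question: urllib can POST as well; it switches the method from GET to POST whenever a data payload is passed to open(). A minimal sketch under that assumption, reusing the original header setup:

import json
import urllib.parse
import urllib.request

opener = urllib.request.build_opener()
opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
# passing encoded bytes as data turns the request into a POST
data = urllib.parse.urlencode({'text': text}).encode()
response = opener.open("http://data.bioontology.org/annotator", data)
annotations = json.loads(response.read())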

Related

Python - how to pass variables into another variable

I'm trying to pass the data of two variables into another variable, but I'm getting the following error:
Traceback (most recent call last):
File "poll_azure_devops_audit_log.py", line 28, in <module>
x = requests.request('GET', URL, proxies=PROXIES, auth=AZURE_AUTH).json()
File "/opt/splunk/lib/python3.7/site-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/opt/splunk/lib/python3.7/site-packages/requests/sessions.py", line 519, in request
prep = self.prepare_request(req)
File "/opt/splunk/lib/python3.7/site-packages/requests/sessions.py", line 462, in prepare_request
hooks=merge_hooks(request.hooks, self.hooks),
File "/opt/splunk/lib/python3.7/site-packages/requests/models.py", line 317, in prepare
self.prepare_auth(auth, url)
File "/opt/splunk/lib/python3.7/site-packages/requests/models.py", line 548, in prepare_auth
r = auth(self)
TypeError: 'str' object is not callable
How do I pass the variables USER and PASSWORD into the variable AZURE_AUTH?
import sys
import requests
import json
from urllib.parse import quote
from requests.auth import HTTPBasicAuth
ORGANIZATION = 'xxx'
CONTINUATIONTOKEN = None
API_ENDPOINT = 'https://auditservice.dev.azure.com/'
USER = 'xxx'
PASSWORD = 'xxx'
AZURE_AUTH = HTTPBasicAuth('user123', 'password123')
URL = API_ENDPOINT + ORGANIZATION + '/_apis/audit/auditlog?api-version=6.1-preview&skipAggregation=true'
PROXIES = {
    'http': 'http://xxx:8080',
    'https': 'https://xxx:8080',
}
print(URL)
print(AZURE_AUTH)
print(USER)
print(PASSWORD)
print(PROXIES)
x = requests.request('GET', URL, proxies=PROXIES, auth=AZURE_AUTH).json()
print (x)
I think the problem you think you have is not the problem you actually have. Passing USER and PASSWORD to HTTPBasicAuth is done like this:
AZURE_AUTH = HTTPBasicAuth(USER, PASSWORD)
and you can assign any value you want to your USER and PASSWORD variables like this:
USER = 'user123'
PASSWORD = 'password123'
If you get an error, it is coming from somewhere else.
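That "somewhere else" is visible in the traceback: requests calls auth(self) in prepare_auth, so auth must be a callable auth object (or a plain (user, password) tuple, which requests wraps for you), never a bare string. A minimal sketch of the accepted forms, reusing the URL and PROXIES from the question:

import requests
from requests.auth import HTTPBasicAuth

USER = 'user123'
PASSWORD = 'password123'

# either an auth object ...
AZURE_AUTH = HTTPBasicAuth(USER, PASSWORD)
# ... or simply a tuple, which requests converts to basic auth for you:
# AZURE_AUTH = (USER, PASSWORD)
# a plain string such as USER + ':' + PASSWORD reproduces the TypeError

x = requests.get(URL, proxies=PROXIES, auth=AZURE_AUTH).json()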

How to solve 'RecursionError: maximum recursion depth exceeded' with Eventlet and Requests in Python

I am trying to implement the Amazon Web Scraper mentioned here. However, I get the output mentioned below, which repeats until it stops with RecursionError: maximum recursion depth exceeded.
I have already tried downgrading eventlet to version 0.17.4 as mentioned here.
Also, the requests module is getting patched, as you can see in helpers.py.
helpers.py
import os
import random
from datetime import datetime
from urllib.parse import urlparse

import eventlet
requests = eventlet.import_patched('requests.__init__')
time = eventlet.import_patched('time')
import redis
from bs4 import BeautifulSoup
from requests.exceptions import RequestException

import settings

num_requests = 0

redis = redis.StrictRedis(host=settings.redis_host, port=settings.redis_port, db=settings.redis_db)

def make_request(url, return_soup=True):
    # global request building and response handling
    url = format_url(url)
    if "picassoRedirect" in url:
        return None  # skip the redirect URLs
    global num_requests
    if num_requests >= settings.max_requests:
        raise Exception("Reached the max number of requests: {}".format(settings.max_requests))
    proxies = get_proxy()
    try:
        r = requests.get(url, headers=settings.headers, proxies=proxies)
    except RequestException as e:
        log("WARNING: Request for {} failed, trying again.".format(url))
    num_requests += 1
    if r.status_code != 200:
        os.system('say "Got non-200 Response"')
        log("WARNING: Got a {} status code for URL: {}".format(r.status_code, url))
        return None
    if return_soup:
        return BeautifulSoup(r.text), r.text
    return r

def format_url(url):
    # make sure URLs aren't relative, and strip unnecessary query args
    u = urlparse(url)
    scheme = u.scheme or "https"
    host = u.netloc or "www.amazon.de"
    path = u.path
    if not u.query:
        query = ""
    else:
        query = "?"
        for piece in u.query.split("&"):
            k, v = piece.split("=")
            if k in settings.allowed_params:
                query += "{k}={v}&".format(**locals())
        query = query[:-1]
    return "{scheme}://{host}{path}{query}".format(**locals())

def log(msg):
    # global logging function
    if settings.log_stdout:
        try:
            print("{}: {}".format(datetime.now(), msg))
        except UnicodeEncodeError:
            pass  # squash logging errors in case of non-ascii text

def get_proxy():
    # choose a proxy server to use for this request, if we need one
    if not settings.proxies or len(settings.proxies) == 0:
        return None
    proxy = random.choice(settings.proxies)
    proxy_url = "socks5://{user}:{passwd}@{ip}:{port}/".format(
        user=settings.proxy_user,
        passwd=settings.proxy_pass,
        ip=proxy,
        port=settings.proxy_port,
    )
    return {
        "http": proxy_url,
        "https": proxy_url,
    }

if __name__ == '__main__':
    # test proxy server IP masking
    r = make_request('https://api.ipify.org?format=json', return_soup=False)
    print(r.text)
Output:
Traceback (most recent call last):
File "helpers.py", line 112, in <module>
r = make_request('https://api.ipify.org?format=json', return_soup=False)
File "helpers.py", line 36, in make_request
r = requests.get(url, headers=settings.headers, proxies=proxies)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
chunked=chunked,
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 376, in _make_request
self._validate_conn(conn)
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 994, in _validate_conn
conn.connect()
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connection.py", line 300, in connect
conn = self._new_conn()
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/contrib/socks.py", line 99, in _new_conn
**extra_kw
File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 199, in create_connection
sock.connect((remote_host, remote_port))
File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 47, in wrapper
return function(*args, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 774, in connect
super(socksocket, self).settimeout(self._timeout)
File "/home/ec2-user/env/lib64/python3.7/site-packages/eventlet/greenio/base.py", line 395, in settimeout
self.setblocking(True)
What might be the problem here?
It turns out that removing eventlet.monkey_patch() and the import eventlet line solved the problem.
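Presumably the double patching was the culprit: with eventlet.monkey_patch() applied on top of import_patched(), the green socket's settimeout() keeps calling back into setblocking() (the last two frames of the traceback) until the recursion limit is hit. A sketch of the idea, assuming (hypothetically) the offending calls sat in the entry-point module:

# entry-point module, before the fix (hypothetical reconstruction):
# import eventlet
# eventlet.monkey_patch()  # patches sockets globally, on top of ...

# ... helpers.py, which already patches the one module that needs green sockets:
import eventlet
requests = eventlet.import_patched('requests.__init__')
time = eventlet.import_patched('time')

# the fix: drop the monkey_patch() call (and its import) and keep
# import_patched() as the single patching mechanism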

Python requests.get() function issue - Failed to establish a new connection

I have written a simple Python script that reads a list of domains from a txt file and checks whether each one is a WordPress site based on the returned result.
The code is as follows:
import requests

# Loop domains list
with open('domains2') as f:
    for line in f:
        domain = line
        source = requests.get(domain)
        if "wp-include" in source:
            results = 'Yes'
        else:
            results = 'No'
        print(line, ' : ', results)
The errors are as follows:
Traceback (most recent call last):
File "./test4.py", line 8, in <module>
source = requests.get(domain)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 516, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='testing.com%0a', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd5a00c4d50>: Failed to establish a new connection: [Errno -2] Name or service not known',))
I was only able to run my code when I set the value of source manually as follows, without reading the domains from the list; the results were then correct:
source = requests.get(domain).text
import requests

# Loop domains list
with open('domains2') as f:
    for line in f:
        domain = line.rstrip()
        source = requests.get(domain)
        if "wp-include" in source.text:
            results = 'Yes'
        else:
            results = 'No'
        print(line, ' : ', results)
Use source.text to get the response body, and rstrip() to remove the trailing \n.
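That trailing newline is also what produced the odd host in the traceback: 'testing.com%0a' is just the un-stripped \n, percent-encoded (the host itself gets lowercased). A quick illustration:

>>> 'testing.com\n'.rstrip()
'testing.com'
>>> import urllib.parse
>>> urllib.parse.quote('\n')
'%0A'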
And with domain transformation to a valid URL (for requests, Python 3):
#!/usr/bin/env python
import requests
import re
from urllib import parse

def get_domains(file):
    res = []
    with open(file) as f:
        for x in f:
            url = x.strip()
            p = parse.urlparse(url, 'http')
            netloc = p.netloc or p.path
            path = p.path if p.netloc else ''
            if not netloc.startswith('www.'):
                netloc = 'www.' + netloc
            p = parse.ParseResult('http', netloc, path, *p[3:])
            res.append(p.geturl())
    return res

def is_wordpress(url):
    print(f"getting: {url}")
    content = requests.get(url).text
    if re.search('wp-include', content):
        return True
    else:
        return False

def main():
    result = {}
    for domain in get_domains('domain.txt'):
        result[domain] = is_wordpress(domain)
    print(result)

if __name__ == '__main__':
    main()
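For a quick sanity check of the normalization, assuming a domain.txt containing, say, example.com and https://wordpress.org/news/:

for url in get_domains('domain.txt'):
    print(url)
# -> http://www.example.com
# -> http://www.wordpress.org/news/  (the scheme is forced to http)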

REST API throwing max retries exceeded error

I am trying to connect to a website via a RESTful API. A token has to be generated to access the methods, and that is working fine, since I can access data through all the other methods, but I am stuck at this one.
My code so far:
import requests

class FlipkartAPI:
    def __init__(self, token, sandbox=False):
        self.token = token
        self.session = self.get_session()
        self.sandbox = sandbox

    def get_session(self):
        session = requests.Session()
        session.headers.update({
            'Authorization': 'Bearer %s' % self.token,
            'Content-type': 'application/json',
        })
        return session

    def returns(self, source, modified_after=None, created_after=None):
        if self.sandbox == False:
            url = "http://api.flipkart.net/returns"
        else:
            url = "http://sandbox-api.flipkart.net/returns"
        payload = {'source': source,
                   'modifiedAfter': modified_after,
                   'createdAfter': created_after}
        return self.session.get(url, params=payload)
test.py:
class ListOrders:
    def __init__(self):
        self.app_id = 'app_id'
        self.app_secret = 'app_secret'
        auth = Authentication(self.app_id, self.app_secret, sandbox=False)
        get_token = auth.get_access_token()
        token_str = get_token.json()
        token = token_str['access_token']
        self.flipkart = FlipkartAPI(token, sandbox=False)

    def ret(self):
        r = self.flipkart.returns('customer_return', modified_after='2015-09-01', created_after='2015-09-01')
        print r.url
        print r.status_code
The problem is that I get a max retries exceeded error every time I call the ret method, and it doesn't even print the URL or the status code for the request. Link to Documentation. What am I doing wrong? I can access the other methods, so there is no problem with the token generation.
Traceback:
Traceback (most recent call last):
File "test.py", line 131, in <module>
r = x.ret()
File "test.py", line 123, in ret
r = self.flipkart.returns('customer_return')
File "/home/manish/Desktop/Flipkart_Api_Main/api.py", line 77, in returns
return self.session.get(url)
File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 467, in get
return self.request('GET', url, **kwargs)
File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 455, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 558, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python2.7/dist-packages/requests/adapters.py", line 378, in send
raise ConnectionError(e)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='api.flipkart.net', port=80): Max retries exceeded with url: /returns (Caused by <class 'socket.error'>: [Errno 111] Connection refused)
EDIT: Postman app data (images)

Python Request - ConnectionError - Max retries exceeded

I am using rauth and requests to make calls to the Beatport API. The call works, but I quite occasionally get the following error: ConnectionError: HTTPSConnectionPool(host='oauth-api.beatport.com', port=443): Max retries exceeded with url
Here is the traceback.
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "scraper/songlist_top100.py", line 88, in <module>
'sortBy': 'releaseDate ASC'})
File "C:\Python27\lib\site-packages\requests\sessions.py", line 347, in get
return self.request('GET', url, **kwargs)
File "C:\Python27\lib\site-packages\rauth\session.py", line 208, in request
return super(OAuth1Session, self).request(method, url, **req_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 335, in reques
t
resp = self.send(prep, **send_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 438, in send
r = adapter.send(request, **kwargs)
File "C:\Python27\lib\site-packages\requests\adapters.py", line 327, in send
raise ConnectionError(e)
ConnectionError: HTTPSConnectionPool(host='oauth-api.beatport.com', port=443):
Max retries exceeded with url: /catalog/3/tracks?perPage=150&
oauth_nonce=xxxxx&oauth_timestamp=xxxxx&facets=artistName%3A
Avicii&oauth_signature_method=HMAC-SHA1&oauth_version=1.0&
oauth_consumer_key=xxxxx&oauth_token=xxxxxx&sortBy=releaseDate+ASC
&oauth_signature=xxxxx%3D&page=3 (Caused by <class 'httplib.BadStatusLine'>: '')
Here is my script:
from rauth import OAuth1Service
import requests
from hunt.models import DJ, Song

def get_obj_or_none(model, **kwargs):
    try:
        return model.objects.get(**kwargs)
    except model.DoesNotExist:
        return None

beatport_login = 'xxx'
beatport_pass = 'xxx'

beatport = OAuth1Service(
    name='beatport',
    consumer_key='xxxxx',
    consumer_secret='xxxxx',
    request_token_url='https://oauth-api.beatport.com/identity/1/oauth/request-token',
    access_token_url='https://oauth-api.beatport.com/identity/1/oauth/access-token',
    authorize_url='https://oauth-api.beatport.com/identity/1/oauth/authorize',
    base_url='https://oauth-api.beatport.com/json/catalog')

request_token, request_token_secret = beatport.get_request_token(method='POST', data={
    'oauth_callback': 'http://www.edmhunters.com'})
authorize_url = beatport.get_authorize_url(request_token)

values = {
    'oauth_token': request_token,
    'username': beatport_login,
    'password': beatport_pass,
    'submit': 'Login',
}

r = requests.post('https://oauth-api.beatport.com/identity/1/oauth/authorize-submit', data=values)
verifier = r.url.split("oauth_verifier=", 1)[1]

tokens = beatport.get_raw_access_token(request_token, request_token_secret, method='POST', data={
    'oauth_verifier': verifier})
token_string = tokens.content
access_token = token_string[token_string.find('=') + 1:token_string.find('&')]
access_token_secret = token_string[token_string.find('t=') + 2:token_string.rfind('&s')]

session = beatport.get_session((access_token, access_token_secret))

for dj in DJ.objects.all():
    r = session.get('https://oauth-api.beatport.com/catalog/3/tracks', params={'facets': "artistName:" + dj.name, 'perPage': 150})
    count_response = r.json()
    results = []
    for i in range(1, count_response['metadata']['totalPages'] + 1):
        r1 = session.get('https://oauth-api.beatport.com/catalog/3/tracks', params={'facets': "artistName:" + dj.name,
                                                                                    'page': i,
                                                                                    'perPage': 150,
                                                                                    'sortBy': 'releaseDate ASC'})
        json_response = r1.json()
        results += json_response['results']
    song_list = []
    for song in results:
        artists = [artist['name'] for artist in song['artists'] if str(artist['type']) == 'artist']
        remixers = [artist['name'] for artist in song['artists'] if str(artist['type']) == 'remixer']
        if not ((dj.name in artists) and ((dj.name not in remixers) if len(remixers) > 0 else False)):
            song_list.append(song)
    for song in song_list:
        artists = [artist['name'] for artist in song['artists'] if str(artist['type']) == 'artist']
        remixers = [artist['name'] for artist in song['artists'] if str(artist['type']) == 'remixer']
        artist_list = ', '.join(artists)
        remixer_list = ', '.join(remixers)
        song_name = song['name']
        if not (song_name.lower().find("feat.") == -1):
            normal_name = song_name[0:song_name.lower().find("feat.")].rstrip()
        else:
            normal_name = song_name
        genre_list = []
        for genre in song['genres']:
            genre_list.append(genre['name'])
        genres = ', '.join(genre_list)
        if not get_obj_or_none(Song, name__iexact=song_name, artist=dj):
            s = Song(song_id=song['id'],
                     name=song_name,
                     title=song['title'],
                     normalized_name=normal_name,
                     artist=dj,
                     artists=artist_list,
                     remixers=remixer_list,
                     release_date=song['releaseDate'],
                     slug=song['slug'],
                     artwork=song['images']['large']['url'],
                     genres=genres)
            s.save()
            print "Added song:", s.song_id, s.artist
Why do I get the above-mentioned error?
It looks as if the Beatport API is overloaded and sometimes closes the connection prematurely. Your first set of requests succeeded just fine; it was page 3 that threw the error, because the response was empty.
You really should report this to Beatport, but you could perhaps work around the issue by instructing the requests module to retry requests:
from requests.adapters import HTTPAdapter
# ....
session = beatport.get_session((access_token, access_token_secret))
session.mount('https://oauth-api.beatport.com', HTTPAdapter(max_retries=5))
would retry your requests a few more times in case an error occurred.
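If you want more control than a bare retry count, you can pass a Retry object instead; a sketch, assuming a requests/urllib3 combination recent enough to support backoff_factor:

from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry  # older requests: from requests.packages.urllib3.util.retry import Retry

# retry up to 5 times, with exponential backoff between attempts
retry = Retry(total=5, backoff_factor=0.5)
session.mount('https://oauth-api.beatport.com', HTTPAdapter(max_retries=retry))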
