I am trying to connect to a website via a RESTful API. A token has to be generated to access the methods. It's working fine, since I can access data through all the other methods, but I am stuck on this one.
My code So far:
class FlipkartAPI:
    """Thin client for the Flipkart REST API, authenticated with a bearer token.

    All calls go through one ``requests.Session`` that already carries the
    ``Authorization`` and ``Content-type`` headers.
    """

    def __init__(self, token, sandbox=False):
        """Remember the token and environment flag, then build the session."""
        self.sandbox = sandbox
        self.token = token
        # get_session() reads self.token, so the token must be stored first.
        self.session = self.get_session()

    def get_session(self):
        """Return a new ``requests.Session`` pre-loaded with the auth headers."""
        auth_headers = {
            'Authorization': 'Bearer %s' % self.token,
            'Content-type': 'application/json',
        }
        http = requests.Session()
        http.headers.update(auth_headers)
        return http

    def returns(self, source, modified_after=None, created_after=None):
        """GET the ``/returns`` endpoint and hand back the raw response.

        ``source``, ``modified_after`` and ``created_after`` are sent as the
        ``source``, ``modifiedAfter`` and ``createdAfter`` query parameters.
        """
        # NOTE(review): both endpoints are plain http, so the bearer token is
        # sent unencrypted -- confirm whether the API expects https instead.
        url = (
            "http://api.flipkart.net/returns"
            if self.sandbox == False
            else "http://sandbox-api.flipkart.net/returns"
        )
        query = {
            'source': source,
            'modifiedAfter': modified_after,
            'createdAfter': created_after,
        }
        return self.session.get(url, params=query)
test.py:
class ListOrders:
    """Small driver that authenticates and queries the returns endpoint."""

    def __init__(self):
        """Fetch an access token and build the API client from it."""
        self.app_id = 'app_id'
        self.app_secret = 'app_secret'
        auth = Authentication(self.app_id, self.app_secret, sandbox=False)
        get_token = auth.get_access_token()
        token_str = get_token.json()
        token = token_str['access_token']
        self.flipkart = FlipkartAPI(token, sandbox=False)

    def ret(self):
        """Request customer returns, print URL and status, return the response.

        The response is returned so that callers doing ``r = x.ret()`` get
        the response object instead of ``None``.
        """
        r = self.flipkart.returns('customer_return', modified_after='2015-09-01', created_after='2015-09-01')
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(r.url)
        print(r.status_code)
        return r
The problem is that I am getting a "max retries exceeded" error every time I call the ret method, and it doesn't even print the URL and the status_code for the request. Link to Documentation. What am I doing wrong? I can access the other methods, so there is no problem with the token generation.
Traceback:
Traceback (most recent call last):
File "test.py", line 131, in <module>
r = x.ret()
File "test.py", line 123, in ret
r = self.flipkart.returns('customer_return')
File "/home/manish/Desktop/Flipkart_Api_Main/api.py", line 77, in returns
return self.session.get(url)
File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 467, in get
return self.request('GET', url, **kwargs)
File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 455, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 558, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python2.7/dist-packages/requests/adapters.py", line 378, in send
raise ConnectionError(e)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='api.flipkart.net', port=80): Max retries exceeded with url: /returns (Caused by <class 'socket.error'>: [Errno 111] Connection refused)
EDIT: POSTMAN APP DATA
Images
Related
I am trying to implement the Amazon Web Scraper mentioned here. However, I get the output mentioned below. The output repeats until it stops with RecursionError: maximum recursion depth exceeded.
I have already tried downgrading eventlet to version 0.17.4 as mentioned here.
Also, the requests module is getting patched, as you can see in helpers.py.
helpers.py
import os
import random
from datetime import datetime
from urllib.parse import urlparse
import eventlet
requests = eventlet.import_patched('requests.__init__')
time = eventlet.import_patched('time')
import redis
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
import settings
# Running count of requests made by make_request(); it is compared against
# settings.max_requests before each new request.
num_requests = 0
# NOTE(review): this rebinds the name `redis` from the imported module to a
# client instance, so the module is no longer reachable under that name below.
redis = redis.StrictRedis(host=settings.redis_host, port=settings.redis_port, db=settings.redis_db)
def make_request(url, return_soup=True):
    """Fetch *url* (through a proxy if configured) and return the result.

    Args:
        url: Absolute or relative URL; it is normalised with ``format_url``.
        return_soup: When true, return ``(BeautifulSoup, raw_text)``;
            otherwise return the response object itself.

    Returns:
        ``None`` for redirect URLs, failed requests and non-200 responses;
        otherwise the value described under ``return_soup``.

    Raises:
        Exception: if ``settings.max_requests`` requests were already made.
    """
    global num_requests

    # global request building and response handling
    url = format_url(url)
    if "picassoRedirect" in url:
        return None  # skip the redirect URLs

    if num_requests >= settings.max_requests:
        raise Exception("Reached the max number of requests: {}".format(settings.max_requests))

    proxies = get_proxy()
    num_requests += 1  # every attempt counts, whether or not it succeeds
    try:
        r = requests.get(url, headers=settings.headers, proxies=proxies)
    except RequestException as e:
        # The old code only logged here and then fell through to
        # `r.status_code` with `r` unbound (UnboundLocalError). No retry was
        # ever performed, so report the failure and give up on this URL.
        log("WARNING: Request for {} failed: {}".format(url, e))
        return None

    if r.status_code != 200:
        # NOTE(review): `say` is a macOS command; elsewhere this shell call
        # only produces a "command not found" message.
        os.system('say "Got non-200 Response"')
        log("WARNING: Got a {} status code for URL: {}".format(r.status_code, url))
        return None

    if return_soup:
        return BeautifulSoup(r.text), r.text
    return r
def format_url(url):
    """Return *url* as an absolute URL that keeps only allowed query args.

    A missing scheme defaults to ``https`` and a missing host to
    ``www.amazon.de``. Query pieces whose key is not listed in
    ``settings.allowed_params`` are dropped; kept pieces are passed through
    unchanged.
    """
    # make sure URLs aren't relative, and strip unnecssary query args
    u = urlparse(url)
    scheme = u.scheme or "https"
    host = u.netloc or "www.amazon.de"

    kept = []
    if u.query:
        for piece in u.query.split("&"):
            # partition() copes with pieces that have no "=" or that carry
            # "=" inside the value; the old two-way unpack of split("=")
            # raised ValueError on both.
            key = piece.partition("=")[0]
            if key in settings.allowed_params:
                kept.append(piece)

    query = "?" + "&".join(kept) if kept else ""
    return "{}://{}{}{}".format(scheme, host, u.path, query)
def log(msg):
    """Print *msg* prefixed with the current time if stdout logging is on."""
    # global logging function
    if not settings.log_stdout:
        return
    try:
        stamped = "{}: {}".format(datetime.now(), msg)
        print(stamped)
    except UnicodeEncodeError:
        # squash logging errors in case of non-ascii text
        pass
def get_proxy():
    """Pick a random proxy from ``settings.proxies`` for the next request.

    Returns:
        ``None`` when no proxies are configured; otherwise a dict with the
        same SOCKS5 proxy URL under the ``"http"`` and ``"https"`` keys.
    """
    # choose a proxy server to use for this request, if we need one
    if not settings.proxies:
        return None

    proxy = random.choice(settings.proxies)
    # "@" separates the credentials from the host. The previous "#" turned
    # everything after the password into a URL fragment, so the proxy host
    # and port were never part of the address.
    proxy_url = "socks5://{user}:{passwd}@{ip}:{port}/".format(
        user=settings.proxy_user,
        passwd=settings.proxy_pass,
        ip=proxy,
        port=settings.proxy_port,
    )
    return {
        "http": proxy_url,
        "https": proxy_url,
    }
if __name__ == '__main__':
    # test proxy server IP masking
    r = make_request('https://api.ipify.org?format=json', return_soup=False)
    # make_request() returns None for non-200 responses, so guard the
    # attribute access instead of crashing with AttributeError.
    if r is not None:
        print(r.text)
output
Traceback (most recent call last):
File "helpers.py", line 112, in <module>
r = make_request('https://api.ipify.org?format=json', return_soup=False)
File "helpers.py", line 36, in make_request
r = requests.get(url, headers=settings.headers, proxies=proxies)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
chunked=chunked,
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 376, in _make_request
self._validate_conn(conn)
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 994, in _validate_conn
conn.connect()
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connection.py", line 300, in connect
conn = self._new_conn()
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/contrib/socks.py", line 99, in _new_conn
**extra_kw
File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 199, in create_connection
sock.connect((remote_host, remote_port))
File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 47, in wrapper
return function(*args, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 774, in connect
super(socksocket, self).settimeout(self._timeout)
File "/home/ec2-user/env/lib64/python3.7/site-packages/eventlet/greenio/base.py", line 395, in settimeout
self.setblocking(True)
What might be the problem here?
Turns out removing eventlet.monkey_patch() and import eventlet solved the problem.
I have written a simple Python script which reads a list of domains from a text file and checks whether each one is a WordPress site, based on the returned result.
the code is as follow:
import requests
#Loop domains list
with open('domains2') as f:
    for line in f:
        # NOTE(review): `line` still ends with "\n" here; compare the host
        # 'testing.com%0a' in the traceback below.
        domain = line
        source = requests.get(domain)
        # NOTE(review): this tests the Response object itself, not its text.
        results = 'Yes' if "wp-include" in source else 'No'
        print(line , ' : ' , results)
The errors are as follows:
Traceback (most recent call last):
File "./test4.py", line 8, in <module>
source = requests.get(domain)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 516, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='testing.com%0a', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7fd5a00c4d50>: Failed to establish a new connection: [Errno -2] Name or service not known',))
I was able to run my code only when I set the value of source manually, as follows, and did not read the domains from the list; the results were then correct:
# .text is the response body as text; requests.get() alone returns the Response object.
source = requests.get(domain).text
import requests
#Loop domains list
with open('domains2') as f:
    for line in f:
        # rstrip() drops the trailing "\n" that would otherwise end up in
        # the requested host name.
        domain = line.rstrip()
        if not domain:
            continue  # blank line: nothing to request
        source = requests.get(domain)
        # .text is the response body; the Response object itself is not a str.
        results = 'Yes' if "wp-include" in source.text else 'No'
        # Print the stripped domain so the newline does not split the output.
        print(domain, ' : ', results)
Use source.text to get the response body, and rstrip() to remove the trailing \n.
A version that also transforms each domain into a valid URL for requests (Python 3):
#!/usr/bin/env python
import requests
import re
from urllib import parse
def get_domains(file):
    """Read one domain or URL per line from *file* and return full URLs.

    Args:
        file: Path of a text file with one domain or URL per line.

    Returns:
        A list of URLs, each with scheme ``http`` and a host that starts
        with ``www.``. Blank lines are skipped.
    """
    res = []
    with open(file) as f:
        for x in f:
            url = x.strip()
            if not url:
                # A blank line used to produce the bogus entry "http://www."
                continue
            p = parse.urlparse(url, 'http')
            # Without a scheme, urlparse() puts the host into .path rather
            # than .netloc, so fall back to the path as the host.
            netloc = p.netloc or p.path
            path = p.path if p.netloc else ''
            if not netloc.startswith('www.'):
                netloc = 'www.' + netloc
            p = parse.ParseResult('http', netloc, path, *p[3:])
            res.append(p.geturl())
    return res
def is_wordpress(url):
    """Download *url* and report whether the page text contains ``wp-include``."""
    print(f"getting: {url}")
    page = requests.get(url).text
    return re.search('wp-include', page) is not None
def main():
    """Check every URL from ``domain.txt`` and print the url -> bool mapping."""
    result = {domain: is_wordpress(domain) for domain in get_domains('domain.txt')}
    print(result)


if __name__ == '__main__':
    main()
So I have been trying to understand the usage of a class constant, but I don't see how it can be overwritten. Suppose my library looks like this:
class ArcsightLogger(object):
    """
    Main Class to interact with Arcsight Logger REST API
    """

    # Default base URL; pass ``target=`` to the constructor to override it
    # for one instance before the login request is sent.
    TARGET = 'https://SOMETHING:9000'

    def __init__(self, username, password, disable_insecure_warning=False,
                 target=None):
        """
        Log in the user whose credentials are provided and
        store the access token to be used with all requests
        against Arcsight

        :param username: Login name sent to the LoginService
        :param password: Password sent to the LoginService
        :param disable_insecure_warning: If true, InsecureRequestWarning is
            ignored instead of being shown once
        :param target: Optional base URL for this instance; when omitted the
            class-level TARGET is used
        """
        # The login POST below already needs the base URL, so a per-instance
        # value must be in place before _post() runs. Shadowing TARGET on the
        # instance keeps class-level overrides and later
        # ``instance.TARGET = ...`` assignments working exactly as before.
        if target is not None:
            self.TARGET = target

        action = 'ignore' if disable_insecure_warning else 'once'
        warnings.simplefilter(action, InsecureRequestWarning)
        r = self._post(
            '/core-service/rest/LoginService/login', data={
                'login': username,
                'password': password,
            }, is_json=False)
        r.raise_for_status()
        loginrequest = untangle.parse(r.content)
        self.token = loginrequest.ns3_loginResponse.ns3_return.cdata

    def format_time(self, *args):
        """
        Format the current UTC time, optionally shifted, as a timestamp string

        :param args: Positional arguments passed straight to
            datetime.timedelta and added to the current time
        :return: String of the form YYYY-MM-DDTHH:MM:SS.mmm followed by the
            local UTC offset as +HH:MM, or "Z" when no offset is available
        """
        currentdt = datetime.datetime.now(pytz.utc)
        if args:
            currentdt += datetime.timedelta(*args)
        (dt, micro) = currentdt.strftime('%Y-%m-%dT%H:%M:%S.%f').split('.')
        tz_offset = currentdt.astimezone(tzlocal()).strftime('%z')
        tz_offset = "Z" if tz_offset == "" else tz_offset[:3] + ":" + tz_offset[3:]
        # Floor division keeps the millisecond value an int on Python 3 too.
        dt = "%s.%03d%s" % (dt, int(micro) // 1000, tz_offset)
        return dt

    def _post(self, route, data, is_json=True):
        """
        Post Call towards Arcsight Logger
        :param route: API endpoint to fetch
        :param is_json: Checks if post needs to be JSON
        :param data: Request Body
        :return: HTTP Response, or None when data is empty
        """
        if not data:
            return
        url = self.TARGET + route
        # NOTE(review): verify=False turns off TLS certificate verification.
        if is_json:
            return requests.post(url, json=data, verify=False)
        else:
            return requests.post(url, data, verify=False)
This works just fine if I manually set TARGET in this script, but when I import it into another script, like this:
import arcsightrest
arcsight = arcsightrest.ArcsightLogger('admin', 'somepassword', False)
# NOTE(review): the constructor call above has already sent the login
# request by the time this assignment runs.
arcsight.TARGET = 'https://10.10.10.10:9000'
with arcsight.search('query') as search:
    search.wait()
    data = search.events(custom=True)
    print(data)
Then, when I run the script, I see that TARGET is never actually overwritten: the traceback shows that the old TARGET is still used in the __init__ function of this call (which calls _post):
Traceback (most recent call last):
File "test.py", line 3, in <module>
arcsight = arcsightrest.ArcsightLogger('admin', 'somepassword', False)
File "/var/www/Projects2/ArcsightSDK/arcsightrest.py", line 37, in __init__
}, is_json=False)
File "/var/www/Projects2/ArcsightSDK/arcsightrest.py", line 69, in _post
return requests.post(url, data, verify=False)
File "/usr/lib/python2.7/site-packages/requests/api.py", line 110, in post
return request('post', url, data=data, json=json, **kwargs)
File "/usr/lib/python2.7/site-packages/requests/api.py", line 56, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/lib/python2.7/site-packages/requests/sessions.py", line 475, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python2.7/site-packages/requests/sessions.py", line 596, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python2.7/site-packages/requests/adapters.py", line 487, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='something', port=9000): Max retries exceeded with url: /core-service/rest/LoginService/login (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x1e59e50>: Failed to establish a new connection: [Errno -2] Name or service not known',))
You are overriding the variable after creating the instance:
arcsight = arcsightrest.ArcsightLogger('admin', 'somepassword', False)
# __init__ (including its login POST) has already run at this point
arcsight.TARGET = 'https://10.10.10.10:9000'
So in the __init__ function it still has the old value. You need to change the variable on the class, not on the instance:
import arcsightrest
# Rebind the class attribute before creating any instance, so __init__ reads the new value.
arcsightrest.ArcsightLogger.TARGET = 'https://10.10.10.10:9000'
Since you want to use a different target for different instances use an instance variable, not a class variable. After all, it's not really a constant if it's going to change.
You can pass the value for the URL target in the __init__() method. Use a default value if there is an appropriate one:
class ArcsightLogger(object):
"""
Main Class to interact with Arcsight Logger REST API
"""
def __init__(self, username, password, disable_insecure_warning=False, target='https://SOMETHING:9000'):
self.target = target
# etc...
Then use self.target in _post().
If you don't like setting the default in the __init__() method's argument then you can define a default value as a class variable and use it to initialise self.target:
class ArcsightLogger(object):
"""
Main Class to interact with Arcsight Logger REST API
"""
TARGET = 'https://SOMETHING:9000'
def __init__(self, username, password, disable_insecure_warning=False, target=None):
self.target = target if target is not None else self.TARGET
I am using this code to check proxy servers:
def check_proxy(p):
    """Return True if an HTTPS request through proxy *p* answers with 200.

    :param p: Proxy address; it is appended to ``https://`` to form the
        proxy URL.
    :return: True on a 200 response, False on any other status or on a
        request failure.
    """
    try:
        r = requests.get('https://httpbin.org/get', proxies={'https': 'https://%s' % p}, timeout=5)
    except requests.exceptions.RequestException:
        # Connection, proxy and timeout failures all mean the proxy is not
        # usable. The former bare `except:` also swallowed KeyboardInterrupt
        # and SystemExit.
        return False
    return r.status_code == 200
It works fine with a working proxy, but with a bad proxy it fails only after several minutes:
Traceback (most recent call last):
File "/home/me/Desktop/general/test_proxyt.py", line 22, in check_proxy
r = requests.get('https://httpbin.org/get', proxies={'https': 'https://%s' % p}, timeout=5)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 59, in get
return request('get', url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/api.py", line 48, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 451, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/sessions.py", line 557, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/requests/adapters.py", line 413, in send
raise ConnectionError(e, request=request)
ConnectionError: HTTPSConnectionPool(host='httpbin.org', port=443): Max retries exceeded with url: /get (Caused by ProxyError('Cannot connect to proxy.', error(110, 'Connection timed out')))
Is it possible to set proxy connection timeout exception?
Maybe you can try the exceptions that come with the module.
def check_proxy(p):
    """Return True if an HTTPS request through proxy *p* answers with 200.

    Request failures are printed and reported as False, so the function
    always returns a bool (the except branches used to fall off the end and
    return None).
    """
    try:
        r = requests.get('https://httpbin.org/get', proxies={'https': 'https://%s' % p}, timeout=5)
    except requests.exceptions.Timeout as e:
        # Maybe set up for a retry
        print(e)
        return False
    except requests.exceptions.RequestException as e:
        print(e)
        return False
    return r.status_code == 200
To close the circle here, this was a bug in requests that has since been fixed.
I am using rauth and requests to make calls to the Beatport API. The call works, but occasionally I get the following error: ConnectionError: HTTPSConnectionPool(host='oauth-api.beatport.com', port=443): Max retries exceeded with url
Here is the traceback.
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "scraper/songlist_top100.py", line 88, in <module>
'sortBy': 'releaseDate ASC'})
File "C:\Python27\lib\site-packages\requests\sessions.py", line 347, in get
return self.request('GET', url, **kwargs)
File "C:\Python27\lib\site-packages\rauth\session.py", line 208, in request
return super(OAuth1Session, self).request(method, url, **req_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 335, in reques
t
resp = self.send(prep, **send_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 438, in send
r = adapter.send(request, **kwargs)
File "C:\Python27\lib\site-packages\requests\adapters.py", line 327, in send
raise ConnectionError(e)
ConnectionError: HTTPSConnectionPool(host='oauth-api.beatport.com', port=443):
Max retries exceeded with url: /catalog/3/tracks?perPage=150&
oauth_nonce=xxxxx&oauth_timestamp=xxxxx&facets=artistName%3A
Avicii&oauth_signature_method=HMAC-SHA1&oauth_version=1.0&
oauth_consumer_key=xxxxx&oauth_token=xxxxxx&sortBy=releaseDate+ASC
&oauth_signature=xxxxx%3D&page=3 (Caused by <class 'httplib.BadStatusLine'>: '')
Here is my script
from rauth import OAuth1Service
import requests
from hunt.models import DJ, Song
def get_obj_or_none(model, **kwargs):
    """Return the single object of *model* matching *kwargs*, or None.

    Only ``model.DoesNotExist`` is turned into ``None``; any other exception
    raised by the lookup propagates.
    """
    try:
        match = model.objects.get(**kwargs)
    except model.DoesNotExist:
        match = None
    return match
# Account credentials submitted to the authorize-submit form below.
beatport_login = 'xxx'
beatport_pass = 'xxx'
# OAuth 1 service description: consumer credentials plus the Beatport endpoints.
beatport = OAuth1Service(
name='beatport',
consumer_key='xxxxx',
consumer_secret='xxxxx',
request_token_url= 'https://oauth-api.beatport.com/identity/1/oauth/request-token',
access_token_url='https://oauth-api.beatport.com/identity/1/oauth/access-token',
authorize_url='https://oauth-api.beatport.com/identity/1/oauth/authorize',
base_url='https://oauth-api.beatport.com/json/catalog')
# Step 1: obtain a request token and its secret.
request_token, request_token_secret = beatport.get_request_token(method='POST', data={
'oauth_callback': 'http://www.edmhunters.com'})
authorize_url = beatport.get_authorize_url(request_token)
# Step 2: post the login form with the request token; the verifier is then
# read from the URL of the response.
values = {
'oauth_token': request_token,
'username': beatport_login,
'password': beatport_pass,
'submit' : 'Login',
}
r = requests.post('https://oauth-api.beatport.com/identity/1/oauth/authorize-submit', data=values)
# NOTE(review): raises IndexError when "oauth_verifier=" is not in r.url.
verifier = r.url.split("oauth_verifier=",1)[1]
# Step 3: exchange the request token and verifier for the raw access-token response.
tokens = beatport.get_raw_access_token(request_token, request_token_secret, method='POST', data={
'oauth_verifier': verifier})
token_string = tokens.content
# access_token: the text between the first "=" and the first "&".
# access_token_secret: the text from just after the first "t=" up to the last "&s".
# NOTE(review): both slices depend on the exact field order of the response
# body -- confirm against the API, or parse it as a query string instead.
access_token = token_string[token_string.find('=')+1:token_string.find('&')]
access_token_secret = token_string[token_string.find('t=')+2:token_string.rfind('&s')]
# Authenticated session used for all catalog requests that follow.
session = beatport.get_session((access_token, access_token_secret))
for dj in DJ.objects.all():
r = session.get('https://oauth-api.beatport.com/catalog/3/tracks', params={'facets': "artistName:"+dj.name, 'perPage': 150})
count_response = r.json()
results = []
for i in range(1, count_response['metadata']['totalPages']+1):
r1 = session.get('https://oauth-api.beatport.com/catalog/3/tracks', params={'facets': "artistName:"+dj.name,
'page': i,
'perPage': 150,
'sortBy': 'releaseDate ASC'})
json_response = r1.json()
results += json_response['results']
song_list = []
for song in results:
artists = [artist['name'] for artist in song['artists'] if str(artist['type'])=='artist']
remixers = [artist['name'] for artist in song['artists'] if str(artist['type'])=='remixer']
if not ((dj.name in artists) and ((dj.name not in remixers) if len(remixers)>0 else False)):
song_list.append(song)
for song in song_list:
artists = [artist['name'] for artist in song['artists'] if str(artist['type'])=='artist']
remixers = [artist['name'] for artist in song['artists'] if str(artist['type'])=='remixer']
artist_list = ', '.join(artists)
remixer_list = ', '.join(remixers)
song_name = song['name']
if not(song_name.lower().find("feat.") == -1 ):
normal_name=song_name[0:song_name.lower().find("feat.")].rstrip()
else:
normal_name=song_name
genre_list=[]
for genre in song['genres']:
genre_list.append(genre['name'])
genres = ', '.join(genre_list)
if not get_obj_or_none(Song, name__iexact=song_name, artist=dj):
s = Song(song_id=song['id'],
name=song_name,
title=song['title'],
normalized_name=normal_name,
artist=dj,
artists=artist_list,
remixers=remixer_list,
release_date=song['releaseDate'],
slug=song['slug'],
artwork=song['images']['large']['url'],
genres=genres)
s.save()
print "Added song:", s.song_id, s.artist
Why do I get the above mentioned error?
It looks as if the Beatport API is overloaded and closes the connection prematurely sometimes. Your first set of requests succeeded just fine, it was page 3 that threw the error because the response is empty.
You really should report this to Beatport, but you could perhaps work around this issue by instructing the requests module to retry requests:
from requests.adapters import HTTPAdapter
# ....
session = beatport.get_session((access_token, access_token_secret))
# Requests whose URL starts with this prefix go through an adapter created with max_retries=5.
# NOTE(review): the requests docs describe max_retries as covering failed DNS
# lookups, socket connections and connection timeouts -- confirm that it also
# retries the empty-response (BadStatusLine) failure shown in the traceback.
session.mount('https://oauth-api.beatport.com', HTTPAdapter(max_retries=5))
would retry your requests a few more times in case an error occurred.