I have been trying to understand how a class constant is used, but I don't see how it can be overridden. Suppose my library looks like this:
class ArcsightLogger(object):
    """
    Main Class to interact with Arcsight Logger REST API
    """

    # Base URL that `_post` prepends to every route. `__init__` already calls
    # `_post` for the login request, so this value is read while the instance
    # is still being constructed.
    TARGET = 'https://SOMETHING:9000'

    def __init__(self, username, password, disable_insecure_warning=False):
        """
        Log in the user whose credentials are provided and
        store the access token to be used with all requests
        against Arcsight

        :param username: Login name sent to the LoginService endpoint
        :param password: Password sent to the LoginService endpoint
        :param disable_insecure_warning: If true, InsecureRequestWarning is
            ignored; otherwise it is reported once
        :raises: whatever ``raise_for_status()`` raises for an error response
        """
        action = 'ignore' if disable_insecure_warning else 'once'
        warnings.simplefilter(action, InsecureRequestWarning)
        r = self._post(
            '/core-service/rest/LoginService/login', data={
                'login': username,
                'password': password,
            }, is_json=False)
        r.raise_for_status()
        # The login response is XML; the token is the text of the
        # ns3:loginResponse/ns3:return element.
        loginrequest = untangle.parse(r.content)
        self.token = loginrequest.ns3_loginResponse.ns3_return.cdata

    def format_time(self, *args):
        """
        Return the current time, optionally shifted, formatted as
        ``YYYY-MM-DDTHH:MM:SS.mmm+HH:MM`` in the local timezone.

        :param args: Optional positional arguments forwarded unchanged to
            ``datetime.timedelta``; the resulting delta is added to "now"
        :return: Formatted timestamp string
        """
        currentdt = datetime.datetime.now(pytz.utc)
        if args:
            currentdt += datetime.timedelta(*args)
        # Render the wall-clock time in the same zone the offset is taken
        # from. Formatting the UTC time and then appending the local offset
        # would describe a different instant whenever local time is not UTC.
        localdt = currentdt.astimezone(tzlocal())
        (dt, micro) = localdt.strftime('%Y-%m-%dT%H:%M:%S.%f').split('.')
        tz_offset = localdt.strftime('%z')
        # '%z' yields e.g. '+0200'; insert the colon to get '+02:00'.
        tz_offset = "Z" if tz_offset == "" else tz_offset[:3] + ":" + tz_offset[3:]
        # Floor division keeps the millisecond value an integer on Python 3 too.
        dt = "%s.%03d%s" % (dt, int(micro) // 1000, tz_offset)
        return dt

    def _post(self, route, data, is_json=True, ):
        """
        Post Call towards Arcsight Logger
        :param route: API endpoint to fetch
        :param is_json: Checks if post needs to be JSON
        :param data: Request Body
        :return: HTTP Response, or None when ``data`` is empty
        """
        if not data:
            # NOTE(review): callers that use the result directly (as __init__
            # does) will fail on this None; confirm this is intended.
            return
        url = self.TARGET + route
        # verify=False: TLS certificate verification is switched off for
        # every request made here.
        if is_json:
            return requests.post(url, json=data, verify=False)
        else:
            return requests.post(url, data, verify=False)
This works just fine if I set TARGET manually in this module, but not when I import it into another script and set it there, like this:
import arcsightrest
arcsight = arcsightrest.ArcsightLogger('admin', 'somepassword', False)
arcsight.TARGET = 'https://10.10.10.10:9000'
with arcsight.search('query') as search:
search.wait()
data = search.events(custom=True)
print data
When I run the script, I see that TARGET is never actually overridden: the traceback shows that the old TARGET is still being used in the __init__ method (which calls _post):
Traceback (most recent call last):
File "test.py", line 3, in <module>
arcsight = arcsightrest.ArcsightLogger('admin', 'somepassword', False)
File "/var/www/Projects2/ArcsightSDK/arcsightrest.py", line 37, in __init__
}, is_json=False)
File "/var/www/Projects2/ArcsightSDK/arcsightrest.py", line 69, in _post
return requests.post(url, data, verify=False)
File "/usr/lib/python2.7/site-packages/requests/api.py", line 110, in post
return request('post', url, data=data, json=json, **kwargs)
File "/usr/lib/python2.7/site-packages/requests/api.py", line 56, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/lib/python2.7/site-packages/requests/sessions.py", line 475, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python2.7/site-packages/requests/sessions.py", line 596, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python2.7/site-packages/requests/adapters.py", line 487, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='something', port=9000): Max retries exceeded with url: /core-service/rest/LoginService/login (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x1e59e50>: Failed to establish a new connection: [Errno -2] Name or service not known',))
You are overriding the variable after its instance has already been created:
arcsight = arcsightrest.ArcsightLogger('admin', 'somepassword', False)
#__init__ has been already done
arcsight.TARGET = 'https://10.10.10.10:9000'
so inside the __init__ function it still has the old value. You need to change the variable on the class, not on the instance, before creating the instance:
import arcsightrest
arcsightrest.ArcsightLogger.TARGET = 'https://10.10.10.10:9000'
Since you want to use a different target for different instances use an instance variable, not a class variable. After all, it's not really a constant if it's going to change.
You can pass the value for the URL target in the __init__() method. Use a default value if there is an appropriate one:
class ArcsightLogger(object):
"""
Main Class to interact with Arcsight Logger REST API
"""
def __init__(self, username, password, disable_insecure_warning=False, target='https://SOMETHING:9000'):
self.target = target
# etc...
Then use self.target in _post().
If you don't like setting the default in the __init__() method's argument then you can define a default value as a class variable and use it to initialise self.target:
class ArcsightLogger(object):
"""
Main Class to interact with Arcsight Logger REST API
"""
TARGET = 'https://SOMETHING:9000'
def __init__(self, username, password, disable_insecure_warning=False, target=None):
self.target = target if target is not None else self.TARGET
Related
I am trying to pull a huge amount of data (millions of rows), and I get the following error when I run my code. If I run the same code with a small range (a range of 2, to be exact), it runs successfully. Please help me work out whether the problem is in my code or on the API side.
Thanks
The Error I am getting
DEBUG:google.api_core.bidi:Started helper thread Thread-ConsumeBidirectionalStream
DEBUG:google.api_core.bidi:Thread-ConsumeBidirectionalStream caught error 400 Request contains an invalid argument. and will exit. Generally this is due to the RPC itself being cancelled and the error will be surfaced to the calling code.
Traceback (most recent call last):
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 147, in error_remapped_callable
return _StreamingResponseIterator(
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 73, in __init__
self._stored_first_result = next(self._wrapped)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/grpc/_channel.py", line 426, in __next__
return self._next()
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/grpc/_channel.py", line 826, in _next
raise self
grpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.INVALID_ARGUMENT
details = "Request contains an invalid argument."
debug_error_string = "{"created":"#1652904360.179503883","description":"Error received from peer ipv4:173.194.76.95:443","file":"src/core/lib/surface/call.cc","file_line":952,"grpc_message":"Request contains an invalid argument.","grpc_status":3}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/bidi.py", line 636, in _thread_main
self._bidi_rpc.open()
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/bidi.py", line 279, in open
call = self._start_rpc(iter(request_generator), metadata=self._rpc_metadata)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/services/big_query_write/client.py", line 678, in append_rows
response = rpc(
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/gapic_v1/method.py", line 154, in __call__
return wrapped_func(*args, **kwargs)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/retry.py", line 283, in retry_wrapped_func
return retry_target(
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/retry.py", line 190, in retry_target
return target()
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/api_core/grpc_helpers.py", line 151, in error_remapped_callable
raise exceptions.from_grpc_error(exc) from exc
google.api_core.exceptions.InvalidArgument: 400 Request contains an invalid argument.
INFO:google.api_core.bidi:Thread-ConsumeBidirectionalStream exiting
DEBUG:google.cloud.bigquery_storage_v1.writer:Finished stopping manager.
Traceback (most recent call last):
File "write_data_to_db2.py", line 207, in <module>
p.append_rows_pending(project_id='dwingestion', dataset_id='ke',
File "write_data_to_db2.py", line 188, in append_rows_pending
response_future_1 = append_rows_stream.send(request)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/writer.py", line 234, in send
return self._open(request)
File "/home/coyugi/teltel_env/lib/python3.8/site-packages/google/cloud/bigquery_storage_v1/writer.py", line 207, in _open
raise request_exception
google.api_core.exceptions.Unknown: None There was a problem opening the stream. Try turning on DEBUG level logs to see the error.
Summary Of My Code
# PULLING DATA FROM THE API
def whole_teltel_raw_data():
# Creating a session to introduce network consistency
session = requests.Session()
retry = Retry(connect=3, backoff_factor=1.0)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
url = "https://my_api_url"
the_headers = {"X-API-KEY": 'my key'}
offset_limit = 1249500
teltel_data = []
# Loop through the results and if present extend the teltel_data list
#======================================================================================================================
# WRITE THE DATA TO THE DATA WAREHOUSE
# ======================================================================================================================
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'dwingestion-b033d9535e9d.json'
# TeltelCall message fields, in the positional order of the input tuple.
# NOTE(review): position 24 was unpacked as `hangup_element_element` but is
# stored in `hangup_element_title`; the mapping is kept as-is, confirm it
# against the proto schema. `calle_type` is likewise kept as spelled.
_TELTEL_CALL_FIELDS = (
    "call_id",
    "starttime",
    "stoptime",
    "direction",
    "type",
    "status",
    "duration_sec",
    "rate",
    "cost",
    "transfer",
    "extra_prefix",
    "audio_url",
    "hangup_element",
    "caller_number",
    "caller_type",
    "caller_cid",
    "caller_dnid",
    "caller_user_id",
    "caller_user_short",
    "callee_number",
    "calle_type",
    "callee",
    "hangup_element_name",
    "hangup_element_title",
    "callee_user_id",
    "callee_user_short",
    "caller",
)


def create_row_data(tuple_data):
    """Serialize one call record into TeltelCall protobuf bytes.

    :param tuple_data: Iterable holding exactly one value per entry of
        ``_TELTEL_CALL_FIELDS``, in that order
    :return: The populated message as returned by ``SerializeToString()``
    :raises ValueError: If the number of values does not match the number
        of fields
    """
    values = tuple(tuple_data)
    if len(values) != len(_TELTEL_CALL_FIELDS):
        raise ValueError(
            "expected %d values per row, got %d"
            % (len(_TELTEL_CALL_FIELDS), len(values))
        )

    row = teltel_call_data_pb2.TeltelCall()
    # Driving the assignments from one table avoids a local named `type`
    # (which shadowed the builtin) and keeps names and positions in one place.
    for field_name, value in zip(_TELTEL_CALL_FIELDS, values):
        setattr(row, field_name, value)
    return row.SerializeToString()
# Creating connection to the data warehouse
def create_bigquery_storage_client(google_credentials):
    """Return a BigQueryWriteClient built with the given credentials.

    :param google_credentials: Passed through as the ``credentials`` argument
    """
    client_class = bigquery_storage_v1.client.BigQueryWriteClient
    return client_class(credentials=google_credentials)
class GcpBigqueryStorageService(object):
    """Writes Teltel call rows to a BigQuery table through a PENDING write stream."""

    def __init__(self, google_credentials=None, gcp_config=None):
        """
        :param google_credentials: Credentials handed to the write client
        :param gcp_config: Stored on ``self.config``; not read by this class
        """
        self.client = create_bigquery_storage_client(google_credentials)
        self.config = gcp_config

    def append_rows_pending(self, project_id: str, dataset_id: str, table_id: str,
                            batch_size: int = 500):
        """Create a write stream, append all rows in batches, and commit the stream.

        Rows come from ``whole_teltel_raw_data()`` and are serialized with
        ``create_row_data``. They are sent in requests of at most
        ``batch_size`` rows rather than one request holding every row,
        because the Storage Write API limits the size of a single AppendRows
        request (see the BigQuery Storage Write API quotas).

        :param project_id: Project that owns the destination table
        :param dataset_id: Dataset that owns the destination table
        :param table_id: Destination table
        :param batch_size: Maximum number of rows per AppendRows request
        :raises ValueError: If ``batch_size`` is smaller than 1
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        parent = self.client.table_path(project_id, dataset_id, table_id)
        write_stream = types.WriteStream()
        # When creating the stream, choose the type. PENDING rows stay
        # invisible until the stream is finalized and committed below.
        write_stream.type_ = types.WriteStream.Type.PENDING
        write_stream = self.client.create_write_stream(
            parent=parent, write_stream=write_stream
        )
        stream_name = write_stream.name

        # Create a template with fields needed for the first request.
        request_template = types.AppendRowsRequest()
        # The initial request must contain the stream name.
        request_template.write_stream = stream_name

        # So that BigQuery knows how to parse the serialized_rows, generate a
        # protocol buffer representation of the message descriptor.
        proto_schema = types.ProtoSchema()
        proto_descriptor = descriptor_pb2.DescriptorProto()
        teltel_call_data_pb2.TeltelCall.DESCRIPTOR.CopyToProto(proto_descriptor)
        proto_schema.proto_descriptor = proto_descriptor
        proto_data = types.AppendRowsRequest.ProtoData()
        proto_data.writer_schema = proto_schema
        request_template.proto_rows = proto_data

        # AppendRowsStream sends an arbitrary number of requests to a stream.
        append_rows_stream = writer.AppendRowsStream(self.client, request_template)

        row_number = 0      # rows serialized so far
        batch_offset = 0    # stream offset of the first row in the open batch
        proto_rows = types.ProtoRows()
        try:
            for row in whole_teltel_raw_data():
                proto_rows.serialized_rows.append(create_row_data(row))
                row_number += 1
                if row_number - batch_offset >= batch_size:
                    self._send_rows(append_rows_stream, proto_rows, batch_offset)
                    # checking the writing progress
                    print("Writing to the database row number", row_number)
                    batch_offset = row_number
                    proto_rows = types.ProtoRows()
            if row_number > batch_offset:
                # Flush the final, partially filled batch.
                self._send_rows(append_rows_stream, proto_rows, batch_offset)
                print("Writing to the database row number", row_number)
        finally:
            # Shut the stream down even when a batch fails. The exception
            # still propagates, so a partial load is never committed.
            append_rows_stream.close()

        # A PENDING type stream must be "finalized" before being committed. No new
        # records can be written to the stream after this method has been called.
        self.client.finalize_write_stream(name=write_stream.name)

        # Commit the stream created earlier.
        batch_commit_write_streams_request = types.BatchCommitWriteStreamsRequest()
        batch_commit_write_streams_request.parent = parent
        batch_commit_write_streams_request.write_streams = [write_stream.name]
        self.client.batch_commit_write_streams(batch_commit_write_streams_request)
        print(f"Writes to stream: '{write_stream.name}' have been committed.")

    @staticmethod
    def _send_rows(append_rows_stream, proto_rows, offset):
        """Send one batch at the given stream offset and wait for its result."""
        request = types.AppendRowsRequest()
        # The first request must have offset 0; each later one starts at the
        # number of rows already sent.
        request.offset = offset
        proto_data = types.AppendRowsRequest.ProtoData()
        proto_data.rows = proto_rows
        request.proto_rows = proto_data
        # result() blocks until the server answers and raises if the append
        # was rejected, so a failure surfaces at the batch that caused it.
        append_rows_stream.send(request).result()
p = GcpBigqueryStorageService()
p.append_rows_pending(project_id='my_project', dataset_id='my_id', table_id='teltel_call_2')
I have a gRPC connection established and I try to make a request on the channel, but when I call the service from the client I get the following exception. Does anyone know anything about it? Is the use of threads the reason for it? I can't figure out what is wrong.
This is the protobuf schema with the service:
service P4Runtime {
// Update one or more P4 entities on the target.
rpc Write(WriteRequest) returns (WriteResponse) {
}
// Read one or more P4 entities from the target.
rpc Read(ReadRequest) returns (stream ReadResponse) {
}
// Sets the P4 forwarding-pipeline config.
rpc SetForwardingPipelineConfig(SetForwardingPipelineConfigRequest)
returns (SetForwardingPipelineConfigResponse) {
}
// Gets the current P4 forwarding-pipeline config.
rpc GetForwardingPipelineConfig(GetForwardingPipelineConfigRequest)
returns (GetForwardingPipelineConfigResponse) {
}
// Represents the bidirectional stream between the controller and the
// switch (initiated by the controller), and is managed for the following
// purposes:
// - connection initiation through client arbitration
// - indicating switch session liveness: the session is live when switch
// sends a positive client arbitration update to the controller, and is
// considered dead when either the stream breaks or the switch sends a
// negative update for client arbitration
// - the controller sending/receiving packets to/from the switch
// - streaming of notifications from the switch
rpc StreamChannel(stream StreamMessageRequest)
returns (stream StreamMessageResponse) {
}
rpc Capabilities(CapabilitiesRequest) returns (CapabilitiesResponse) {
}
}
Below is the function that i call and exception happens:
def write_IPv4_Rules(p4info_helper,ingress_sw,ipv4_dst,lpm,dst_mac,out_port):
    """Build a MyIngress.ipv4_lpm table entry and write it to the switch.

    :param p4info_helper: Object providing ``buildTableEntry``
    :param ingress_sw: Switch connection providing ``WriteTableEntry`` and ``name``
    :param ipv4_dst: Destination address to match on
    :param lpm: Prefix length paired with ``ipv4_dst`` in the match
    :param dst_mac: Value for the action's ``dstAddr`` parameter
    :param out_port: Value for the action's ``port`` parameter
    """
    lpm_match = {"hdr.ipv4.dstAddr": (ipv4_dst, lpm)}
    forward_params = {"dstAddr": dst_mac, "port": out_port}
    entry = p4info_helper.buildTableEntry(
        table_name="MyIngress.ipv4_lpm",
        match_fields=lpm_match,
        action_name="MyIngress.ipv4_forward",
        action_params=forward_params,
    )
    ingress_sw.WriteTableEntry(entry)
    print("Installed ipv4 rule on %s" % ingress_sw.name)
This is the invocation of the above function, which runs inside a thread:
write_IPv4_Rules(p4info_helper,ingress_sw,ip_dest,32,dst_mac,2)
Below i have the code of a controller that uses grpc service:
class SwitchConnection(object):
    """Holds the gRPC channel, P4Runtime stub and request stream for one switch."""

    def __init__(self, name=None, address='127.0.0.1:50051', device_id=0,
                 proto_dump_file=None):
        """
        :param name: Label for this switch
        :param address: ``host:port`` the insecure channel connects to
        :param device_id: Device id copied into every WriteRequest
        :param proto_dump_file: If given, requests are logged to this file
            through a GrpcRequestLogger interceptor
        """
        self.name = name
        self.address = address
        self.device_id = device_id
        self.p4info = None
        self.proto_dump_file = proto_dump_file

        channel = grpc.insecure_channel(self.address)
        if proto_dump_file is not None:
            # Wrap the channel so each request passes through the logger.
            channel = grpc.intercept_channel(
                channel, GrpcRequestLogger(proto_dump_file))
        self.channel = channel

        self.client_stub = p4runtime_pb2_grpc.P4RuntimeStub(self.channel)
        # Messages put on this queue feed the StreamChannel RPC opened below.
        self.requests_stream = IterableQueue()
        self.stream_msg_resp = self.client_stub.StreamChannel(
            iter(self.requests_stream))
        # Register in the module-level list of connections.
        connections.append(self)

    def WriteTableEntry(self, table_entry, dry_run=False):
        """Send ``table_entry`` in a WriteRequest, or only print it.

        :param table_entry: Entry to write; a default-action entry is sent as
            MODIFY, any other entry as INSERT
        :param dry_run: If true, print the request instead of sending it
        """
        request = p4runtime_pb2.WriteRequest()
        request.device_id = self.device_id
        request.election_id.low = 1

        update = request.updates.add()
        update.type = (p4runtime_pb2.Update.MODIFY
                       if table_entry.is_default_action
                       else p4runtime_pb2.Update.INSERT)
        update.entity.table_entry.CopyFrom(table_entry)

        if dry_run:
            print("P4Runtime Write:", request)
            return
        self.client_stub.Write(request)
class GrpcRequestLogger(grpc.UnaryUnaryClientInterceptor,
                        grpc.UnaryStreamClientInterceptor):
    """Implementation of a gRPC interceptor that logs request to a file"""

    def __init__(self, log_file):
        """Remember the log path and start from an empty file."""
        self.log_file = log_file
        # Opening in 'w' mode is what clears any existing content.
        with open(self.log_file, 'w') as handle:
            handle.write("")

    def log_message(self, method_name, body):
        """Append one timestamped entry for ``body`` to the log file.

        Bodies whose string form is MSG_LOG_MAX_LEN characters or longer are
        replaced by a short "too long" notice.
        """
        with open(self.log_file, 'a') as handle:
            # Drop the last three digits of %f to get millisecond precision.
            stamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
            text = str(body)
            handle.write("\n[%s] %s\n---\n" % (stamp, method_name))
            fits = len(text) < MSG_LOG_MAX_LEN
            handle.write(
                text if fits
                else ("Message too long (%d bytes)! Skipping log...\n" % len(text))
            )
            handle.write('---\n')

    def _log_then_continue(self, continuation, client_call_details, request):
        # Shared body of both interceptor hooks: log, then pass the call on.
        self.log_message(client_call_details.method, request)
        return continuation(client_call_details, request)

    def intercept_unary_unary(self, continuation, client_call_details, request):
        return self._log_then_continue(continuation, client_call_details, request)

    def intercept_unary_stream(self, continuation, client_call_details, request):
        return self._log_then_continue(continuation, client_call_details, request)
class IterableQueue(Queue):
    """Queue that can be iterated until ``close()`` is called."""

    # Unique marker that close() enqueues to end iteration.
    _sentinel = object()

    def __iter__(self):
        """Return an iterator that calls ``get()`` until the sentinel comes out."""
        fetch_next = self.get
        stop_marker = self._sentinel
        return iter(fetch_next, stop_marker)

    def close(self):
        """Enqueue the sentinel so that iteration stops when it is reached."""
        stop_marker = self._sentinel
        self.put(stop_marker)
The exception that i receive when i run the program:
Exception in thread Thread-11:
Traceback (most recent call last):
File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
self.run()
File "/usr/lib/python3.8/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "./mycontroller.py", line 348, in packet_router_processing
ipv4_forwarding(p4info_helper,extract_header,ingress_sw,packetIn.packet.metadata)
File "./mycontroller.py", line 256, in ipv4_forwarding
write_IPv4_Rules(p4info_helper,ingress_sw,ip_dest,24,dst_mac,1)
File "./mycontroller.py", line 38, in write_IPv4_Rules
ingress_sw.WriteTableEntry(table_entry)
File "/home/p4/tutorials/exercises/test/../../utils/p4runtime_lib/switch.py", line 102, in WriteTableEntry
self.client_stub.Write(request)
File "/usr/local/lib/python3.8/dist-packages/grpc/_interceptor.py", line 207, in __call__
response, ignored_call = self._with_call(
File "/usr/local/lib/python3.8/dist-packages/grpc/_interceptor.py", line 240, in _with_call
call = self._interceptor.intercept_unary_unary(
File "/home/p4/tutorials/exercises/test/../../utils/p4runtime_lib/switch.py", line 220, in intercept_unary_unary
return continuation(client_call_details, request)
File "/usr/local/lib/python3.8/dist-packages/grpc/_interceptor.py", line 228, in continuation
response, call = self._thunk(new_method).with_call(
File "/usr/local/lib/python3.8/dist-packages/grpc/_channel.py", line 557, in with_call
return _end_unary_response_blocking(state, call, True, None)
File "/usr/local/lib/python3.8/dist-packages/grpc/_channel.py", line 466, in _end_unary_response_blocking
raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
status = StatusCode.UNKNOWN
details = ""
debug_error_string = "{"created":"#1646087190.862135612","description":"Error received from peer","file":"src/core/lib/surface/call.cc","file_line":1036,"grpc_message":"","grpc_status":2}"
I am trying to use a POST URI in my code to access NCBO's Annotator tool. My current code is a GET request, but I don't know how to format this into a POST request. My data is the text variable.
All the examples I've seen go from requests.get(url) to requests.post(url, data=data), but how can I do the same with build_opener() and json.loads()?
Here is my code:
def get_json(url):
    """Fetch ``url`` with the API-key Authorization header and parse the body as JSON.

    :param url: Full request URL, including any query string
    :return: The decoded JSON document
    """
    # get annotations
    opener = urllib.request.build_opener()
    opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
    # Close the HTTP response as soon as the body has been read instead of
    # leaving the connection open until garbage collection.
    with opener.open(url) as response:
        return json.loads(response.read())
text = "random text with a lot of words"
annotations = get_json("http://data.bioontology.org/annotator?text=" + urllib.parse.quote(text))
Updated code:
def get_annotations(text, url):
headers = [('Authorization', 'apikey token=' + API_KEY)]
data = text
response = requests.request("POST",url,headers=headers,data=data)
return response.text.encode('utf-8')
annotations = get_annotations(text, "http://data.bioontology.org/annotator?text=" + urllib.parse.quote(text))
Error:
response = requests.request("POST",url,headers=headers,data=data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/sessions.py", line 516, in request
prep = self.prepare_request(req)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/sessions.py", line 449, in prepare_request
p.prepare(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/models.py", line 315, in prepare
self.prepare_headers(headers)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/requests/models.py", line 447, in prepare_headers
for header in headers.items():
AttributeError: 'list' object has no attribute 'items'
I recommend using the requests module; it makes it easy to choose the request method.
import requests
def get_json(url, data=None):
    """Send a POST request with the API-key header and return the body as UTF-8 bytes.

    :param url: Request URL
    :param data: Params you want to send in the request body (optional)
    :return: ``response.text`` encoded as UTF-8
    """
    # requests needs headers as a mapping; a list of tuples fails with
    # "AttributeError: 'list' object has no attribute 'items'".
    headers = {'Authorization': 'apikey token=' + API_KEY}  # add headers here
    response = requests.request("POST", url, headers=headers, data=data)  # you can choose your method here.
    return response.text.encode('utf-8')
For your reference, it is as simple as this.
I am trying to connect to a website via its RESTful API. A token has to be generated to access the methods. It is working fine, since I can access data through all the other methods, but I am stuck on this one.
My code So far:
class FlipkartAPI:
    """Small client for the Flipkart REST API, authenticated with a bearer token."""

    def __init__(self, token, sandbox=False):
        """
        :param token: Access token sent in the Authorization header
        :param sandbox: If truthy, requests go to the sandbox host
        """
        self.token = token
        self.session = self.get_session()
        self.sandbox = sandbox

    def get_session(self):
        """Return a requests session preloaded with the auth and content-type headers."""
        session = requests.Session()
        session.headers.update({
            'Authorization': 'Bearer %s' % self.token,
            'Content-type': 'application/json',
        })
        return session

    def returns(self, source, modified_after=None, created_after=None):
        """GET the ``/returns`` endpoint and return the response.

        :param source: Value for the ``source`` query parameter
        :param modified_after: Value for ``modifiedAfter`` (optional)
        :param created_after: Value for ``createdAfter`` (optional)
        :return: The response of ``session.get``
        """
        # Truthiness test rather than `== False`, so that a falsy value such
        # as None also selects the production host.
        if self.sandbox:
            url = "http://sandbox-api.flipkart.net/returns"
        else:
            url = "http://api.flipkart.net/returns"
        payload = {
            'source': source,
            'modifiedAfter': modified_after,
            'createdAfter': created_after,
        }
        return self.session.get(url, params=payload)
test.py:
class ListOrders:
    """Obtains an access token and exposes a helper that lists returns."""

    def __init__(self):
        """Authenticate with the app credentials and build the API client."""
        self.app_id = 'app_id'
        self.app_secret = 'app_secret'
        auth = Authentication(self.app_id, self.app_secret, sandbox=False)
        # The token endpoint answers with JSON; only 'access_token' is used.
        token = auth.get_access_token().json()['access_token']
        self.flipkart = FlipkartAPI(token, sandbox=False)

    def ret(self):
        """Request customer returns and print the request URL and status code."""
        response = self.flipkart.returns(
            'customer_return',
            modified_after='2015-09-01',
            created_after='2015-09-01',
        )
        print(response.url)
        print(response.status_code)
The problem is that I get a "Max retries exceeded" error every time I call the ret method, and it doesn't even print the URL and the status code for the request. Link to Documentation. What am I doing wrong? I can access the other methods, so there is no problem with the token generation.
Traceback:
Traceback (most recent call last):
File "test.py", line 131, in <module>
r = x.ret()
File "test.py", line 123, in ret
r = self.flipkart.returns('customer_return')
File "/home/manish/Desktop/Flipkart_Api_Main/api.py", line 77, in returns
return self.session.get(url)
File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 467, in get
return self.request('GET', url, **kwargs)
File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 455, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 558, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python2.7/dist-packages/requests/adapters.py", line 378, in send
raise ConnectionError(e)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='api.flipkart.net', port=80): Max retries exceeded with url: /returns (Caused by <class 'socket.error'>: [Errno 111] Connection refused)
EDIT: POSTMAN APP DATA
Images
I am using rauth and requests to make calls to the Beatport API. The calls work, but occasionally I get the following error: ConnectionError: HTTPSConnectionPool(host='oauth-api.beatport.com', port=443): Max retries exceeded with url
Here is the traceback.
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "scraper/songlist_top100.py", line 88, in <module>
'sortBy': 'releaseDate ASC'})
File "C:\Python27\lib\site-packages\requests\sessions.py", line 347, in get
return self.request('GET', url, **kwargs)
File "C:\Python27\lib\site-packages\rauth\session.py", line 208, in request
return super(OAuth1Session, self).request(method, url, **req_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 335, in reques
t
resp = self.send(prep, **send_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 438, in send
r = adapter.send(request, **kwargs)
File "C:\Python27\lib\site-packages\requests\adapters.py", line 327, in send
raise ConnectionError(e)
ConnectionError: HTTPSConnectionPool(host='oauth-api.beatport.com', port=443):
Max retries exceeded with url: /catalog/3/tracks?perPage=150&
oauth_nonce=xxxxx&oauth_timestamp=xxxxx&facets=artistName%3A
Avicii&oauth_signature_method=HMAC-SHA1&oauth_version=1.0&
oauth_consumer_key=xxxxx&oauth_token=xxxxxx&sortBy=releaseDate+ASC
&oauth_signature=xxxxx%3D&page=3 (Caused by <class 'httplib.BadStatusLine'>: '')
Here is my script
from rauth import OAuth1Service
import requests
from hunt.models import DJ, Song
def get_obj_or_none(model, **kwargs):
    """Return the object matching ``kwargs``, or None if ``model.DoesNotExist`` is raised.

    :param model: Class exposing ``objects.get`` and ``DoesNotExist``
    :param kwargs: Lookup arguments forwarded to ``model.objects.get``
    """
    try:
        found = model.objects.get(**kwargs)
    except model.DoesNotExist:
        found = None
    return found
beatport_login = 'xxx'
beatport_pass = 'xxx'
beatport = OAuth1Service(
name='beatport',
consumer_key='xxxxx',
consumer_secret='xxxxx',
request_token_url= 'https://oauth-api.beatport.com/identity/1/oauth/request-token',
access_token_url='https://oauth-api.beatport.com/identity/1/oauth/access-token',
authorize_url='https://oauth-api.beatport.com/identity/1/oauth/authorize',
base_url='https://oauth-api.beatport.com/json/catalog')
request_token, request_token_secret = beatport.get_request_token(method='POST', data={
'oauth_callback': 'http://www.edmhunters.com'})
authorize_url = beatport.get_authorize_url(request_token)
values = {
'oauth_token': request_token,
'username': beatport_login,
'password': beatport_pass,
'submit' : 'Login',
}
r = requests.post('https://oauth-api.beatport.com/identity/1/oauth/authorize-submit', data=values)
verifier = r.url.split("oauth_verifier=",1)[1]
tokens = beatport.get_raw_access_token(request_token, request_token_secret, method='POST', data={
'oauth_verifier': verifier})
token_string = tokens.content
access_token = token_string[token_string.find('=')+1:token_string.find('&')]
access_token_secret = token_string[token_string.find('t=')+2:token_string.rfind('&s')]
session = beatport.get_session((access_token, access_token_secret))
for dj in DJ.objects.all():
r = session.get('https://oauth-api.beatport.com/catalog/3/tracks', params={'facets': "artistName:"+dj.name, 'perPage': 150})
count_response = r.json()
results = []
for i in range(1, count_response['metadata']['totalPages']+1):
r1 = session.get('https://oauth-api.beatport.com/catalog/3/tracks', params={'facets': "artistName:"+dj.name,
'page': i,
'perPage': 150,
'sortBy': 'releaseDate ASC'})
json_response = r1.json()
results += json_response['results']
song_list = []
for song in results:
artists = [artist['name'] for artist in song['artists'] if str(artist['type'])=='artist']
remixers = [artist['name'] for artist in song['artists'] if str(artist['type'])=='remixer']
if not ((dj.name in artists) and ((dj.name not in remixers) if len(remixers)>0 else False)):
song_list.append(song)
for song in song_list:
artists = [artist['name'] for artist in song['artists'] if str(artist['type'])=='artist']
remixers = [artist['name'] for artist in song['artists'] if str(artist['type'])=='remixer']
artist_list = ', '.join(artists)
remixer_list = ', '.join(remixers)
song_name = song['name']
if not(song_name.lower().find("feat.") == -1 ):
normal_name=song_name[0:song_name.lower().find("feat.")].rstrip()
else:
normal_name=song_name
genre_list=[]
for genre in song['genres']:
genre_list.append(genre['name'])
genres = ', '.join(genre_list)
if not get_obj_or_none(Song, name__iexact=song_name, artist=dj):
s = Song(song_id=song['id'],
name=song_name,
title=song['title'],
normalized_name=normal_name,
artist=dj,
artists=artist_list,
remixers=remixer_list,
release_date=song['releaseDate'],
slug=song['slug'],
artwork=song['images']['large']['url'],
genres=genres)
s.save()
print "Added song:", s.song_id, s.artist
Why do I get the above mentioned error?
It looks as if the Beatport API is overloaded and closes the connection prematurely sometimes. Your first set of requests succeeded just fine, it was page 3 that threw the error because the response is empty.
You really should report this to Beatport, but you could perhaps work around this issue by instructing the requests module to retry requests:
from requests.adapters import HTTPAdapter
# ....
session = beatport.get_session((access_token, access_token_secret))
session.mount('https://oauth-api.beatport.com', HTTPAdapter(max_retries=5))
would retry your requests a few more times in case an error occurred.