I have modified the code in python-paged-ldap-snippet.py from https://gist.github.com/mattfahrner/c228ead9c516fc322d3a
When SEARCHFILTER is '(&(objectCategory=person)(objectClass=user)(memberOf=CN=Users0,OU=Groups,DC=ad,DC=company,DC=com))' the script runs just fine, but when I change it to the broader '(&(objectCategory=person)(objectClass=user))', I notice the code never reaches the writeToFile function.
The objective of the code is to dump all the user information and parse it into a file.
I tried running ldapsearch against '(&(objectCategory=person)(objectClass=user))' and I do get output.
Not sure what is wrong. Suggestions are greatly appreciated.
Thank you.
#!/usr/bin/python
import sys
import os
import ldap
import ldap.controls

LDAPSERVER = 'ldap://xxx.xxx.xxx.xxx:389'
BASEDN = 'dc=ad,dc=company,dc=com'
LDAPUSER = "CN=LDAPuser,OU=XXX,OU=Users,DC=ad,DC=company,DC=com"
LDAPPASSWORD = 'LDAPpassword'
PAGESIZE = 20000
ATTRLIST = ['sAMAccountName', 'uid']
SEARCHFILTER = '(&(objectCategory=person)(objectClass=user))'
#SEARCHFILTER = '(&(objectCategory=person)(objectClass=user)(memberOf=CN=Users0,OU=Groups,DC=ad,DC=company,DC=com))'

data = []

ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW)
ldap.set_option(ldap.OPT_REFERRALS, 0)
l = ldap.initialize(LDAPSERVER)
l.protocol_version = 3  # Paged results only apply to LDAP v3

try:
    l.simple_bind_s(LDAPUSER, LDAPPASSWORD)
    print ' Login Done, Searching data'
except ldap.LDAPError as e:
    exit('LDAP bind failed: %s' % e)

lc = ldap.controls.SimplePagedResultsControl(True, size=PAGESIZE, cookie='')

def writeToFile(data):
    print ' Writing data to file'
    # code to print all output into CSV file

while True:
    try:
        msgid = l.search_ext(BASEDN, ldap.SCOPE_SUBTREE, SEARCHFILTER, ATTRLIST, serverctrls=[lc])
    except ldap.LDAPError as e:
        sys.exit('LDAP search failed: %s' % e)
    try:
        rtype, rdata, rmsgid, serverctrls = l.result3(msgid)
    except ldap.LDAPError as e:
        sys.exit('Could not pull LDAP results: %s' % e)
    for dn, attrs in rdata:
        data.append(attrs)
    pctrls = [c for c in serverctrls
              if c.controlType == ldap.controls.SimplePagedResultsControl.controlType]
    if not pctrls:
        print >> sys.stderr, 'Warning: Server ignores RFC 2696 control.'
        break
    cookie = pctrls[0].cookie
    if not cookie:
        writeToFile(data)
        print 'Task Complete'
        break
    lc.cookie = cookie  # hand the server's cookie back for the next page (python-ldap >= 2.4 API)
PAGESIZE = 20000
Lower your page size to a value <= 1000, since that is the maximum Active Directory will return per page anyway. It is possible that the client is waiting for 20000 records before requesting the next page and never getting them.
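As a minimal sketch of the change (assuming python-ldap >= 2.4, where the paged-results control exposes size and cookie attributes), only the page size needs to differ; the rest of the loop stays as it is:
PAGESIZE = 1000  # stay at or below AD's MaxPageSize (1000 by default)
lc = ldap.controls.SimplePagedResultsControl(True, size=PAGESIZE, cookie='')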
I have a column in my dataframe that is of type suds.sax.text.Text and I want to convert it to a string. I can't find much on how to do this, except for this site. Using pandas.DataFrame.astype does not work. I'm sure there is an easy way to do this; the documentation is just going over my head. I am using a web service to return some metadata on weather stations. This metadata gets returned as a suds object. Link to the web service is here.
from suds.client import Client
from suds.transport.https import HttpAuthenticated
from urllib.error import URLError
from urllib.request import HTTPSHandler
from time import sleep
import ssl
import pandas as pd

ssl._create_default_https_context = ssl._create_unverified_context

_URL_AWDB_WSDL = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'

def _execute_awdb_call(a_func, ntries_max=3, sleep_sec=5, **kwargs):
    ntries = 0
    while 1:
        try:
            a_result = a_func(**kwargs)
            break
        except Exception as e:
            ntries += 1
            if ntries == ntries_max:
                raise
            else:
                print(("WARNING: Received error executing AWDB function %s:"
                       " %s. Sleeping %d seconds and trying again." %
                       (str(a_func.method.name), str(e), sleep_sec)))
                sleep(sleep_sec)
    return a_result

def _stationMetadata_to_tuple(a_meta):
    list_meta = [None] * len(_stnmeta_attrs)
    for i, a_attr in enumerate(_stnmeta_attrs):
        try:
            list_meta[i] = a_meta[a_attr]
        except AttributeError:
            # Doesn't have attribute
            continue
    return tuple(list_meta)

try:
    _client = Client(_URL_AWDB_WSDL)
    _stnmeta_attrs = (_client.factory.
                      create('stationMetaData').__keylist__)
except URLError as e:
    if type(e.reason) == ssl.SSLError:
        print("Warning: SSL Error connecting to AWDB web service. Skipping verification...")
        _client = Client(_URL_AWDB_WSDL, transport=_CustomTransport())
        _stnmeta_attrs = (_client.factory.
                          create('stationMetaData').__keylist__)
    else:
        raise

stn_triplets = ['878:WY:SNTL', '1033:CO:SNTL']
stn_metas = _execute_awdb_call(_client.service.
                               getStationMetadataMultiple,
                               stationTriplets=stn_triplets)
stn_tups = [_stationMetadata_to_tuple(a) for a in stn_metas]
df_stns = pd.DataFrame(stn_tups, columns=_stnmeta_attrs)
stns = df_stns.rename(columns={'actonId': 'station_id',
                               'name': 'station_name'})
stns['station_id'] = stns.station_id.fillna(stns.stationTriplet)
stns = stns[~stns.station_id.isnull()]
print(type(stns.beginDate[0]))
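For what it's worth, since suds.sax.text.Text is a subclass of Python's built-in string type, one possible approach (a sketch, not tested against this particular service) is to coerce the affected values with str:
from suds.sax.text import Text

# Hypothetical fix: convert every suds Text cell to a plain string.
# Text subclasses str, so str(value) yields an ordinary string.
stns = stns.applymap(lambda v: str(v) if isinstance(v, Text) else v)
print(type(stns.beginDate[0]))  # should now report <class 'str'>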
I am using Python to connect to a Couchbase database with the kind of connection string below. The last part of the IP keeps changing dynamically, so I want to keep trying until a working connection string is found:
With the construct below, the problem is that if the connection is good at IP 10.xxx.xx.112, it does not break; it still tries IP 10.xxx.xx.113 and fails because no DB connection is available there. I want to break as soon as a good IP and connection are available. Please follow the lines after try/except:
I am pretty sure there is a better way to write this construct programmatically in Python, but I am missing something.
try:
    COUCHBASE_CONNSTR = "couchbase://10.xxx.xx.110:30493"  # From outside the cluster (K8s target IP may not be static always)
    try:
        COUCHBASE_CONNSTR = "couchbase://10.xxx.xx.111:30493"
        try:
            COUCHBASE_CONNSTR = "couchbase://10.xxx.xx.112:30493"  # From outside the cluster (K8s target IP may not be static always)
            try:
                COUCHBASE_CONNSTR = "couchbase://10.80.xx.113:30493"
            except:
                print("3")
        except:
            print("4")
    except:
        print("5")
except:
    print("6")

COUCHBASE_USER = "Administrator"
COUCHBASE_BUCKET_PASSWORD = "password"

cluster = Cluster("COUCHBASE_CONNSTR")
authenticator = PasswordAuthenticator(
    "COUCHBASE_USER", "COUCHBASE_BUCKET_PASSWORD"
)
cluster.authenticate(authenticator)
cb = cluster.open_bucket("samplebucketname")
EDIT: Now contains connect procedure.
Just loop over the address space like this:
COUCHBASE_USER = "Administrator"
COUCHBASE_BUCKET_PASSWORD = "password"

authenticator = PasswordAuthenticator(
    COUCHBASE_USER, COUCHBASE_BUCKET_PASSWORD
)

bucket = None
for num in range(110, 255):
    ip = f"10.xxx.xx.{num}"  # <-- replace x with your numbers
    try:
        cluster = Cluster(f"couchbase://{ip}:30493")
        cluster.authenticate(authenticator)
        bucket = cluster.open_bucket("samplebucketname")
        # this assumes: no exception ==> connected,
        # better check bucket itself
        print(f"Successfully connected at {ip}")
        break
    except Exception as e:  # <-- better specify actual expected exceptions!
        print(f"Could not connect to {ip}: {e}")

# if still not connected
if bucket is None:
    raise ValueError('Could not connect')
You could define all the URLs in a dict and iterate over them, with the try/except inside the loop so one bad URL does not abort the rest (as written originally, msg was not defined inside the except block):
urls = {"couchbase://10.xxx.xx.110:30493": "msg", "couchbase://10.xxx.xx.111:30493": "msg1"}

for url, msg in urls.items():
    try:
        cluster = Cluster(url)
        cluster.authenticate(authenticator)
        bucket = cluster.open_bucket("samplebucketname")
        break
    except Exception:
        print(msg)
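Here the dict just maps each candidate URL to the message printed on failure; a plain list of URLs with one generic error message would work equally well.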
This is my test so far:
def test_500(self):
    client = ClientConfiguration(token=token, url=url)
    client.url = 'https://localhost:1234/v1/' + bucket
    keys = None
    try:
        get_bucket = json.loads(str(client.get_bucket(bucket)))
        result = get_bucket['result']
    except Exception as e:
        expected_status_code = 500
        failure_message = "Expected status code %s but got status code %s" % (expected_status_code, e)
        self.assertEquals(e, expected_status_code, failure_message)
I need to write a mock that will return a 500 response when the 'https://localhost:1234/v1/' + bucket URL is used. Can this be done with unittest, and if so, how? Where can I find some documentation on it? I've been through this site, the unittest documentation, and YouTube, and can't find anything specific to what I want to do.
I ended up using the responses library to create my test.
The end result is:
import responses

@responses.activate
def test_500(self):
    responses.add(responses.GET, 'https://localhost:1234/v1/' + bucket,
                  json={'error': 'server error'}, status=500)
    client = ClientConfiguration(token=token, url=url)
    client.url = 'https://localhost:1234/v1/'
    keys = None
    try:
        get_bucket = json.loads(str(client.get_bucket(bucket)))
        result = get_bucket['result']
    except Exception as e:
        expected_status_code = 500
        failure_message = "Expected status code %s but got status code %s" % (expected_status_code, e)
        self.assertEquals(e, expected_status_code, failure_message)
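One caveat: responses works by patching the requests library, so this only intercepts the call if ClientConfiguration makes its HTTP calls through requests; if it uses a different HTTP stack, another mocking tool (for example unittest.mock on the client method itself) would be needed.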
I'm experimenting with a Python script (taken from here) that traces the retweet path of a given tweetID.
I'm aware of the very restrictive rate limits on the Twitter API, but I'm hitting the following error every time I execute the script:
Caught TweepError: [{u'message': u'Rate limit exceeded', u'code': 88}]
The script I'm using is as follows:
#!/usr/bin/python -u
#
# Usage: ./trace.py <tweetId>
#
import sys
import tweepy
import Queue
import time
import json
import redis

CONSUMER_KEY = 'x'
CONSUMER_SECRET = 'x'
ACCESS_KEY = 'x'
ACCESS_SECRET = 'x'

REDIS_FOLLOWERS_KEY = "followers:%s"

# Retweeters who have not yet been connected to the social graph
unconnected = {}

# Retweeters connected to the social graph...become seeds for deeper search
connected = Queue.Queue()

# Social graph
links = []
nodes = []
#----------------------------------------
def addUserToSocialGraph (parent, child):
    # parent: tweepy.models.User
    # child: tweepy.models.User
    #----------------------------------------
    global links
    if (child):
        nodes.append ({'id':child.id,
                       'screen_name':child.screen_name,
                       'followers_count':child.followers_count,
                       'profile_image_url':child.profile_image_url})
        # TODO: Find child and parent indices in nodes in order to create the links
        if (parent):
            print (nodes)
            print ("Adding to socialgraph: %s ==> %s" % (parent.screen_name, child.screen_name))
            links.append ({'source':getNodeIndex (parent),
                           'target':getNodeIndex (child)})

#----------------------------------------
def getNodeIndex (user):
    # node: tweepy.models.User
    #----------------------------------------
    global nodes
    for i in range(len(nodes)):
        if (user.id == nodes[i]["id"]):
            return i
    return -1
#----------------------------------------
def isFollower (parent, child):
    # parent: tweepy.models.User
    # child: tweepy.models.User
    #----------------------------------------
    global red
    # Fetch data from Twitter if we don't have it
    key = REDIS_FOLLOWERS_KEY % parent.screen_name
    if ( not red.exists (key) ):
        print ("No follower data for user %s" % parent.screen_name)
        crawlFollowers (parent)
    cache_count = red.hlen (key)
    if ( parent.followers_count > (cache_count*1.1) ):
        # print ("Incomplete follower data for user %s. Have %d followers but should have %d (exceeds 10%% margin for error)."
        #        % (parent.screen_name, cache_count, parent.followers_count))
        crawlFollowers (parent)
    return red.hexists (key, child.screen_name)
#----------------------------------------
def crawlFollowers (user):
    # user: tweepy.models.User
    #----------------------------------------
    print ("Retrieving followers for %s (%d)" % (user.screen_name, user.followers_count))
    count = 0
    follower_cursors = tweepy.Cursor (api.followers, id = user.id, count = 15)
    followers_iter = follower_cursors.items()
    follower = None
    while True:
        try:
            # We may have to retry a failed follower lookup
            if ( follower is None ):
                follower = followers_iter.next()
            # Add link to Redis
            red.hset ("followers:%s" % user.screen_name, follower.screen_name, follower.followers_count)
            follower = None
            count += 1
        except StopIteration:
            break
        except tweepy.error.TweepError as err:
            print ("Caught TweepError: %s" % (err))
            if (err.reason == "Not authorized" ):
                print ("Not authorized to see users followers. Skipping.")
                break
            limit = api.rate_limit_status()
            if (limit['remaining_hits'] == 0):
                seconds_until_reset = int (limit['reset_time_in_seconds'] - time.time())
                print ("API request limit reached. Sleeping for %s seconds" % seconds_until_reset)
                time.sleep (seconds_until_reset + 5)
            else:
                print ("Sleeping a few seconds and then retrying")
                time.sleep (5)
    print ("Added %d followers of user %s" % (count, user.screen_name))
#----------------------------------------
# Main
#----------------------------------------
tweetId = sys.argv[1]

# Connect to Redis
red = redis.Redis(unix_socket_path="/tmp/redis.sock")

# Connect to Twitter
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)
print (api.rate_limit_status())

# Get original Tweet details
status = api.get_status (tweetId)
connected.put(status.user)
addUserToSocialGraph (None, status.user)

retweets = api.retweets (status.id)
print ("Tweet %s, originally posted by %s, was retweeted by..." % (status.id, status.user.screen_name))
for retweet in retweets:
    print (retweet.user.screen_name)
    unconnected[retweet.user.screen_name] = retweet.user

# Pivot
while not (connected.empty() or len(unconnected)==0):
    # Get next user
    pivot = connected.get()
    # Check followers of this user against unconnected retweeters
    print ("Looking through followers of %s" % pivot.screen_name)
    for (screen_name, retweeter) in unconnected.items():
        if (isFollower(pivot, retweeter)):
            print ("%s <=== %s" % (pivot.screen_name, retweeter.screen_name))
            connected.put (retweeter)
            addUserToSocialGraph (pivot, retweeter)
            del unconnected[retweeter.screen_name]
        else:
            print ("%s <=X= %s" % (pivot.screen_name, retweeter.screen_name))

# Add unconnected nodes to social graph
for (screen_name, user) in unconnected.items():
    addUserToSocialGraph (None, user)

# Encode data as JSON
filename = "%s.json" % status.id
print ("\n\nWriting JSON to %s" % filename)
tweet = {'id':status.id,
         'retweet_count':status.retweet_count,
         'text':status.text,
         'author':status.user.id}
f = open (filename, 'w')
f.write (json.dumps({'tweet':tweet, 'nodes':nodes, 'links':links}, indent=2))
f.close()

sys.exit()
I suspect I'm making a mistake in the crawlFollowers function.
Is there a way to stagger the crawler so that it stays within the rate limit, or to make it conform to the limit automatically?
Try running with the wait_on_rate_limit flag set to True in the Tweepy API constructor:
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
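If your Tweepy version predates these flags, a rough sketch of the same idea (assuming Tweepy 3.x, where RateLimitError is a subclass of TweepError, and Twitter's standard 15-minute rate-limit window) is to sleep out the window around cursor iteration yourself:
import time
import tweepy

def items_with_backoff(cursor_items):
    # Yield items from a tweepy Cursor, sleeping out any rate-limit window.
    while True:
        try:
            yield cursor_items.next()
        except tweepy.RateLimitError:
            print("Rate limit hit; sleeping 15 minutes")
            time.sleep(15 * 60)
        except StopIteration:
            return

# Hypothetical usage inside crawlFollowers, replacing the manual retry loop:
# for follower in items_with_backoff(tweepy.Cursor(api.followers, id=user.id).items()):
#     red.hset("followers:%s" % user.screen_name, follower.screen_name, follower.followers_count)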
I have the following block of Python code talking to DynamoDB on AWS:
try:
    response = conn.batch_write_item(batch_list)
except Exception, e:
    try:
        mess = e.message
    except:
        mess = "NOMESS"
    try:
        earg0 = e.args[0]
    except:
        earg0 = "NOEARG0"
    try:
        stre = str(e)
    except:
        stre = "NOSTRE"
    print "mess = '%s'" % mess
    print "earg0 = '%s'" % earg0
    print "stre = '%s'" % stre
What I get is this:
mess = ''
earg0 = 'NOEARG0'
stre = 'DynamoDBValidationError: 400 Bad Request {'message': 'Item size has exceeded the maximum allowed size', '__type': 'com.amazon.coral.validate#ValidationException'}'
What I need is a way to reliably extract the message string, such as 'Item size has exceeded the maximum allowed size', from e. How can I do it?
I'm assuming you're using boto to access DynamoDB.
Here is the __init__ method of JSONResponseError (an ancestor class of DynamoDBValidationError):
def __init__(self, status, reason, body=None, *args):
    self.status = status
    self.reason = reason
    self.body = body
    if self.body:
        self.error_message = self.body.get('message', None)
        self.error_code = self.body.get('__type', None)
        if self.error_code:
            self.error_code = self.error_code.split('#')[-1]
Wild guess: I would go with e.error_message to get 'Item size has exceeded ...'.
You can also print all attributes (and their values) of e:
for attr in dir(e):
    print "e[%r] = '''%s'''" % (attr, getattr(e, attr))
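Putting that together, a short sketch of the handler (assuming boto raises JSONResponseError subclasses here, per the class above):
from boto.exception import JSONResponseError

try:
    response = conn.batch_write_item(batch_list)
except JSONResponseError, e:
    # error_message is filled in from the response body by boto
    print "mess = '%s'" % e.error_message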
Take e.body and you will get the error as a dictionary, for example:
{u'message': u'The conditional request failed', u'__type': u'com.amazonaws.dynamodb.v20120810#ConditionalCheckFailedException'}
From this you can easily get the message.