Convert a suds.sax.text.Text object to a string? - python

I have a column in my dataframe that is of type suds.sax.text.Text and I want to convert it to a string. I can't find much on how to do this, except for this site. Using pandas.DataFrame.astype does not work. I'm sure there is an easy way to do this; the documentation is just going over my head. I am calling a web service to return some metadata on weather stations. The metadata comes back as a suds object. Link to the web service is here.
from suds.client import Client
from suds.transport.https import HttpAuthenticated
from urllib.error import URLError
from urllib.request import HTTPSHandler
from time import sleep  # used by the retry loop in _execute_awdb_call
import ssl
import pandas as pd

ssl._create_default_https_context = ssl._create_unverified_context

_URL_AWDB_WSDL = 'https://www.wcc.nrcs.usda.gov/awdbWebService/services?WSDL'

def _execute_awdb_call(a_func, ntries_max=3, sleep_sec=5, **kwargs):
    ntries = 0
    while 1:
        try:
            a_result = a_func(**kwargs)
            break
        except Exception as e:
            ntries += 1
            if ntries == ntries_max:
                raise
            else:
                print("WARNING: Received error executing AWDB function %s:"
                      " %s. Sleeping %d seconds and trying again." %
                      (str(a_func.method.name), str(e), sleep_sec))
                sleep(sleep_sec)
    return a_result

def _stationMetadata_to_tuple(a_meta):
    list_meta = [None] * len(_stnmeta_attrs)
    for i, a_attr in enumerate(_stnmeta_attrs):
        try:
            list_meta[i] = a_meta[a_attr]
        except AttributeError:
            # this metadata record doesn't have the attribute
            continue
    return tuple(list_meta)

try:
    _client = Client(_URL_AWDB_WSDL)
    _stnmeta_attrs = (_client.factory.
                      create('stationMetaData').__keylist__)
except URLError as e:
    if type(e.reason) == ssl.SSLError:
        print("Warning: SSL Error connecting to AWDB web service. Skipping verification...")
        # _CustomTransport is defined elsewhere in the original script
        _client = Client(_URL_AWDB_WSDL, transport=_CustomTransport())
        _stnmeta_attrs = (_client.factory.
                          create('stationMetaData').__keylist__)
    else:
        raise

stn_triplets = ['878:WY:SNTL', '1033:CO:SNTL']
stn_metas = _execute_awdb_call(_client.service.
                               getStationMetadataMultiple,
                               stationTriplets=stn_triplets)
stn_tups = [_stationMetadata_to_tuple(a) for a in stn_metas]
df_stns = pd.DataFrame(stn_tups, columns=_stnmeta_attrs)
stns = df_stns.rename(columns={'actonId': 'station_id',
                               'name': 'station_name'})
stns['station_id'] = stns.station_id.fillna(stns.stationTriplet)
stns = stns[~stns.station_id.isnull()]
print(type(stns.beginDate[0]))
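For what it's worth, a minimal sketch of one common workaround (an assumption, not tested against the AWDB service): suds.sax.text.Text values stringify cleanly with str(), so mapping str over the affected column yields plain Python strings. beginDate is the column from the final print above.

    # map str over the suds Text column to get plain Python strings
    stns['beginDate'] = stns['beginDate'].apply(lambda v: str(v) if v is not None else v)
    print(type(stns.beginDate[0]))  # <class 'str'>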

Related

LDAP: querying for all users in entire domain using sAMAccountName

I have modified this code python-paged-ldap-snippet.py from https://gist.github.com/mattfahrner/c228ead9c516fc322d3a
My problem is that when I change my SEARCHFILTER from '(&(objectCategory=person)(objectClass=user))' to '(&(objectCategory=person)(objectClass=user)(memberOf=CN=Users0,OU=Groups,DC=ad,DC=company,DC=com))', it runs just fine.
With SEARCHFILTER='(&(objectCategory=person)(objectClass=user))', I notice that the code never enters the writeToFile function.
The objective of the code is to dump all the user information and parse it into a file.
I tried running ldapsearch against '(&(objectCategory=person)(objectClass=user))' and I do get output.
Not sure what is wrong. Suggestions are greatly appreciated.
Thank you.
#!/usr/bin/python
import sys
import ldap
import ldap.controls  # SimplePagedResultsControl lives in this submodule
import os

LDAPSERVER = 'ldap://xxx.xxx.xxx.xxx:389'
BASEDN = 'dc=ad,dc=company,dc=com'
LDAPUSER = "CN=LDAPuser,OU=XXX,OU=Users,DC=ad,DC=company,DC=com"
LDAPPASSWORD = 'LDAPpassword'
PAGESIZE = 20000
ATTRLIST = ['sAMAccountName', 'uid']
SEARCHFILTER = '(&(objectCategory=person)(objectClass=user))'
#SEARCHFILTER='(&(objectCategory=person)(objectClass=user)(memberOf=CN=Users0,OU=Groups,DC=ad,DC=company,DC=com))'

data = []

ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW)
ldap.set_option(ldap.OPT_REFERRALS, 0)
l = ldap.initialize(LDAPSERVER)
l.protocol_version = 3  # Paged results only apply to LDAP v3

try:
    l.simple_bind_s(LDAPUSER, LDAPPASSWORD)
    print ' Login Done, Searching data'
except ldap.LDAPError as e:
    exit('LDAP bind failed: %s' % e)

lc = ldap.controls.SimplePagedResultsControl(True, size=PAGESIZE, cookie='')

def writeToFile(data):
    print ' Writing data to file'
    # code to print all output into CSV file

while True:
    try:
        msgid = l.search_ext(BASEDN, ldap.SCOPE_SUBTREE, SEARCHFILTER, ATTRLIST, serverctrls=[lc])
    except ldap.LDAPError as e:
        sys.exit('LDAP search failed: %s' % e)
    try:
        rtype, rdata, rmsgid, serverctrls = l.result3(msgid)
    except ldap.LDAPError as e:
        sys.exit('Could not pull LDAP results: %s' % e)
    for dn, attrs in rdata:
        data.append(attrs)
    pctrls = [c for c in serverctrls
              if c.controlType == ldap.controls.SimplePagedResultsControl.controlType]
    if not pctrls:
        print >> sys.stderr, 'Warning: Server ignores RFC 2696 control.'
        break
    cookie = pctrls[0].cookie
    if not cookie:
        writeToFile(data)
        print 'Task Complete'
        break
    lc.controlValue = (PAGESIZE, cookie)
    PAGESIZE = 20000
Lower your page size to a value <= 1000, since that's the max AD will give you at a time anyway. It's possible that the client is waiting for 20000 records before requesting the next page and never getting them.
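A sketch of that change against the script above, assuming python-ldap 2.4+ where the paged-results control exposes size and cookie attributes:

    PAGESIZE = 1000  # AD's default MaxPageSize; larger requests are served in smaller pages at best

    lc = ldap.controls.SimplePagedResultsControl(True, size=PAGESIZE, cookie='')

    # ...search loop as above, then at the bottom of the loop:
    lc.cookie = cookie  # python-ldap 2.4+ spelling of lc.controlValue = (PAGESIZE, cookie)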

Python: Nested while loop inside a for loop breaking unexpectedly without any error

I have a while loop nested within a for loop that runs over a json array collected from Firestore. The outer loop collects stock symbols to pass to another API, which returns minute-by-minute trading data to put back into the Firestore db.
While it's running, the loop stops unexpectedly, without any error, around the fourth or sixth company (never more), partway through the 389-entry while loop.
Any idea why this is? Is it something in my code? I noticed that if I changed the limit in the while loop from 389 down to 100, it worked through all the companies in the json array, but with the full 389 entries it won't get through more than four companies down the list.
Anyway, thanks for the help!
import requests
import json
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import datetime

cred = credentials.Certificate("./serviceAccountKey.json")
firebase_admin.initialize_app(cred)
db = firestore.client()

doc_ref1 = db.collection(u'Quiver').stream()

for doc in doc_ref1:
    symbol = doc.id
    api_url = "https://api.iextrading.com/1.0/stock/{}/chart/1d".format(symbol)
    query_url = api_url
    r = requests.get(query_url)
    if r.status_code != 200:
        print("Error:", r.status_code)
        continue
    if r.status_code == 404:  # note: unreachable, the != 200 check above already handles it
        print("Error:", r.status_code, symbol)
        continue
    json_stock = r.json()
    b = 0
    while b <= 100:
        try:
            date = json_stock[b]['date']
            minute = json_stock[b]['minute']
            label = json_stock[b]['label']
            high = json_stock[b]['high']
            low = json_stock[b]['low']
            average = json_stock[b]['average']
            volume = json_stock[b]['volume']
            notional = json_stock[b]['notional']
            numberOfTrades = json_stock[b]['numberOfTrades']
            marketHigh = json_stock[b]['marketHigh']
            marketLow = json_stock[b]['marketLow']
            marketAverage = json_stock[b]['marketAverage']
            marketVolume = json_stock[b]['marketVolume']
            marketNotional = json_stock[b]['marketNotional']
            marketNumberOfTrades = json_stock[b]['marketNumberOfTrades']
            open = json_stock[b]['open']  # note: shadows the built-in open()
            close = json_stock[b]['close']
            marketOpen = json_stock[b]['marketOpen']
            marketClose = json_stock[b]['marketClose']
            changeOverTime = json_stock[b]['changeOverTime']
            marketChangeOverTime = json_stock[b]['marketChangeOverTime']
            doc_ref = db.collection(u'dailies').document(u'{}-{}'.format(minute, symbol))
            doc_ref.set({
                u'date': u'{}'.format(date),
                u'minute': u'{}'.format(minute),
                u'label': u'{}'.format(label),
                u'high': u'{}'.format(high),
                u'average': u'{}'.format(average),
                u'notional': u'{}'.format(notional),
                u'number of trades': u'{}'.format(numberOfTrades),
                u'market high': u'{}'.format(marketHigh),
                u'market low': u'{}'.format(marketLow),
                u'market average': u'{}'.format(marketAverage),
                u'market volume': u'{}'.format(marketVolume),
                u'market notional': u'{}'.format(marketNotional),
                u'market number of trades': u'{}'.format(marketNumberOfTrades),
                u'open': u'{}'.format(open),
                u'close': u'{}'.format(close),
                u'market open': u'{}'.format(marketOpen),
                u'market close': u'{}'.format(marketClose),
                u'change over time': u'{}'.format(changeOverTime),
                u'market change over time': u'{}'.format(marketChangeOverTime)
            })
            print("{} {}: {}".format(symbol, minute, b))
            b += 1
        except IndexError:
            print("Index Error")
            break
You can use:
    except Exception as errmsg:
        print(errmsg)
and provide more information.
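A sketch of that applied to the loop above: keep the IndexError handler for the expected end-of-array case, and add a temporary catch-all so the real failure is printed instead of the loop just stopping silently.

        except IndexError:
            # expected: walked past the last entry for this symbol
            print("Index Error")
            break
        except Exception as errmsg:
            # temporary diagnostic: report what actually went wrong and where
            print("{} failed at entry {}: {}".format(symbol, b, errmsg))
            break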

Mocking a 500 response when an operation is performed

This is my test so far:
def test_500(self):
    client = ClientConfiguration(token=token, url=url)
    client.url = 'https://localhost:1234/v1/' + bucket
    keys = None
    try:
        get_bucket = json.loads(str(client.get_bucket(bucket)))
        result = get_bucket['result']
    except Exception as e:
        expected_status_code = 500
        failure_message = "Expected status code %s but got status code %s" % (expected_status_code, e)
        self.assertEquals(e, expected_status_code, failure_message)
I need to write a mock that will return a 500 response when the 'https://localhost:1234/v1/' + bucket url is used. Can this be done with unittest and if so, how, or where can I find some documentation on this? I've been through this site, the unittest documentation and YouTube and can't find anything specific to what I want to do.
I ended up using this to create my test.
The end result is:
@responses.activate
def test_500(self):
    responses.add(responses.GET, 'https://localhost:1234/v1/' + bucket,
                  json={'error': 'server error'}, status=500)
    client = ClientConfiguration(token=token, url=url)
    client.url = 'https://localhost:1234/v1/'
    keys = None
    try:
        get_bucket = json.loads(str(client.get_bucket(bucket)))
        result = get_bucket['result']
    except Exception as e:
        expected_status_code = 500
        failure_message = "Expected status code %s but got status code %s" % (expected_status_code, e)
        self.assertEquals(e, expected_status_code, failure_message)
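For reference, the pattern above relies on the responses package (pip install responses, then import responses at the top of the test module): while the @responses.activate decorator is active it patches requests, so any requests.get() to a registered URL returns the canned status and body, and unregistered URLs raise a ConnectionError. A self-contained sketch of the same idea, with a hypothetical bucket name and no client class:

    import unittest
    import requests
    import responses

    class BucketTests(unittest.TestCase):
        @responses.activate
        def test_500_minimal(self):
            # register the canned 500 for the (hypothetical) bucket URL
            responses.add(responses.GET, 'https://localhost:1234/v1/mybucket',
                          json={'error': 'server error'}, status=500)
            r = requests.get('https://localhost:1234/v1/mybucket')
            self.assertEqual(r.status_code, 500)
            self.assertEqual(r.json(), {'error': 'server error'})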

Trapping a custom error in python

I'm trying to trap the following error in a try/except block, but it comes from a custom module rather than being a standard error such as ValueError. What is the correct way to catch such errors?
Here is my code:
try:
    obj = IPWhois(ip_address)
except Exception(IPDefinedError):
    results = {}
else:
    results = obj.lookup()
The most obvious way:
    except IPDefinedError:
gives:
    NameError: name 'IPDefinedError' is not defined
The error returned that I want to check for is:
    ipwhois.exceptions.IPDefinedError
    ipwhois.exceptions.IPDefinedError: IPv4 address '127.0.0.1' is already defined as 'Private-Use Networks' via 'RFC 1918'.
The issue here is the import!
I had the import as
from ipwhois import IPWhois
but I also needed
import ipwhois
So the following works:
try:
    obj = IPWhois(ip_address)
except ipwhois.exceptions.IPDefinedError:
    results = {}
else:
    results = obj.lookup()
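Equivalently, given the package path shown in the traceback, you can import the exception class directly and catch it by its short name:

    from ipwhois import IPWhois
    from ipwhois.exceptions import IPDefinedError

    try:
        obj = IPWhois(ip_address)
    except IPDefinedError:
        results = {}
    else:
        results = obj.lookup()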
Here is a quick recap. Yes, the error in your question did look like it was likely related to an import issue (as per my comment ;) ).
from pprint import pprint as pp

class IPDefinedError(Exception):
    """Mock IPDefinedError implementation"""
    pass

class IPWhois(object):
    """Mock IPWhois implementation"""
    def __init__(self, ip_address):
        if ip_address == "127.0.0.1":
            raise IPDefinedError(
                "IPv4 address '127.0.0.1' is already defined as 'Private-Use Networks' via 'RFC 1918'.")
        self._ip_address = ip_address

    def lookup(self):
        return "RESULT"

def lookup(ip_address):
    """Calculates IPWhois lookup result or None if unsuccessful
    :param ip_address:
    :return: IPWhois lookup result or None if unsuccessful
    """
    result = None
    try:
        obj = IPWhois(ip_address)
        result = obj.lookup()
    except IPDefinedError as e:
        msg = str(e)
        print("Error received: {}".format(msg))  # do something with msg
    return result

if __name__ == '__main__':
    results = map(lookup, ["192.168.1.1", "127.0.0.1"])
    pp(list(results))  # ['RESULT', None]

Unbound local variable Python 3.2

This error pops up randomly, and I'm pretty sure it's because the infoGotten variable isn't assigned before the return statement uses it. The part that has me puzzled is how execution gets to that part of the code in the first place. Hopefully someone can explain why, as I haven't been able to figure it out. I'm guessing it's because of the try/except statement, but I did some searching, checked section 7.4 in the manual, and it doesn't appear (to me anyway) that I'm doing something incorrect.
import urllib.request
import urllib.error

breakLoop = 0

def get_item_info(linkParameters):
    global breakLoop
    nheaders = {'User-Agent': 'Firefox/15.0.1'}
    purl = 'http://example.com/something.php'
    pd = linkParameters
    nreq = urllib.request.Request(purl, pd, nheaders)
    if breakLoop >= 4:
        return 'Request timed out {} times'.format(breakLoop)
    try:
        nresponse = urllib.request.urlopen(nreq)
    except urllib.error.URLError:
        breakLoop += 1
        get_item_info(pd)
    except urllib.error.HTTPError:
        breakLoop += 1
        get_item_info(pd)
    else:
        infoGotten = nresponse.read()
    return infoGotten
Thanks!
You need to return the results of the recursive calls, so it should be return get_item_info(pd) in the except clauses (which I combined below):
breakLoop = 0

def get_item_info(linkParameters):
    global breakLoop  # breakLoop is reassigned below, so it must be declared global
    nheaders = {'User-Agent': 'Firefox/15.0.1'}
    purl = 'http://example.com/something.php'
    pd = linkParameters
    nreq = urllib.request.Request(purl, pd, nheaders)
    if breakLoop >= 4:
        return 'Request timed out {} times'.format(breakLoop)
    try:
        nresponse = urllib.request.urlopen(nreq)
    except (urllib.error.URLError, urllib.error.HTTPError):
        breakLoop += 1
        return get_item_info(pd)
    else:
        return nresponse.read()
Recursion seems like a weird way to perform the retries though, why not use a loop? The following seems more clear:
def get_item_info(linkParameters):
    nheaders = {'User-Agent': 'Firefox/15.0.1'}
    purl = 'http://example.com/something.php'
    pd = linkParameters
    for i in range(5):
        nreq = urllib.request.Request(purl, pd, nheaders)
        try:
            nresponse = urllib.request.urlopen(nreq)
            return nresponse.read()
        except (urllib.error.URLError, urllib.error.HTTPError):
            pass
    return 'Request timed out 4 times'
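One further simplification, for what it's worth: urllib.error.HTTPError is a subclass of urllib.error.URLError, so the tuple in the except clause is redundant and catching URLError alone covers both:

        except urllib.error.URLError:
            # HTTPError subclasses URLError, so this catches both
            pass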
