How to reliably process web-data in Python - python

I'm using the following code to get data from a website:
time_out = 4


def tryconnect(turl, timer=time_out, retries=10):
    """Open ``turl`` with ``urllib2.urlopen``, retrying on ``urllib2.URLError``.

    Args:
        turl: URL passed to ``urllib2.urlopen``.
        timer: timeout passed to ``urllib2.urlopen`` (defaults to ``time_out``).
        retries: maximum number of attempts.

    Returns:
        The object returned by ``urllib2.urlopen``, or ``None`` if every
        attempt raised ``urllib2.URLError`` (or ``retries`` is 0).
    """
    urlopener = None
    sitefound = 1  # stays 1 until an attempt succeeds, then becomes 0
    tried = 0
    while (sitefound != 0) and tried < retries:
        try:
            urlopener = urllib2.urlopen(turl, None, timer)
            sitefound = 0
        except urllib2.URLError:
            tried += 1
    if urlopener:
        return urlopener
    else:
        return None
[...]
urlopener = tryconnect('www.example.com')
if not urlopener:
return None
try:
for line in urlopener:
do stuff
except httplib.IncompleteRead:
print 'incomplete'
return None
except socket.timeout:
print 'socket'
return None
return stuff
Is there a way I can handle all these exceptions without having so much boilerplate code every time?
Thanks!

You can avoid some boilerplate code in the first function too:
time_out = 4


def tryconnect(turl, timer=time_out, retries=10):
    """Open ``turl`` with ``urllib2.urlopen``, retrying on ``urllib2.URLError``.

    Args:
        turl: URL passed to ``urllib2.urlopen``.
        timer: timeout passed to ``urllib2.urlopen`` (defaults to ``time_out``).
        retries: maximum number of attempts.

    Returns:
        The object returned by the first successful ``urllib2.urlopen`` call,
        or ``None`` when all attempts raised ``urllib2.URLError``.
    """
    for tried in xrange(retries):
        try:
            return urllib2.urlopen(turl, None, timer)
        except urllib2.URLError:
            # Failed attempt: fall through to the next iteration.
            pass
    return None
and in the second:
urlopener = tryconnect('www.example.com')
if urlopener:
try:
for line in urlopener:
do stuff
except (httplib.IncompleteRead, socket.timeout), e:
print e
return None
else:
return None

Related

Want to generate databricks notebook URL to send alerts

def _get_dbutils():
    """Return the ``dbutils`` entry of the active IPython shell's ``user_global`` namespace.

    Raises:
        _NoDbutilsError: if IPython cannot be imported, ``get_ipython()``
            returns ``None``, or the namespace lookup raises ``KeyError``.
    """
    try:
        import IPython

        ip_shell = IPython.get_ipython()
        if ip_shell is None:
            raise _NoDbutilsError
        return ip_shell.ns_table["user_global"]["dbutils"]
    except (ImportError, KeyError):
        # Both failure modes are reported to callers as the same error type.
        raise _NoDbutilsError
class _NoDbutilsError(Exception):
pass
def _get_java_dbutils():
    """Return ``dbutils.notebook.entry_point.getDbutils()`` for the current ``dbutils``.

    Raises:
        _NoDbutilsError: propagated from ``_get_dbutils``.
    """
    dbutils = _get_dbutils()
    return dbutils.notebook.entry_point.getDbutils()
def _get_command_context():
    """Return the notebook context: ``_get_java_dbutils().notebook().getContext()``."""
    return _get_java_dbutils().notebook().getContext()
def _get_extra_context(context_key):
    """Return the value stored under ``context_key`` in the command context's ``extraContext()``.

    The lookup result is unwrapped with ``.get()`` unconditionally, so any
    error raised by that call propagates to the caller.
    """
    return _get_command_context().extraContext().get(context_key).get()
def _get_context_tag(context_tag_key):
    """Return the command-context tag named ``context_tag_key``, or ``None`` if undefined.

    The tag lookup returns an option-like object; it is unwrapped only when
    its ``isDefined()`` reports true.
    """
    tag_opt = _get_command_context().tags().get(context_tag_key)
    if tag_opt.isDefined():
        return tag_opt.get()
    else:
        return None
def acl_path_of_acl_root():
    """Return the ACL root path from the command context.

    Tries ``aclPathOfAclRoot()`` on the context first; if that raises, falls
    back to the ``"aclPathOfAclRoot"`` entry of the extra context.
    """
    try:
        return _get_command_context().aclPathOfAclRoot().get()
    except Exception:
        return _get_extra_context("aclPathOfAclRoot")
def _get_property_from_spark_context(key):
try:
from pyspark import TaskContext # pylint: disable=import-error
task_context = TaskContext.get()
if task_context:
return task_context.getLocalProperty(key)
except Exception:
return None
def is_databricks_default_tracking_uri(tracking_uri):
    """Return True if ``tracking_uri`` equals ``"databricks"``, ignoring case and surrounding whitespace."""
    return tracking_uri.lower().strip() == "databricks"
def is_in_databricks_notebook():
    """Return True when the code appears to be running in a Databricks notebook.

    True if the Spark local property ``spark.databricks.notebook.id`` is set;
    otherwise True only if the ACL root path starts with ``/workspace``.
    Any exception while reading the ACL path yields False.
    """
    if _get_property_from_spark_context("spark.databricks.notebook.id") is not None:
        return True
    try:
        return acl_path_of_acl_root().startswith("/workspace")
    except Exception:
        return False
def is_in_databricks_job():
    """Return True if both ``get_job_id()`` and ``get_job_run_id()`` are not None.

    Any exception raised while looking them up yields False.
    """
    try:
        return get_job_id() is not None and get_job_run_id() is not None
    except Exception:
        return False
def is_in_databricks_runtime():
    """Return True if ``pyspark.databricks`` can be imported, False on ``ModuleNotFoundError``."""
    try:
        # pylint: disable=unused-import,import-error,no-name-in-module,unused-variable
        import pyspark.databricks

        return True
    except ModuleNotFoundError:
        return False
def is_dbfs_fuse_available():
    """Return True if ``mountpoint /dbfs`` exits with status 0.

    The command's stdout and stderr are discarded. Any exception while
    running it (for example the ``mountpoint`` binary being missing) yields
    False.
    """
    try:
        return (
            subprocess.call(
                ["mountpoint", "/dbfs"],
                stderr=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
            )
            == 0
        )
    except Exception:
        return False
def is_in_cluster():
    """Return True if an active Spark session reports a cluster id.

    Checks the ``spark.databricks.clusterUsageTags.clusterId`` conf key of the
    session returned by ``_get_active_spark_session()``. Any exception yields
    False.
    """
    try:
        spark_session = _get_active_spark_session()
        return (
            spark_session is not None
            and spark_session.conf.get("spark.databricks.clusterUsageTags.clusterId") is not None
        )
    except Exception:
        return False
def get_notebook_id():
    """Should only be called if is_in_databricks_notebook is true.

    Returns the Spark local property ``spark.databricks.notebook.id`` when it
    is set. Otherwise, if the ACL root path starts with ``/workspace``,
    returns its last ``/``-separated component; else ``None``.
    """
    notebook_id = _get_property_from_spark_context("spark.databricks.notebook.id")
    if notebook_id is not None:
        return notebook_id
    acl_path = acl_path_of_acl_root()
    if acl_path.startswith("/workspace"):
        return acl_path.split("/")[-1]
    return None
def get_notebook_path():
    """Should only be called if is_in_databricks_notebook is true.

    Returns the Spark local property ``spark.databricks.notebook.path`` when
    set; otherwise ``notebookPath()`` from the command context, falling back
    to the ``"notebook_path"`` extra-context entry if that raises.
    """
    path = _get_property_from_spark_context("spark.databricks.notebook.path")
    if path is not None:
        return path
    try:
        return _get_command_context().notebookPath().get()
    except Exception:
        return _get_extra_context("notebook_path")
def get_databricks_runtime():
    """Return the ``spark.databricks.clusterUsageTags.sparkVersion`` conf value.

    Returns ``None`` when not in a Databricks runtime, when there is no active
    Spark session, or when the conf key is unset.
    """
    if is_in_databricks_runtime():
        spark_session = _get_active_spark_session()
        if spark_session is not None:
            return spark_session.conf.get(
                "spark.databricks.clusterUsageTags.sparkVersion", default=None
            )
    return None
def get_cluster_id():
    """Return the ``spark.databricks.clusterUsageTags.clusterId`` conf value.

    Returns ``None`` when there is no active Spark session.
    """
    spark_session = _get_active_spark_session()
    if spark_session is None:
        return None
    return spark_session.conf.get("spark.databricks.clusterUsageTags.clusterId")
def get_job_group_id():
    """Return ``dbutils.entry_point.getJobGroupId()``, or ``None`` if the lookup raises."""
    try:
        dbutils = _get_dbutils()
        # A None result is returned unchanged, so no separate branch is needed.
        return dbutils.entry_point.getJobGroupId()
    except Exception:
        return None
def get_job_id():
    """Return the job id from the command context.

    Uses ``jobId()`` on the context; if that raises, falls back to the
    ``"jobId"`` context tag (which may be ``None``).
    """
    try:
        return _get_command_context().jobId().get()
    except Exception:
        return _get_context_tag("jobId")
def get_job_run_id():
    """Return the job run id from the command context.

    Uses ``idInJob()`` on the context; if that raises, falls back to the
    ``"idInJob"`` context tag (which may be ``None``).
    """
    try:
        return _get_command_context().idInJob().get()
    except Exception:
        return _get_context_tag("idInJob")
def get_job_type():
    """Should only be called if is_in_databricks_job is true.

    Uses ``jobTaskType()`` on the command context; if that raises, falls back
    to the ``"jobTaskType"`` context tag (which may be ``None``).
    """
    try:
        return _get_command_context().jobTaskType().get()
    except Exception:
        return _get_context_tag("jobTaskType")
def get_command_run_id():
    """Return ``commandRunId()`` from the command context, or ``None`` if the lookup raises."""
    try:
        return _get_command_context().commandRunId().get()
    except Exception:
        # Older runtimes may not have the commandRunId available
        return None
def get_webapp_url():
    """Should only be called if is_in_databricks_notebook or is_in_databricks_jobs is true.

    Returns the Spark local property ``spark.databricks.api.url`` when set;
    otherwise ``apiUrl()`` from the command context, falling back to the
    ``"api_url"`` extra-context entry if that raises.
    """
    url = _get_property_from_spark_context("spark.databricks.api.url")
    if url is not None:
        return url
    try:
        return _get_command_context().apiUrl().get()
    except Exception:
        return _get_extra_context("api_url")
def get_workspace_id():
    """Return the workspace id from the command context.

    Uses ``workspaceId()`` on the context; if that raises, falls back to the
    ``"orgId"`` context tag (which may be ``None``).
    """
    try:
        return _get_command_context().workspaceId().get()
    except Exception:
        return _get_context_tag("orgId")
def get_browser_hostname():
    """Return the browser host name from the command context.

    Uses ``browserHostName()`` on the context; if that raises, falls back to
    the ``"browserHostName"`` context tag (which may be ``None``).
    """
    try:
        return _get_command_context().browserHostName().get()
    except Exception:
        return _get_context_tag("browserHostName")
def get_workspace_info_from_dbutils():
    """Build a workspace URL of the form ``<host>/?o=<workspace_id><browserHash>``.

    The host is ``https://<browser hostname>`` when a browser hostname is
    available, otherwise the value of ``get_webapp_url()``.

    Returns:
        The URL string when ``dbutils`` is truthy, otherwise ``(None, None)``.
        NOTE(review): the two paths return different shapes (str vs. 2-tuple);
        kept as-is so existing callers are unaffected.

    Raises:
        _NoDbutilsError: propagated from ``_get_dbutils``.
    """
    dbutils = _get_dbutils()
    if dbutils:
        browser_hostname = get_browser_hostname()
        workspace_host = "https://" + browser_hostname if browser_hostname else get_webapp_url()
        workspace_id = get_workspace_id()
        # _get_context_tag returns None for an undefined tag; concatenating
        # None would raise TypeError, so an absent hash becomes "".
        browserHash = _get_context_tag('browserHash') or ''
        return workspace_host + '/?o=' + workspace_id + browserHash
    return None, None
This code helps me generate the notebook URL. When I call get_workspace_info_from_dbutils() interactively, I get:
https://odyssey-lakehouse-dev-bronze.cloud.databricks.com/?o=7808874896028593#notebook/3018684734636397/command/3018684734636399
But when I run the same notebook as a job in Databricks, the browser hostname and browser hash are not generated,
and I get something like this:
'https://ireland.cloud.databricks.com/?o=7808874896028593#/api/2.0/workspace/get-notebook-snapshot'
You are not getting browserhostname and browserhash probably because when it runs as a job, it doesn't have a notebook interface in the browser. Instead, the code just gets executed in the cluster (which is probably the url you are getting).
Since notebooks generally reside inside a workspace/databricks account, you can have the hostname and the workspace id as a constant. You can try getting the notebook information for a job using the Jobs API and then use the Workspace API to get the rest of the information.

How to return an object multiple times in Python?

I have a problem when doing an exception handling in a Python class.
My class structure is like:
class base():
    # Sketch from the question: func() is the shared helper, A()/B() wrap it,
    # and index() calls the wrappers.
    def func():
        try:
            # some codes to deal with requests headers in here
            requests.get('...', timeout=0.1)
            return something
        except:
            # So when timeout in request occurs, func() will return 'Error'
            return 'Error'

    def A():
        func()

    def B():
        func()

    # there are about 10 functions that have called func().
    def index():
        reply = A()
        reply = B()
        # and A() B() functions are called here.
        return reply
My question is, is there a way to return an 'Error' to index function directly, instead of doing exception handling every time when calling it? That is, change func() only, and it has to return 2 times(func() -> A() -> index()), so reply in index function will be 'Error'.
def test(a=1):
    """Return ``a + 10`` when ``a`` is falsy, or ``"error"`` if an exception occurs.

    A truthy ``a`` deliberately raises, so the call returns ``"error"``.
    """
    try:
        if a:
            raise Exception
        else:
            return a + 10
    except Exception:
        return "error"
You can try something like this:
def func():
    """Template: run the risky code and return ``'error'`` for each handled exception type."""
    try:
        # the area that may raise an exception
        pass
    except Exception1:
        # anything you like
        return 'error'
    except Exception2:
        # anything you like
        return 'error'
Using requests.Timeout
def func():
    """Issue the request and return ``'something'``, or an ``'Error ...'`` string on timeout.

    Only ``requests.Timeout`` is handled; any other exception propagates.
    """
    try:
        # some codes to deal with requests headers in here
        requests.get('...', timeout=0.1)
        return 'something'
    except requests.Timeout as err:
        # So when timeout in request occurs, func() will return 'Error'
        return 'Error {}'.format(err)

Using classes inside if statement

I have this kind of code
class disable_file_system_redirection:
    # Context manager that calls Wow64DisableWow64FsRedirection on entry and
    # Wow64RevertWow64FsRedirection on exit.
    # The kernel32 entry points are only bound when mysystem == "Windows".
    if mysystem == "Windows":
        _disable = ctypes.windll.kernel32.Wow64DisableWow64FsRedirection
        _revert = ctypes.windll.kernel32.Wow64RevertWow64FsRedirection

    def __enter__(self):
        self.old_value = ctypes.c_long()
        self.success = self._disable(ctypes.byref(self.old_value))

    def __exit__(self, type, value, traceback):
        # Only revert if the disable call reported success.
        if self.success:
            self._revert(self.old_value)
        else:
            pass
If test == “yes”:
with disable_file_system_redirection:
try:
“some code”
else:
try:
“same code”
As you can see I wrote the same code twice. I cannot merge those two same codes without getting errors. Is there a possible way to do something like that
If test = = “yes”:
with disable_file_system_redirection:
else:
pass #without disable_file_system_redirection:
“some code”
you can outsource your code into a function:
def code_to_do():
    """Placeholder for the shared work; prints ``code_to_do``."""
    print("code_to_do")
# Run the shared code, with file-system redirection disabled only when requested.
if test == "yes":
    # The context manager must be an instance: __enter__/__exit__ are
    # instance methods, so ``with disable_file_system_redirection:`` fails.
    with disable_file_system_redirection():
        try:
            code_to_do()
        except Exception as e:
            print(str(e))
else:
    try:
        code_to_do()
    except Exception as e:
        print(str(e))

Can a finally block know if there was an exception

In a Python program I have code with the following structure:
try:
    value = my_function(*args)
finally:
    with some_context_manager:
        do_something()
        # 'value' is only bound if my_function returned without raising.
        if 'value' in locals():
            do_something_else(value)
But the 'value' in locals() construction feels a bit fragile and I am wondering if there is a better way to do this.
What I really want is for the code inside the finally block to behave slightly differently depending on whether the try block raised an exception. Is there a way to know if an exception was raised?
If the goal is "when an exception was raised, do something different", how about:
exception_raised = False
try:
    value = my_function(*args)
except BaseException:
    # Record that something was raised, then let it propagate unchanged.
    exception_raised = True
    raise
finally:
    with some_context_manager:
        do_something()
        if not exception_raised:
            do_something_else(value)
Now, if you're going to have multiple exceptions that you actually do something with, I'd recommend:
completed_successfully = False
try:
    value = my_function(*args)
except Exception:
    # Handle the specific exceptions you care about here. An ``else`` clause
    # is only legal when at least one ``except`` clause is present.
    raise
else:
    # Runs only when the try body finished without raising.
    completed_successfully = True
finally:
    with some_context_manager:
        do_something()
        if completed_successfully:
            do_something_else(value)
Here are a couple ideas:
Set value before attempting the try:
# Pre-bind value so the finally block can always read it.
# Caveat: a my_function that legitimately returns None also skips do_something_else.
value = None
try:
    value = my_function(*args)
finally:
    with some_context_manager:
        do_something()
        if value is not None:
            do_something_else(value)
Or if you want to set the value based on the exception type:
try:
    value = my_function(*args)
except BaseException:
    # Bind value on the failure path, then re-raise the original exception.
    value = None
    raise
finally:
    with some_context_manager:
        do_something()
        if value is not None:
            do_something_else(value)
Assign the exception to a variable in the except suite then use it in the finally suite.
from pprint import pprint

# foo stays False unless the except suite stores the caught exception in it.
foo = False
try:
    raise KeyError('foo not found')
except KeyError as e:
    pprint(e)
    foo = e
finally:
    if foo:
        print(foo)
    else:
        print('NO')

How to create a class with a socket member

I created the following class that I want to have a socket member within it and then want to use member functions to connect, close, send, and receive.
class Connection:
    """Wrapper holding a TCP socket (``SSLx``) and an SSL object (``Kon``)."""

    # Both attributes start as empty strings until Connect() assigns them.
    Kon = ""
    SSLx = ""

    def Close(self):
        """Close ``Kon``; return True on success, False if closing raised."""
        try:
            self.Kon.close()
            return True
        except Exception:
            return False

    def Send(self, Message):
        """Write ``Message`` to ``Kon``; print the error and return False on failure."""
        try:
            self.Kon.write(Message)
            return True
        except Exception as e:
            print(e)
            return False

    def Recieve(self):
        """Return the result of ``Kon.recv(10240)``, or False if it raised."""
        try:
            Response = self.Kon.recv(10240)
            return Response
        except Exception:
            return False

    # Connect function makes an SSL connection with the node
    def Connect(self, Link):
        """Connect ``SSLx`` to ``(Link[0], Link[1])``; return True on success, False otherwise.

        Single quotes are stripped from ``Link[0]`` and ``Link[1]`` is
        converted with ``int``.
        """
        self.SSLx = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        Ip = Link[0].replace("'", '')
        print(Ip)
        Port = int(Link[1])
        try:
            self.SSLx.connect((Ip, Port))
            return True
        except Exception:
            print("Connection Attempt Failed")
            # NOTE: Kon is only assigned on this failure path, so after a
            # successful connect it is still the "" placeholder.
            self.Kon = socket.ssl(SSLx)
            return False
I ran the .Connect function successfully, but after that when I try the Send function it says 'str' object does not have a write member.
Any ideas on how to get this done?
There was a small error left over from a debugging session: I had moved the line that initializes the Kon variable a few lines further down. The following is the corrected class.
class Connection:
    """Wrapper holding a TCP socket (``SSLx``) and the SSL object built on it (``Kon``)."""

    # Both attributes start as empty strings until Connect() assigns them.
    Kon = ""
    SSLx = ""

    def Close(self):
        """Close ``Kon``; return True on success, False if closing raised."""
        try:
            self.Kon.close()
            return True
        except Exception:
            return False

    def Send(self, Message):
        """Write ``Message`` to ``Kon``; print the error and return False on failure."""
        try:
            self.Kon.write(Message)
            return True
        except Exception as e:
            print(e)
            return False

    def Recieve(self):
        """Return the result of ``Kon.read(10240)``; print the error and return False on failure."""
        try:
            Response = self.Kon.read(10240)
            return Response
        except Exception as e:
            print(e)
            return False

    # Connect function makes an SSL connection with the node
    def Connect(self, Link):
        """Connect ``SSLx`` to ``(Link[0], Link[1])`` and wrap it in ``Kon``.

        Single quotes are stripped from ``Link[0]`` and ``Link[1]`` is
        converted with ``int``. Returns True on success; on any exception the
        error is printed and False is returned.
        """
        self.SSLx = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        Ip = Link[0].replace("'", '')
        print(Ip)
        Port = int(Link[1])
        try:
            self.SSLx.connect((Ip, Port))
            # Kon is created inside the try, right after the connect succeeds.
            self.Kon = socket.ssl(self.SSLx)
            return True
        except Exception as e:
            print(e)
            print("Connection Attempt Failed")
            return False

Categories