Want to generate databricks notebook URL to send alerts - python

import os
import subprocess


def _get_dbutils():
    try:
        import IPython

        ip_shell = IPython.get_ipython()
        if ip_shell is None:
            raise _NoDbutilsError
        return ip_shell.ns_table["user_global"]["dbutils"]
    except ImportError:
        raise _NoDbutilsError
    except KeyError:
        raise _NoDbutilsError


class _NoDbutilsError(Exception):
    pass


def _get_java_dbutils():
    dbutils = _get_dbutils()
    return dbutils.notebook.entry_point.getDbutils()


def _get_command_context():
    return _get_java_dbutils().notebook().getContext()


def _get_extra_context(context_key):
    return _get_command_context().extraContext().get(context_key).get()


def _get_context_tag(context_tag_key):
    tag_opt = _get_command_context().tags().get(context_tag_key)
    if tag_opt.isDefined():
        return tag_opt.get()
    else:
        return None


def acl_path_of_acl_root():
    try:
        return _get_command_context().aclPathOfAclRoot().get()
    except Exception:
        return _get_extra_context("aclPathOfAclRoot")


def _get_property_from_spark_context(key):
    try:
        from pyspark import TaskContext  # pylint: disable=import-error

        task_context = TaskContext.get()
        if task_context:
            return task_context.getLocalProperty(key)
    except Exception:
        return None


def _get_active_spark_session():
    # Not part of the original snippet: the functions below assume this
    # helper exists. A minimal stand-in using PySpark 3's
    # SparkSession.getActiveSession().
    try:
        from pyspark.sql import SparkSession

        return SparkSession.getActiveSession()
    except Exception:
        return None


def is_databricks_default_tracking_uri(tracking_uri):
    return tracking_uri.lower().strip() == "databricks"


def is_in_databricks_notebook():
    if _get_property_from_spark_context("spark.databricks.notebook.id") is not None:
        return True
    try:
        return acl_path_of_acl_root().startswith("/workspace")
    except Exception:
        return False


def is_in_databricks_job():
    try:
        return get_job_id() is not None and get_job_run_id() is not None
    except Exception:
        return False


def is_in_databricks_runtime():
    try:
        # pylint: disable=unused-import,import-error,no-name-in-module,unused-variable
        import pyspark.databricks

        return True
    except ModuleNotFoundError:
        return False


def is_dbfs_fuse_available():
    with open(os.devnull, "w") as devnull_stderr, open(os.devnull, "w") as devnull_stdout:
        try:
            return (
                subprocess.call(
                    ["mountpoint", "/dbfs"], stderr=devnull_stderr, stdout=devnull_stdout
                )
                == 0
            )
        except Exception:
            return False


def is_in_cluster():
    try:
        spark_session = _get_active_spark_session()
        return (
            spark_session is not None
            and spark_session.conf.get("spark.databricks.clusterUsageTags.clusterId") is not None
        )
    except Exception:
        return False


def get_notebook_id():
    """Should only be called if is_in_databricks_notebook is true."""
    notebook_id = _get_property_from_spark_context("spark.databricks.notebook.id")
    if notebook_id is not None:
        return notebook_id
    acl_path = acl_path_of_acl_root()
    if acl_path.startswith("/workspace"):
        return acl_path.split("/")[-1]
    return None


def get_notebook_path():
    """Should only be called if is_in_databricks_notebook is true."""
    path = _get_property_from_spark_context("spark.databricks.notebook.path")
    if path is not None:
        return path
    try:
        return _get_command_context().notebookPath().get()
    except Exception:
        return _get_extra_context("notebook_path")


def get_databricks_runtime():
    if is_in_databricks_runtime():
        spark_session = _get_active_spark_session()
        if spark_session is not None:
            return spark_session.conf.get(
                "spark.databricks.clusterUsageTags.sparkVersion", default=None
            )
    return None


def get_cluster_id():
    spark_session = _get_active_spark_session()
    if spark_session is None:
        return None
    return spark_session.conf.get("spark.databricks.clusterUsageTags.clusterId")


def get_job_group_id():
    try:
        dbutils = _get_dbutils()
        job_group_id = dbutils.entry_point.getJobGroupId()
        if job_group_id is not None:
            return job_group_id
    except Exception:
        return None


def get_job_id():
    try:
        return _get_command_context().jobId().get()
    except Exception:
        return _get_context_tag("jobId")


def get_job_run_id():
    try:
        return _get_command_context().idInJob().get()
    except Exception:
        return _get_context_tag("idInJob")


def get_job_type():
    """Should only be called if is_in_databricks_job is true."""
    try:
        return _get_command_context().jobTaskType().get()
    except Exception:
        return _get_context_tag("jobTaskType")


def get_command_run_id():
    try:
        return _get_command_context().commandRunId().get()
    except Exception:
        # Older runtimes may not have the commandRunId available
        return None


def get_webapp_url():
    """Should only be called if is_in_databricks_notebook or is_in_databricks_job is true."""
    url = _get_property_from_spark_context("spark.databricks.api.url")
    if url is not None:
        return url
    try:
        return _get_command_context().apiUrl().get()
    except Exception:
        return _get_extra_context("api_url")


def get_workspace_id():
    try:
        return _get_command_context().workspaceId().get()
    except Exception:
        return _get_context_tag("orgId")


def get_browser_hostname():
    try:
        return _get_command_context().browserHostName().get()
    except Exception:
        return _get_context_tag("browserHostName")


def get_workspace_info_from_dbutils():
    dbutils = _get_dbutils()
    if dbutils:
        browser_hostname = get_browser_hostname()
        workspace_host = "https://" + browser_hostname if browser_hostname else get_webapp_url()
        workspace_id = get_workspace_id()
        # browserHash (e.g. "#notebook/<id>/command/<id>") is only set for
        # interactive notebook sessions.
        browser_hash = _get_context_tag("browserHash")
        return workspace_host + "/?o=" + workspace_id + browser_hash
    return None
**This code helps me generate the notebook URL when I call get_workspace_info_from_dbutils().
I get
https://odyssey-lakehouse-dev-bronze.cloud.databricks.com/?o=7808874896028593#notebook/3018684734636397/command/3018684734636399
But when I run the same notebook as a job in Databricks, the browserHostName and browserHash don't get generated,
and I get something like this:
'https://ireland.cloud.databricks.com/?o=7808874896028593#/api/2.0/workspace/get-notebook-snapshot' **

You are not getting browserHostName and browserHash because, when the notebook runs as a job, there is no notebook interface in a browser. The code simply executes on the cluster, and the cluster's API URL is probably what you are getting back.
Since notebooks generally reside inside a single workspace/Databricks account, you can keep the hostname and the workspace ID as constants. For job runs, you can fetch the notebook information with the Jobs API and then use the Workspace API to fill in the rest.
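For example, here is a minimal sketch of such a fallback built on the helpers above. The job-run URL format "<host>/?o=<org>#job/<job_id>/run/<run_id>" is an assumption based on how Databricks links to job runs, so verify it against your workspace before relying on it:

def get_notebook_or_job_url():
    # Sketch only: assumes the helper functions defined in the question.
    host = get_webapp_url()  # cluster-visible URL, e.g. https://<region>.cloud.databricks.com
    org = get_workspace_id()
    if is_in_databricks_job():
        # Assumed job-run URL format; confirm in your workspace.
        return "%s/?o=%s#job/%s/run/%s" % (host, org, get_job_id(), get_job_run_id())
    # Interactive session: browserHash already carries "#notebook/<id>/command/<id>".
    return "https://%s/?o=%s%s" % (get_browser_hostname(), org, _get_context_tag("browserHash"))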

Related

Usage of pytest fixture for screenshot (defined in conftest.py) used within a method under a class structure

I defined a pytest fixture (named rp_logger) in the conftest.py file and am trying to use it in a method of a different class as rp_logger.info(""). I am getting an error that the attribute info is not defined.
conftest file:
@pytest.fixture
def rp_logger(request):
    # Set up logging.
    logging.setLoggerClass(RPLogger)
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    # Create a handler for Report Portal.
    rp_handler = RPLogHandler(request.node.config.py_test_service)
    # Set INFO level for the Report Portal handler.
    # (Note: as shown, the handler is never attached via logger.addHandler(rp_handler).)
    rp_handler.setLevel(logging.INFO)
    return logger
testsuite file:
@pytest.mark.single
@pytest.mark.events
@pytest.mark.usefixtures("rp_logger")
class MyTest(object):
    def test_verify_events_page(self, rp_logger):
        rp_logger.info("bbbb")
Error stack trace:
def run(self, result=None):
    orig_result = result
    if result is None:
        result = self.defaultTestResult()
        startTestRun = getattr(result, 'startTestRun', None)
        if startTestRun is not None:
            startTestRun()
    self._resultForDoCleanups = result
    result.startTest(self)
    testMethod = getattr(self, self._testMethodName)
    if (getattr(self.__class__, "__unittest_skip__", False) or
            getattr(testMethod, "__unittest_skip__", False)):
        # If the class or method was skipped.
        try:
            skip_why = (getattr(self.__class__, '__unittest_skip_why__', '')
                        or getattr(testMethod, '__unittest_skip_why__', ''))
            self._addSkip(result, skip_why)
        finally:
            result.stopTest(self)
        return
    try:
        success = False
        try:
            self.setUp()
        except SkipTest as e:
            self._addSkip(result, str(e))
        except KeyboardInterrupt:
            raise
        except:
            result.addError(self, sys.exc_info())
        else:
            try:
                testMethod()
E   TypeError: test_verify_events_page() takes exactly 2 arguments (1 given)
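The traceback above comes from unittest's TestCase.run, which suggests MyTest (or one of its bases) ultimately inherits from unittest.TestCase, and pytest does not inject fixtures as test-method arguments on TestCase subclasses. A sketch of the usual workaround, assuming that diagnosis is right: attach the fixture to the instance with an autouse fixture instead of taking it as an argument.

import unittest
import pytest

class MyTest(unittest.TestCase):
    # autouse fixtures do run for TestCase subclasses, so stash the
    # fixture value on self rather than taking it as a test argument.
    @pytest.fixture(autouse=True)
    def _attach_rp_logger(self, rp_logger):
        self.rp_logger = rp_logger

    def test_verify_events_page(self):
        self.rp_logger.info("bbbb")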

mock and side_effect substitution - keeping access to the original class and its attributes

I want to mock the method _subprocess on a particular instance of a class,
specifically when the task fires off pip freeze as a command (in that case its taskname is freeze).
class Command(object):
    def __init__(self, mgr, taskname, config):
        self.mgr = mgr
        self.taskname = taskname
        self.config = config
        self.append = self.config.get("append", False)
        self.stderr = ""

    def _subprocess(self, cmd, fnp_o, self_=None):
        try:
            mode = "a" if self.append else "w"
            fnp_stderr = self.mgr._get_fnp("log")
            with open(fnp_stderr, "a") as ferr:
                ferr.write("cmd: %s\nstderr begin:\n" % (cmd))
                with open(fnp_o, mode) as fo:
                    proc = subprocess.check_call(
                        cmd.split(),
                        stdout=fo,
                        stderr=ferr,
                        cwd=self.mgr.workdir,
                        encoding="utf-8",
                    )
                ferr.write("stderr end\n\n")
        except (Exception,) as e:
            if cpdb():
                pdb.set_trace()
            raise
This is the test method:
def fake_subprocess(self, cmd, fnp_o, self_):
    try:
        raise NotImplementedError("fake_subprocess(%s)" % (locals()))
    except (Exception,) as e:
        pdb.set_trace()
        raise

def test_001_scan(self):
    try:
        with patch.object(Command, '_subprocess', side_effect=self.fake_subprocess) as mock_method:
            options = self.get_options()
            self.mgr = Main(options)
            self.mgr.process()
    except (Exception,) as e:
        pdb.set_trace()
        raise
My problem is two-fold.
First, the self in fake_subprocess refers to the unittest object, not the Command object; my self_ parameter gets around that.
Second, in most cases, except for pip freeze, I want to run the original _subprocess, not the fake one.
Now, I could probably power through this by keeping an extra reference to Command._subprocess and calling it via self_.
But is there a more elegant way? I'm very naive when it comes to unittest.mock.
This is what ended up working for me:
test-side
def fake_subprocess(self, cmd, fnp_o, self_):
    try:
        if self_.taskname != "freeze":
            return self_._subprocess_actual(cmd, fnp_o, self_)
        with open(fnp_o, self_.mode) as fo:
            fo.write(self.fake_subprocess_payload["freeze"])
    except (Exception,) as e:
        raise

def test_001_scan(self):
    try:
        with patch.object(
            Command, "_subprocess", side_effect=self.fake_subprocess
        ) as mock_method:
            options = self.get_options()
            self.mgr = Main(options)
            self.mgr.process()
    except (Exception,) as e:
        raise
actual code-side
class Command(object):
    def _subprocess(self, cmd, fnp_o, self_=None):
        try:
            fnp_stderr = self.mgr._get_fnp("log")
            with open(fnp_stderr, "a") as ferr:
                ferr.write("cmd: %s\nstderr begin:\n" % (cmd))
                with open(fnp_o, self.mode) as fo:
                    proc = subprocess.check_call(
                        cmd.split(), stdout=fo, stderr=ferr, cwd=self.mgr.workdir
                    )
                ferr.write("stderr end\n\n")
        except (Exception,) as e:
            if cpdb():
                pdb.set_trace()
            raise

    # Keep a reference to the real implementation so the fake can delegate to it.
    _subprocess_actual = _subprocess

    def run(self):
        try:
            t_cmd = self.config["cmdline"]  # .replace(r"\\","\\")
            t_fnp = os.path.join(self.mgr.workdir, self.config["filename"])
            fnp_log = "subprocess.log"
            cmd = sub_template(t_cmd, self, self.mgr.vars)
            fnp_o = sub_template(t_fnp, self, self.mgr.vars)
            self._subprocess(cmd=cmd, fnp_o=fnp_o, self_=self)
        except (Exception,) as e:
            if cpdb():
                pdb.set_trace()
            raise
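For reference, a possibly more elegant variant (a sketch, not tested against the code above): patching with autospec=True makes the mock pass the instance as the first argument to the side_effect function, per the unittest.mock documentation, which removes the need for both the self_ parameter and the _subprocess_actual alias.

original = Command._subprocess  # plain function, captured before patching

def fake_subprocess(cmd_obj, cmd, fnp_o, self_=None):
    # cmd_obj is the Command instance, courtesy of autospec=True.
    if cmd_obj.taskname != "freeze":
        return original(cmd_obj, cmd, fnp_o, self_)
    with open(fnp_o, "w") as fo:        # "w" assumed; the real code uses self.mode
        fo.write(FAKE_FREEZE_PAYLOAD)   # hypothetical payload constant

with patch.object(Command, "_subprocess", autospec=True, side_effect=fake_subprocess):
    mgr = Main(options)  # options as in test_001_scan
    mgr.process()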

Async call and not wait while using pubnub with django

This is a module using PubNub that publishes a message to a topic from an API. By design, I've kept the PubNub object a singleton.
class Pubnub:
    instance = None

    @classmethod
    def get(cls):
        if cls.instance is None:
            cls.instance = cls()
        return cls.instance

    def __init__(self):
        with open('config/config.yaml', 'r') as stream:
            try:
                conf = yaml.load(stream)
                pnconfig = PNConfiguration()
                # NOTE: the publish and subscribe keys appear swapped here.
                pnconfig.subscribe_key = conf['pubnub']['publish_key']
                pnconfig.publish_key = conf['pubnub']['subscribe_key']
                pnconfig.ssl = False
                self.pubnub = PubNub(pnconfig)
            except yaml.YAMLError as e:
                logger.error(str(e))

    def publish(self, channel):
        try:
            envelope = self.pubnub.publish().channel(channel).message({
                'message': True
            }).sync()
            print("publish timetoken: %d" % envelope.result.timetoken)
        except PubNubException as e:
            logger.error(str(e))
This is how I'm calling it:
class SendCommunityTextMessage(views.APIView):
    def post(self, request, **kwargs):
        try:
            client_id = request.GET['client_id']
            client_secret = request.GET['client_secret']
            if Authenticator.authenticate_client(client_id, client_secret):
                try:
                    # do something
                    try:
                        # do something more
                        pubbub = Pubnub.get()
                        pubbub.publish(receiver.hex_code)
                        return Response({"Success": CommunityTextMessageSerializer(message).data},
                                        status=status.HTTP_200_OK)
                    except KeyError as e:
                        return Response({"Failure": str(e)}, status=status.HTTP_400_BAD_REQUEST)
                except (User.DoesNotExist, CommunityRoom.DoesNotExist) as e:
                    return Response({"Failure": str(e)}, status=status.HTTP_404_NOT_FOUND)
            else:
                return Response({"Failure": "Invalid client"}, status=status.HTTP_403_FORBIDDEN)
        except KeyError as _:
            return Response({"Failure": "Probably a typo, read the docs to use this API."},
                            status=status.HTTP_400_BAD_REQUEST)
The issue is that this slows the API down by minutes. How can I call the two lines
pubbub = Pubnub.get()
pubbub.publish(receiver.hex_code)
asynchronously and return from the view without waiting for the call to finish?
Thanks in anticipation.
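One option, sketched under the assumption that you are on the PubNub Python SDK v4: replace the blocking .sync() call with .pn_async(), which hands the result to a callback and returns immediately, so the view does not wait on the network round trip.

def _publish_callback(envelope, status):
    # Runs when the publish completes; the view has already returned by then.
    if status.is_error():
        logger.error("publish failed: %s" % status)

def publish(self, channel):
    # Non-blocking variant of the publish() method above (a sketch).
    self.pubnub.publish().channel(channel).message({
        'message': True
    }).pn_async(_publish_callback)

If the delay persists even with the async call, it is worth timing the view without the publish at all, since minutes-long latency usually points at something else (for example, the config file read or DNS/SSL issues) rather than the publish itself.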

Using classes inside if statement

I have this kind of code:
class disable_file_system_redirection:
    if mysystem == "Windows":
        _disable = ctypes.windll.kernel32.Wow64DisableWow64FsRedirection
        _revert = ctypes.windll.kernel32.Wow64RevertWow64FsRedirection

    def __enter__(self):
        self.old_value = ctypes.c_long()
        self.success = self._disable(ctypes.byref(self.old_value))

    def __exit__(self, type, value, traceback):
        if self.success:
            self._revert(self.old_value)
        else:
            pass

if test == "yes":
    with disable_file_system_redirection():
        try:
            # some code
else:
    try:
        # same code
As you can see, I wrote the same code twice, and I cannot merge the two identical blocks without getting errors. Is there a way to do something like this?
if test == "yes":
    with disable_file_system_redirection():
else:
    pass  # without disable_file_system_redirection
# "some code", written only once
You can factor your code out into a function:
def code_to_do():
    print("code_to_do")

if test == "yes":
    with disable_file_system_redirection():
        try:
            code_to_do()
        except Exception as e:
            print(str(e))
else:
    try:
        code_to_do()
    except Exception as e:
        print(str(e))
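On Python 3.7+ you can also remove the remaining duplication by choosing the context manager conditionally; contextlib.nullcontext is a no-op stand-in (a sketch):

from contextlib import nullcontext  # no-op context manager, Python 3.7+

ctx = disable_file_system_redirection() if test == "yes" else nullcontext()
with ctx:
    try:
        code_to_do()
    except Exception as e:
        print(str(e))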

How to reliably process web-data in Python

I'm using the following code to get data from a website:
time_out = 4

def tryconnect(turl, timer=time_out, retries=10):
    urlopener = None
    sitefound = 1
    tried = 0
    while (sitefound != 0) and tried < retries:
        try:
            urlopener = urllib2.urlopen(turl, None, timer)
            sitefound = 0
        except urllib2.URLError:
            tried += 1
    if urlopener:
        return urlopener
    else:
        return None

[...]

urlopener = tryconnect('www.example.com')
if not urlopener:
    return None
try:
    for line in urlopener:
        do stuff
except httplib.IncompleteRead:
    print 'incomplete'
    return None
except socket.timeout:
    print 'socket'
    return None
return stuff
Is there a way I can handle all these exceptions without so much boilerplate code every time?
Thanks!
You can avoid some boilerplate code in the first function too:
time_out = 4

def tryconnect(turl, timer=time_out, retries=10):
    for tried in xrange(retries):
        try:
            return urllib2.urlopen(turl, None, timer)
        except urllib2.URLError:
            pass
    return None
and in the second:
urlopener = tryconnect('www.example.com')
if urlopener:
    try:
        for line in urlopener:
            do stuff
    except (httplib.IncompleteRead, socket.timeout), e:
        print e
        return None
else:
    return None
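If this pattern recurs across many fetch functions, the exception handling can also be hoisted into a decorator, so each call site stops repeating the except clauses. A sketch in the question's Python 2 idiom; fetch_lines and the decorator name are hypothetical:

import socket
import httplib

def returns_none_on_network_errors(func):
    # Converts the common network failures into a None return value,
    # so call sites don't repeat the try/except block.
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except (httplib.IncompleteRead, socket.timeout) as e:
            print e
            return None
    return wrapper

@returns_none_on_network_errors
def fetch_lines(urlopener):
    return [line.strip() for line in urlopener]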
