def _get_dbutils():
    """Fetch ``dbutils`` from the user-global namespace of the active IPython shell.

    Raises:
        _NoDbutilsError: if IPython cannot be imported, no IPython shell is
            active, or the namespace lookup fails with a ``KeyError``.
    """
    try:
        import IPython

        shell = IPython.get_ipython()
        if shell is not None:
            return shell.ns_table["user_global"]["dbutils"]
    except (ImportError, KeyError):
        raise _NoDbutilsError
    # Reached only when there is no active IPython shell.
    raise _NoDbutilsError
class _NoDbutilsError(Exception):
pass
def _get_java_dbutils():
    """Return the object produced by ``dbutils.notebook.entry_point.getDbutils()``."""
    entry_point = _get_dbutils().notebook.entry_point
    return entry_point.getDbutils()
def _get_command_context():
    """Return the notebook context obtained through the Java dbutils handle."""
    notebook = _get_java_dbutils().notebook()
    return notebook.getContext()
def _get_extra_context(context_key):
    """Look up ``context_key`` in the command context's extra context and unwrap it."""
    extra = _get_command_context().extraContext()
    return extra.get(context_key).get()
def _get_context_tag(context_tag_key):
    """Return the value of a command-context tag, or ``None`` when it is not defined."""
    maybe_tag = _get_command_context().tags().get(context_tag_key)
    return maybe_tag.get() if maybe_tag.isDefined() else None
def acl_path_of_acl_root():
    """Return the ACL path of the ACL root from the command context.

    Falls back to the ``aclPathOfAclRoot`` extra-context entry when the
    direct accessor raises.
    """
    try:
        acl_root = _get_command_context().aclPathOfAclRoot()
        return acl_root.get()
    except Exception:
        return _get_extra_context("aclPathOfAclRoot")
def _get_property_from_spark_context(key):
try:
from pyspark import TaskContext # pylint: disable=import-error
task_context = TaskContext.get()
if task_context:
return task_context.getLocalProperty(key)
except Exception:
return None
def is_databricks_default_tracking_uri(tracking_uri):
    """Return True when ``tracking_uri`` is "databricks", ignoring case and outer whitespace."""
    normalized = tracking_uri.lower().strip()
    return normalized == "databricks"
def is_in_databricks_notebook():
    """Return True when a notebook id is set or the ACL root path starts with "/workspace"."""
    notebook_id = _get_property_from_spark_context("spark.databricks.notebook.id")
    if notebook_id is None:
        try:
            return acl_path_of_acl_root().startswith("/workspace")
        except Exception:
            return False
    return True
def is_in_databricks_job():
    """Return True when both a job id and a job run id are available; False on any error."""
    try:
        has_job_id = get_job_id() is not None
        return has_job_id and get_job_run_id() is not None
    except Exception:
        return False
def is_in_databricks_runtime():
    """Return True when the ``pyspark.databricks`` module can be imported."""
    try:
        # pylint: disable=unused-import,import-error,no-name-in-module,unused-variable
        import pyspark.databricks
    except ModuleNotFoundError:
        return False
    else:
        return True
def is_dbfs_fuse_available():
    """Return True when ``/dbfs`` is a mount point according to ``mountpoint``.

    The ``mountpoint`` command is run with its output discarded; an exit code
    of 0 means ``/dbfs`` is mounted. Any failure to run the command is
    reported as ``False``.
    """
    try:
        # subprocess.DEVNULL discards the child's output without opening
        # os.devnull by hand (twice) outside the guarded region.
        exit_code = subprocess.call(
            ["mountpoint", "/dbfs"],
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
        )
        return exit_code == 0
    except Exception:
        # Best effort: failing to launch the command counts as "not available".
        return False
def is_in_cluster():
    """Return True when the active Spark session reports a cluster id; False on any error."""
    try:
        session = _get_active_spark_session()
        if session is None:
            return False
        cluster_id = session.conf.get("spark.databricks.clusterUsageTags.clusterId")
        return cluster_id is not None
    except Exception:
        return False
def get_notebook_id():
    """Should only be called if is_in_databricks_notebook is true.

    Prefers the Spark local property; otherwise uses the last segment of the
    ACL root path when that path starts with "/workspace", else ``None``.
    """
    from_spark = _get_property_from_spark_context("spark.databricks.notebook.id")
    if from_spark is not None:
        return from_spark
    acl_path = acl_path_of_acl_root()
    return acl_path.split("/")[-1] if acl_path.startswith("/workspace") else None
def get_notebook_path():
    """Should only be called if is_in_databricks_notebook is true.

    Tries the Spark local property first, then the command context, and
    finally the ``notebook_path`` extra-context entry.
    """
    from_spark = _get_property_from_spark_context("spark.databricks.notebook.path")
    if from_spark is None:
        try:
            return _get_command_context().notebookPath().get()
        except Exception:
            return _get_extra_context("notebook_path")
    return from_spark
def get_databricks_runtime():
    """Return the Spark version tag of the cluster, or ``None`` when unavailable."""
    if not is_in_databricks_runtime():
        return None
    session = _get_active_spark_session()
    if session is None:
        return None
    return session.conf.get(
        "spark.databricks.clusterUsageTags.sparkVersion", default=None
    )
def get_cluster_id():
    """Return the cluster id from the active Spark session, or ``None`` without a session."""
    session = _get_active_spark_session()
    if session is not None:
        return session.conf.get("spark.databricks.clusterUsageTags.clusterId")
    return None
def get_job_group_id():
    """Return the job group id reported by dbutils, or ``None`` if missing or on any error."""
    try:
        entry_point = _get_dbutils().entry_point
        # A None result is passed through unchanged, matching "not available".
        return entry_point.getJobGroupId()
    except Exception:
        return None
def get_job_id():
    """Return the job id from the command context, falling back to the ``jobId`` tag."""
    try:
        job_id = _get_command_context().jobId()
        return job_id.get()
    except Exception:
        return _get_context_tag("jobId")
def get_job_run_id():
    """Return the run id within the job, falling back to the ``idInJob`` tag."""
    try:
        run_id = _get_command_context().idInJob()
        return run_id.get()
    except Exception:
        return _get_context_tag("idInJob")
def get_job_type():
    """Should only be called if is_in_databricks_job is true.

    Returns the job task type, falling back to the ``jobTaskType`` tag.
    """
    try:
        task_type = _get_command_context().jobTaskType()
        return task_type.get()
    except Exception:
        return _get_context_tag("jobTaskType")
def get_command_run_id():
    """Return the command run id from the command context, or ``None`` on any error."""
    try:
        run_id = _get_command_context().commandRunId()
        return run_id.get()
    except Exception:
        # Older runtimes may not have the commandRunId available
        return None
def get_webapp_url():
    """Should only be called if is_in_databricks_notebook or is_in_databricks_jobs is true.

    Tries the Spark local property first, then the command context, and
    finally the ``api_url`` extra-context entry.
    """
    from_spark = _get_property_from_spark_context("spark.databricks.api.url")
    if from_spark is None:
        try:
            return _get_command_context().apiUrl().get()
        except Exception:
            return _get_extra_context("api_url")
    return from_spark
def get_workspace_id():
    """Return the workspace id from the command context, falling back to the ``orgId`` tag."""
    try:
        workspace_id = _get_command_context().workspaceId()
        return workspace_id.get()
    except Exception:
        return _get_context_tag("orgId")
def get_browser_hostname():
    """Return the browser host name, falling back to the ``browserHostName`` tag."""
    try:
        host_name = _get_command_context().browserHostName()
        return host_name.get()
    except Exception:
        return _get_context_tag("browserHostName")
def get_workspace_info_from_dbutils():
    """Build a workspace URL from the dbutils command context.

    Returns:
        The string ``<workspace host>/?o=<workspace id><browser hash>`` when
        dbutils is truthy, otherwise the tuple ``(None, None)``. The host is
        ``https://<browser host name>`` when a browser host name is available
        and the webapp URL otherwise.
    """
    dbutils = _get_dbutils()
    if dbutils:
        browser_hostname = get_browser_hostname()
        workspace_host = "https://" + browser_hostname if browser_hostname else get_webapp_url()
        workspace_id = get_workspace_id()
        # _get_context_tag returns None for an undefined tag; treat that as an
        # empty fragment instead of failing on str + None.
        browser_hash = _get_context_tag("browserHash") or ""
        return workspace_host + "/?o=" + workspace_id + browser_hash
    return None, None
**This code helps me generate the notebook URL. When I call get_workspace_info_from_dbutils()
I get
https://odyssey-lakehouse-dev-bronze.cloud.databricks.com/?o=7808874896028593#notebook/3018684734636397/command/3018684734636399
But when I run the same notebook as a job in Databricks, the browser hostname and browser hash are not generated,
and I get something like this:
'https://ireland.cloud.databricks.com/?o=7808874896028593#/api/2.0/workspace/get-notebook-snapshot' **
You are probably not getting the browser hostname and browser hash because, when the notebook runs as a job, there is no notebook interface open in a browser. Instead, the code is simply executed on the cluster (which is probably the URL you are getting).
Since notebooks generally reside inside a workspace/databricks account, you can have the hostname and the workspace id as a constant. You can try getting the notebook information for a job using the Jobs API and then use the Workspace API to get the rest of the information.
Related
I defined a pytest fixture (named rp_logger) in the conftest.py file and am trying to use it in a method of a different class as rp_logger.info(""). I get an error saying that the attribute info is not defined.
conftest file:
#pytest.fixture
def rp_logger(request):
    """Return a DEBUG-level logger for this module, created as an ``RPLogger``.

    A Report Portal handler is built from ``request.node.config.py_test_service``
    and set to INFO level; this function does not attach it to the logger.
    """
    # The logger class must be registered before the logger is looked up.
    logging.setLoggerClass(RPLogger)
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)
    # Report Portal handler, limited to INFO and above.
    handler = RPLogHandler(request.node.config.py_test_service)
    handler.setLevel(logging.INFO)
    return log
testsuite file:
#pytest.mark.single
#pytest.mark.events
#pytest.mark.usefixtures("rp_logger")
class MyTest(object):
    """Test case that logs through the ``rp_logger`` argument."""

    def test_verify_events_page(self, rp_logger):
        """Emit a single INFO message through the supplied logger."""
        rp_logger.info("bbbb")
Error stack trace :
def run(self, result=None):
orig_result = result
if result is None:
result = self.defaultTestResult()
startTestRun = getattr(result, 'startTestRun', None)
if startTestRun is not None:
startTestRun()
self._resultForDoCleanups = result
result.startTest(self)
testMethod = getattr(self, self._testMethodName)
if (getattr(self.__class__, "__unittest_skip__", False) or
getattr(testMethod, "__unittest_skip__", False)):
# If the class or method was skipped.
try:
skip_why = (getattr(self.__class__, '__unittest_skip_why__', '')
or getattr(testMethod, '__unittest_skip_why__', ''))
self._addSkip(result, skip_why)
finally:
result.stopTest(self)
return
try:
success = False
try:
self.setUp()
except SkipTest as e:
self._addSkip(result, str(e))
except KeyboardInterrupt:
raise
except:
result.addError(self, sys.exc_info())
else:
try:
testMethod()
E TypeError: test_verify_events_page() takes exactly 2 arguments (1 given)
I want to mock method _subprocess on a particular instance of a class.
Specifically when the task fires off pip freeze as a command (in that case its taskname is freeze).
class Command(object):
    """A task that runs a command line and captures its output to files."""

    def __init__(self, mgr, taskname, config):
        """Store the manager, task name and config; read the ``append`` flag from config."""
        self.mgr = mgr
        self.taskname = taskname
        self.config = config
        self.append = config.get("append", False)
        self.stderr = ""

    def _subprocess(self, cmd, fnp_o, self_=None):
        """Run ``cmd`` with stdout written to ``fnp_o`` and stderr appended to the log file.

        ``self_`` is accepted but not used here. Exceptions are re-raised,
        after an optional debugger break when ``cpdb()`` is truthy.
        """
        try:
            out_mode = "a" if self.append else "w"
            log_path = self.mgr._get_fnp("log")
            with open(log_path, "a") as log_file:
                log_file.write("cmd: %s\nstderr begin:\n" % cmd)
                with open(fnp_o, out_mode) as out_file:
                    subprocess.check_call(
                        cmd.split(),
                        stdout=out_file,
                        stderr=log_file,
                        cwd=self.mgr.workdir,
                        encoding="utf-8",
                    )
                log_file.write("stderr end\n\n")
        except Exception:
            if cpdb():
                pdb.set_trace()
            raise
This is the test method:
def fake_subprocess(self, cmd, fnp_o, self_):
    """Stand-in for ``Command._subprocess`` that always fails.

    Raises NotImplementedError carrying the call's local variables, breaking
    into the debugger before re-raising.
    """
    try:
        # No extra locals may be introduced before this line: locals() is
        # part of the message.
        raise NotImplementedError(f"fake_subprocess({locals()})")
    except Exception:
        pdb.set_trace()
        raise
def test_001_scan(self):
    """Run ``Main.process()`` with ``Command._subprocess`` patched to ``fake_subprocess``."""
    try:
        patcher = patch.object(Command, "_subprocess", side_effect=self.fake_subprocess)
        with patcher:
            self.mgr = Main(self.get_options())
            self.mgr.process()
    except Exception:
        pdb.set_trace()
        raise
My problem is two-fold.
First, the self in fake_subprocess refers to the UnitTest object, not the Command object. My use of the self_ parameter gets around that.
Second, in most cases, except for pip freeze I want to run the original subprocess, not the fake one.
Now, I can probably power through this by keeping an extra reference to Command._subprocess and using self_
But is there a more elegant way? Very naive when it comes to unittest.Mock.
This is what ended up working for me:
test-side
def fake_subprocess(self, cmd, fnp_o, self_):
    """Fake only the "freeze" task; delegate every other task to the real implementation.

    For "freeze", the canned payload from ``self.fake_subprocess_payload`` is
    written to ``fnp_o`` using the command's file mode.
    """
    if self_.taskname == "freeze":
        with open(fnp_o, self_.mode) as out_file:
            out_file.write(self.fake_subprocess_payload["freeze"])
        return None
    return self_._subprocess_actual(cmd, fnp_o, self_)
def test_001_scan(self):
    """Run ``Main.process()`` while ``Command._subprocess`` is routed through ``fake_subprocess``."""
    with patch.object(Command, "_subprocess", side_effect=self.fake_subprocess):
        self.mgr = Main(self.get_options())
        self.mgr.process()
actual code-side
class Command(object):
    """A task that expands its command-line template and runs it as a subprocess."""

    def _subprocess(self, cmd, fnp_o, self_=None):
        """Run ``cmd`` with stdout written to ``fnp_o`` and stderr appended to the log file.

        ``self_`` is accepted but not used here. Exceptions are re-raised,
        after an optional debugger break when ``cpdb()`` is truthy.
        """
        try:
            log_path = self.mgr._get_fnp("log")
            with open(log_path, "a") as log_file:
                log_file.write("cmd: %s\nstderr begin:\n" % cmd)
                with open(fnp_o, self.mode) as out_file:
                    subprocess.check_call(
                        cmd.split(),
                        stdout=out_file,
                        stderr=log_file,
                        cwd=self.mgr.workdir,
                    )
                log_file.write("stderr end\n\n")
        except Exception:
            if cpdb():
                pdb.set_trace()
            raise

    # Second reference to the real implementation, still reachable when
    # ``_subprocess`` itself is replaced by a patch.
    _subprocess_actual = _subprocess

    def run(self):
        """Expand the configured command line and output path, then run the command."""
        try:
            cmd_template = self.config["cmdline"]
            path_template = os.path.join(self.mgr.workdir, self.config["filename"])
            cmd = sub_template(cmd_template, self, self.mgr.vars)
            fnp_o = sub_template(path_template, self, self.mgr.vars)
            # Keyword arguments, with the instance passed explicitly as self_.
            self._subprocess(cmd=cmd, fnp_o=fnp_o, self_=self)
        except Exception:
            if cpdb():
                pdb.set_trace()
            raise
This is a module from PubNub that I'm using to publish a message to a topic from an API. By design I've kept the PubNub object singleton.
class Pubnub:
    """Process-wide singleton wrapper around a configured ``PubNub`` client.

    Use ``Pubnub.get()`` to obtain the shared instance; the client is
    configured from ``config/config.yaml`` the first time it is created.
    """

    # The shared instance, created on the first call to get().
    instance = None

    @classmethod
    def get(cls):
        """Return the shared instance, creating it on first use."""
        if cls.instance is None:
            cls.instance = cls()
        return cls.instance

    def __init__(self):
        """Load the PubNub keys from ``config/config.yaml`` and build the client.

        A YAML parse error is logged rather than raised; in that case
        ``self.pubnub`` is left unset.
        """
        with open("config/config.yaml", "r") as stream:
            try:
                # safe_load: the config only needs plain YAML types, and
                # yaml.load without an explicit Loader is unsafe/deprecated.
                conf = yaml.safe_load(stream)
                pnconfig = PNConfiguration()
                # Each key is read from its own config entry (they were swapped).
                pnconfig.subscribe_key = conf["pubnub"]["subscribe_key"]
                pnconfig.publish_key = conf["pubnub"]["publish_key"]
                pnconfig.ssl = False
                self.pubnub = PubNub(pnconfig)
            except yaml.YAMLError as e:
                logger.error(str(e))

    def publish(self, channel):
        """Synchronously publish ``{'message': True}`` to ``channel``.

        Prints the publish timetoken on success; a ``PubNubException`` is
        logged rather than raised.
        """
        try:
            envelope = (
                self.pubnub.publish()
                .channel(channel)
                .message({"message": True})
                .sync()
            )
            print("publish timetoken: %d" % envelope.result.timetoken)
        except PubNubException as e:
            logger.error(str(e))
This is how I'm calling it,
class SendCommunityTextMessage(views.APIView):
    """API view whose POST handler authenticates the client and publishes via ``Pubnub``."""

    def post(self, request, **kwargs):
        """Handle POST: authenticate, do the (elided) work, publish, and respond.

        Responds 403 for an invalid client, 404 when the user or room does not
        exist, and 400 for a ``KeyError`` at either level.
        """
        try:
            client_id = request.GET["client_id"]
            client_secret = request.GET["client_secret"]
            if not Authenticator.authenticate_client(client_id, client_secret):
                return Response({"Failure": "Invalid client"}, status=status.HTTP_403_FORBIDDEN)
            try:
                # do something (elided in the original snippet)
                try:
                    # do something more (elided in the original snippet)
                    publisher = Pubnub.get()
                    publisher.publish(receiver.hex_code)
                    return Response(
                        {"Success": CommunityTextMessageSerializer(message).data},
                        status=status.HTTP_200_OK,
                    )
                except KeyError as e:
                    return Response({"Failure": str(e)}, status=status.HTTP_400_BAD_REQUEST)
            except (User.DoesNotExist, CommunityRoom.DoesNotExist) as e:
                return Response({"Failure": str(e)}, status=status.HTTP_404_NOT_FOUND)
        except KeyError:
            return Response(
                {"Failure": "Probably a typo, read the docs to use this API."},
                status=status.HTTP_400_BAD_REQUEST,
            )
The issue is this slows down the API by minutes. How can I call the two lines,
pubbub = Pubnub.get()
pubbub.publish(receiver.hex_code)
asynchronously and return out of the view without waiting for the call to finish.
Thanks in anticipation.
I have this kind of code
class disable_file_system_redirection:
    """Context manager that disables WOW64 file-system redirection on Windows.

    When ``mysystem`` is not "Windows" the kernel32 functions are never bound
    and entering/leaving the context does nothing, so the same ``with`` block
    can be used on every platform.
    """

    # Bound to the kernel32 functions only on Windows; ctypes.windll is not
    # available on other platforms.
    _disable = None
    _revert = None
    if mysystem == "Windows":
        _disable = ctypes.windll.kernel32.Wow64DisableWow64FsRedirection
        _revert = ctypes.windll.kernel32.Wow64RevertWow64FsRedirection

    def __enter__(self):
        """Disable redirection and remember the previous state for ``__exit__``."""
        self.old_value = ctypes.c_long()
        # Stays False when there is nothing to disable, so __exit__ is a no-op.
        self.success = False
        if self._disable is not None:
            self.success = self._disable(ctypes.byref(self.old_value))

    def __exit__(self, type, value, traceback):
        """Restore redirection, but only if disabling it succeeded."""
        if self.success:
            self._revert(self.old_value)
If test == “yes”:
with disable_file_system_redirection:
try:
“some code”
else:
try:
“same code”
As you can see, I wrote the same code twice. I cannot merge the two identical blocks without getting errors. Is there a way to do something like this?
If test = = “yes”:
with disable_file_system_redirection:
else:
pass #without disable_file_system_redirection:
“some code”
You can move the shared code into a function:
def code_to_do():
    """Placeholder for the shared work; prints its own name."""
    message = "code_to_do"
    print(message)
# Run the shared code, inside the redirection context manager only when
# test == "yes". Errors from the shared code are printed, not raised.
if test == "yes":
    # The context manager must be instantiated: the with statement needs an
    # object providing __enter__/__exit__, which the bare class does not.
    with disable_file_system_redirection():
        try:
            code_to_do()
        except Exception as e:
            print(str(e))
else:
    try:
        code_to_do()
    except Exception as e:
        print(str(e))
I'm using the following code to get data from a website:
time_out = 4


def tryconnect(turl, timer=time_out, retries=10):
    """Open ``turl`` with ``urllib2``, retrying on ``URLError``.

    Makes attempts until one succeeds or ``retries`` attempts have failed.
    Returns the opened connection, or ``None`` when none was opened.
    """
    failures = 0
    connection = None
    while failures < retries:
        try:
            connection = urllib2.urlopen(turl, None, timer)
        except urllib2.URLError:
            failures += 1
        else:
            break
    return connection if connection else None
[...]
urlopener = tryconnect('www.example.com')
if not urlopener:
return None
try:
for line in urlopener:
do stuff
except httplib.IncompleteRead:
print 'incomplete'
return None
except socket.timeout:
print 'socket'
return None
return stuff
Is there a way I can handle all these exceptions without so much boilerplate code every time?
Thanks!
You can avoid some boilerplate code in the first function too:
time_out = 4


def tryconnect(turl, timer=time_out, retries=10):
    """Open ``turl`` with ``urllib2``, making up to ``retries`` attempts.

    Returns the first successfully opened connection, or ``None`` when every
    attempt raised ``URLError``.
    """
    for _attempt in xrange(retries):
        try:
            connection = urllib2.urlopen(turl, None, timer)
        except urllib2.URLError:
            continue
        return connection
    return None
and in the second:
urlopener = tryconnect('www.example.com')
if urlopener:
try:
for line in urlopener:
do stuff
except (httplib.IncompleteRead, socket.timeout), e:
print e
return None
else:
return None