How to connect to HDFS Cluster using snakebite-py3 - python

I am trying to connect to an HDFS cluster from Python using the snakebite-py3 library, and when I set use_sasl to True I get the following error:
Code Snippet:
from snakebite.client import Client

client = Client(host='hostname', port=8020,
                effective_user='user', use_sasl=True)

for x in client.ls(['/']):
    print(x, "\n")
Error:
---------------------------------------------------------------------------
GSSError Traceback (most recent call last)
<ipython-input-21-62c8b8df16ea> in <module>
2 from snakebite.client import Client
3
----> 4 client = Client(host='hostname',port=8020, effective_user='user', use_sasl=True)
5
6 for x in client.ls(['/test_abha']): print(x,"\n")
C:\ProgramData\Anaconda3\lib\site-packages\snakebite\client.py in __init__(self, host, port, hadoop_version, use_trash, effective_user, use_sasl, hdfs_namenode_principal, sock_connect_timeout, sock_request_timeout, use_datanode_hostname)
126 self.hdfs_namenode_principal = hdfs_namenode_principal
127 self.service_stub_class = client_proto.ClientNamenodeProtocol_Stub
--> 128 self.service = RpcService(self.service_stub_class, self.port, self.host, hadoop_version,
129 effective_user,self.use_sasl, self.hdfs_namenode_principal,
130 sock_connect_timeout, sock_request_timeout)
C:\ProgramData\Anaconda3\lib\site-packages\snakebite\service.py in __init__(self, service_stub_class, port, host, hadoop_version, effective_user, use_sasl, hdfs_namenode_principal, sock_connect_timeout, sock_request_timeout)
30
31 # Setup the RPC channel
---> 32 self.channel = SocketRpcChannel(host=self.host, port=self.port, version=hadoop_version,
33 effective_user=effective_user, use_sasl=use_sasl,
34 hdfs_namenode_principal=hdfs_namenode_principal,
C:\ProgramData\Anaconda3\lib\site-packages\snakebite\channel.py in __init__(self, host, port, version, effective_user, use_sasl, hdfs_namenode_principal, sock_connect_timeout, sock_request_timeout)
193 raise FatalException("Kerberos libs not found. Please install snakebite using 'pip install snakebite[kerberos]'")
194
--> 195 kerberos = Kerberos()
196 self.effective_user = effective_user or kerberos.user_principal()
197 else:
C:\ProgramData\Anaconda3\lib\site-packages\snakebite\kerberos.py in __init__(self)
41 class Kerberos:
42 def __init__(self):
---> 43 self.credentials = gssapi.Credentials(usage='initiate')
44
45 def user_principal(self):
C:\ProgramData\Anaconda3\lib\site-packages\gssapi\creds.py in __new__(cls, base, token, name, lifetime, mechs, usage, store)
61 base_creds = rcred_imp_exp.import_cred(token)
62 else:
---> 63 res = cls.acquire(name, lifetime, mechs, usage,
64 store=store)
65 base_creds = res.creds
C:\ProgramData\Anaconda3\lib\site-packages\gssapi\creds.py in acquire(cls, name, lifetime, mechs, usage, store)
134
135 if store is None:
--> 136 res = rcreds.acquire_cred(name, lifetime,
137 mechs, usage)
138 else:
gssapi/raw/creds.pyx in gssapi.raw.creds.acquire_cred()
GSSError: Major (851968): Unspecified GSS failure. Minor code may provide more information, Minor (39756044): Credential cache is empty
Could you please suggest what I am missing? Thank you.
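In case it helps future readers: the final error, "Credential cache is empty", usually means no Kerberos ticket has been obtained before the client tries to authenticate. Below is a minimal sketch assuming MIT Kerberos and a keytab are available on the machine; the keytab path, principal, and namenode principal are placeholders, not values from the question.

import subprocess

from snakebite.client import Client

# Obtain a Kerberos ticket non-interactively from a keytab so the GSSAPI
# credential cache is populated before snakebite negotiates SASL.
subprocess.run(["kinit", "-kt", "/path/to/user.keytab", "user@EXAMPLE.COM"], check=True)

client = Client(host='hostname', port=8020,
                effective_user='user', use_sasl=True,
                hdfs_namenode_principal='hdfs/_HOST@EXAMPLE.COM')

for x in client.ls(['/']):
    print(x, "\n")

If kinit succeeds, klist should show a valid ticket, and the GSSError should no longer be raised, assuming the cluster is actually Kerberized.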

Related

how can I avoid the StopIteration note in python

I keep getting a StopIteration error when running my code. The output here is just to test whether the code works, since the whole thing involves retrieving data through an API. My code is shown below:
import pandas as pd
import json
import requests
from socketIO_client import SocketIO

TRADING_API_URL = 'https://api-demo.fxcm.com:443'
WEBSOCKET_PORT = 443
ACCESS_TOKEN = "ba08382c61b2b35f258e8ea64dcf4928c4263053"

def on_connect():
    print('websocket Connected: ' + socketIO._engineIO_session.id)

def on_close():
    print('websocket Closed.')

socketIO = SocketIO(TRADING_API_URL, WEBSOCKET_PORT, params={'access_token': ACCESS_TOKEN})
socketIO.on('connect', on_connect)
socketIO.on('disconnect', on_close)

Bearer_access_token = "Bearer " + socketIO._engineIO_session.id + ACCESS_TOKEN
print(Bearer_access_token)
The traceback I get is below:
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_26108/1749199291.py in <module>
14 print('websocket Closed.')
15
---> 16 socketIO = SocketIO(TRADING_API_URL, WEBSOCKET_PORT, params={'access_token': ACCESS_TOKEN})
17
18 socketIO.on('connect', on_connect)
~\anaconda3\lib\site-packages\socketIO_client\__init__.py in __init__(self, host, port, Namespace, wait_for_connection, transports, resource, hurry_interval_in_seconds, **kw)
349 self._callback_by_ack_id = {}
350 self._ack_id = 0
--> 351 super(SocketIO, self).__init__(
352 host, port, Namespace, wait_for_connection, transports,
353 resource, hurry_interval_in_seconds, **kw)
~\anaconda3\lib\site-packages\socketIO_client\__init__.py in __init__(self, host, port, Namespace, wait_for_connection, transports, resource, hurry_interval_in_seconds, **kw)
52 if Namespace:
53 self.define(Namespace)
---> 54 self._transport
55
56 # Connect
~\anaconda3\lib\site-packages\socketIO_client\__init__.py in _transport(self)
60 if self._opened:
61 return self._transport_instance
---> 62 self._engineIO_session = self._get_engineIO_session()
63 self._negotiate_transport()
64 self._connect_namespaces()
~\anaconda3\lib\site-packages\socketIO_client\__init__.py in _get_engineIO_session(self)
73 self._http_session, self._is_secure, self._url)
74 try:
---> 75 engineIO_packet_type, engineIO_packet_data = next(
76 transport.recv_packet())
77 break
StopIteration:
Is there anything I can add (or take away) from my code to help stop this issue, without affecting the desired output? Thanks
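One way to see what is going wrong (a sketch, not an fxcm-endorsed flow): perform the engine.io handshake yourself with requests before constructing SocketIO, so a rejected token shows up as an explicit HTTP status instead of an empty generator raising StopIteration. The endpoint and query parameters below follow the standard engine.io polling handshake that socketIO_client uses; whether this particular server accepts them is an assumption.

import requests

TRADING_API_URL = 'https://api-demo.fxcm.com:443'
ACCESS_TOKEN = "<your access token>"

# Hit the engine.io handshake endpoint directly; a non-200 response (or an
# error body) usually explains why recv_packet() yields nothing.
resp = requests.get(
    TRADING_API_URL + '/socket.io/',
    params={'access_token': ACCESS_TOKEN, 'EIO': 3, 'transport': 'polling'},
)
print(resp.status_code)
print(resp.text[:200])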

python atlasapi authentication

I'm trying to authenticate to Atlas with atlasapi. I'm using my Google account and get the error ErrAtlasUnauthorized: Authentication is required with the method below. Is Google auth supported, or am I doing something wrong?
from atlasapi.atlas import Atlas

auth = Atlas("foo#google.com", "<password>", "<groupId>")
clusters = auth.Clusters.get_all_clusters
print(clusters())
full trace:
ErrAtlasUnauthorized Traceback (most recent call last)
<ipython-input-61-d69a101fdf69> in <module>
1 clusters = auth.Clusters.get_all_clusters
----> 2 print (clusters())
C:\...\atlasapi\atlas.py in get_all_clusters(self, pageNum, itemsPerPage, iterable)
129
130 uri = Settings.api_resources["Clusters"]["Get All Clusters"] % (self.atlas.group, pageNum, itemsPerPage)
--> 131 return self.atlas.network.get(Settings.BASE_URL + uri)
132
133 def get_single_cluster(self, cluster: str) -> dict:
C:\...\atlasapi\network.py in get(self, uri)
144 logger.debug("Auth information = {} {}".format(self.user, self.password))
145
--> 146 return self.answer(r.status_code, r.json())
147
148 except Exception:
C:\...\atlasapi\network.py in answer(self, c, details)
68 raise ErrAtlasBadRequest(c, details)
69 elif c == Settings.UNAUTHORIZED:
---> 70 raise ErrAtlasUnauthorized(c, details)
71 elif c == Settings.FORBIDDEN:
72 raise ErrAtlasForbidden(c, details)
ErrAtlasUnauthorized: Authentication is required
The API access keys are what go in as the user/password arguments; your Google login will not work here.
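For illustration, a sketch of what that looks like, assuming a programmatic API key pair has been created in the Atlas UI; the key values and group ID are placeholders.

from atlasapi.atlas import Atlas

PUBLIC_KEY = "<atlas-api-public-key>"
PRIVATE_KEY = "<atlas-api-private-key>"
GROUP_ID = "<groupId>"

# Authenticate with the API key pair instead of Google SSO credentials,
# mirroring the call pattern from the question.
auth = Atlas(PUBLIC_KEY, PRIVATE_KEY, GROUP_ID)

clusters = auth.Clusters.get_all_clusters
print(clusters())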

ecoinvent 3.5 import error: MaybeEncodingError:

I was trying to import ecoinvent 3.5 cutoff into a project using Brightway, with the following:
if 'ecoinvent 3.5 cutoff' not in databases:
    ei35cutofflink = r"H:\Data\ecoinvent 3.5_cutoff_lci_ecoSpold02\datasets"
    ei35cutoff = SingleOutputEcospold2Importer(ei35cutofflink, 'ecoinvent 3.5 cutoff')
    ei35cutoff.apply_strategies()
    ei35cutoff.statistics()
    ei35cutoff.write_database()
But I got the following error. It looks like the issue is not really related to Brightway itself, but rather to multiprocessing or pickle? I don't understand what the error message means.
---------------------------------------------------------------------------
MaybeEncodingError Traceback (most recent call last)
<ipython-input-4-f9acb2bc0c84> in <module>
1 if 'ecoinvent 3.5 cutoff' not in databases:
2 ei35cutofflink=r"H:\Data\ecoinvent 3.5_cutoff_lci_ecoSpold02\datasets"
----> 3 ei35cutoff=SingleOutputEcospold2Importer(ei35cutofflink, 'ecoinvent 3.5 cutoff')
4 ei35cutoff.apply_strategies()
5 ei35cutoff.statistics()
C:\miniconda3_py37\envs\ab\lib\site-packages\bw2io\importers\ecospold2.py in __init__(self, dirpath, db_name, extractor, use_mp, signal)
63 start = time()
64 try:
---> 65 self.data = extractor.extract(dirpath, db_name, use_mp=use_mp)
66 except RuntimeError as e:
67 raise MultiprocessingError('Multiprocessing error; re-run using `use_mp=False`'
C:\miniconda3_py37\envs\ab\lib\site-packages\bw2io\extractors\ecospold2.py in extract(cls, dirpath, db_name, use_mp)
91 ) for x in filelist
92 ]
---> 93 data = [p.get() for p in results]
94 else:
95 pbar = pyprind.ProgBar(len(filelist), title="Extracting ecospold2 files:", monitor=True)
C:\miniconda3_py37\envs\ab\lib\site-packages\bw2io\extractors\ecospold2.py in <listcomp>(.0)
91 ) for x in filelist
92 ]
---> 93 data = [p.get() for p in results]
94 else:
95 pbar = pyprind.ProgBar(len(filelist), title="Extracting ecospold2 files:", monitor=True)
C:\miniconda3_py37\envs\ab\lib\multiprocessing\pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
MaybeEncodingError: Error sending result: '<multiprocessing.pool.ExceptionWithTraceback object at 0x000001D257C55358>'. Reason: 'TypeError("can't pickle lxml.etree._ListErrorLog objects")'
You can use use_mp=False to get a sense of what the actual error is (instead of the real error failing to pickle and raising this separate error), as sketched below. In this case I think you have a problem with the data folder, which you can solve by deleting it and downloading or extracting it again.
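A sketch of that re-run, reusing the names from the question (databases and SingleOutputEcospold2Importer come from the Brightway imports already in use); with use_mp=False the extractor runs in a single process, so the underlying parsing error is raised directly.

if 'ecoinvent 3.5 cutoff' not in databases:
    ei35cutofflink = r"H:\Data\ecoinvent 3.5_cutoff_lci_ecoSpold02\datasets"
    # use_mp=False disables the multiprocessing pool, so the real exception
    # surfaces instead of the unpicklable ExceptionWithTraceback wrapper.
    ei35cutoff = SingleOutputEcospold2Importer(ei35cutofflink, 'ecoinvent 3.5 cutoff',
                                               use_mp=False)
    ei35cutoff.apply_strategies()
    ei35cutoff.statistics()
    ei35cutoff.write_database()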

cannot import name 'universaldetector'

I am trying to use https://github.com/dowjones/dj-dna-streams-python/tree/master/dnaStreaming . It's a package to receive news streams from Dow Jones. When I try to "listen" to the streams I receive the following error:
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-36-372f4305a9e1> in <module>()
1 while True:
----> 2 listener.listen(callback, maximum_messages=4, subscription_id=subscription_id)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\dj_dna_streaming_python-1.0.10-py3.5.egg\dnaStreaming\listener.py in listen(self, on_message_callback, maximum_messages, subscription_id)
21 def listen(self, on_message_callback, maximum_messages=DEFAULT_UNLIMITED_MESSAGES, subscription_id=None):
22 limit_pull_calls = not (maximum_messages == self.DEFAULT_UNLIMITED_MESSAGES)
---> 23 pubsub_client = pubsub_service.get_client(self.config)
24
25 subscription_id = subscription_id if subscription_id is not None else self.config.subscription()
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\dj_dna_streaming_python-1.0.10-py3.5.egg\dnaStreaming\services\pubsub_service.py in get_client(config)
7
8
----> 9 def get_client(config):
10 streaming_credentials = credentials_service.fetch_credentials(config)
11 credentials = authentication_service.get_authenticated_oauth_credentials(streaming_credentials)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\dj_dna_streaming_python-1.0.10-py3.5.egg\dnaStreaming\services\credentials_service.py in fetch_credentials(config)
11 response = _get_requests().get(config.credentials_uri(), headers=headers)
12
---> 13 streaming_credentials_string = json.loads(response.text)['data']['attributes']['streaming_credentials']
14
15 return json.loads(streaming_credentials_string)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\models.py in text(self)
824
825 self._content_consumed = True
--> 826 # don't need to release the connection; that's been handled by urllib3
827 # since we exhausted the data.
828 return self._content
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\models.py in apparent_encoding(self)
694 is **not** a check to see if the response code is ``200 OK``.
695 """
--> 696 try:
697 self.raise_for_status()
698 except HTTPError:
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\chardet\__init__.py in detect(aBuf)
ImportError: cannot import name 'universaldetector'
I understand that the key part is that it can't import universaldetector. Any idea why that is? I have seen this answer but can't really relate it to my problem. I have upgraded chardet and requests.
I am on Python 3 and Windows, executing the code in a Jupyter Notebook.
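A small diagnostic sketch (a check under assumptions, not a fix): importing universaldetector directly shows whether the standalone chardet package is healthy, while the traceback above fails inside the copy bundled under requests.packages, which suggests a stale or partially upgraded requests install in the environment that Jupyter is using.

import chardet
from chardet import universaldetector  # fails only if chardet itself is broken

import requests
print("chardet", chardet.__version__)
print("requests", requests.__version__)

If the direct import works, reinstalling requests in that same environment (so its vendored chardet matches) is a reasonable next step.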

Using Sacred Module with iPython

I am trying to set up sacred for Python and I am going through the tutorial. I was able to set up sacred using pip install sacred with no issues. I am having trouble running the basic code:
from sacred import Experiment
ex = Experiment("hello_world")
Running this code returns a ValueError:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-25-66f549cfb192> in <module>()
1 from sacred import Experiment
2
----> 3 ex = Experiment("hello_world")
/Users/ryandevera/anaconda/lib/python2.7/site-packages/sacred/experiment.pyc in __init__(self, name, ingredients)
42 super(Experiment, self).__init__(path=name,
43 ingredients=ingredients,
---> 44 _caller_globals=caller_globals)
45 self.default_command = ""
46 self.command(print_config, unobserved=True)
/Users/ryandevera/anaconda/lib/python2.7/site-packages/sacred/ingredient.pyc in __init__(self, path, ingredients, _caller_globals)
48 self.doc = _caller_globals.get('__doc__', "")
49 self.sources, self.dependencies = \
---> 50 gather_sources_and_dependencies(_caller_globals)
51
52 # =========================== Decorators ==================================
/Users/ryandevera/anaconda/lib/python2.7/site-packages/sacred/dependencies.pyc in gather_sources_and_dependencies(globs)
204 def gather_sources_and_dependencies(globs):
205 dependencies = set()
--> 206 main = Source.create(globs.get('__file__'))
207 sources = {main}
208 experiment_path = os.path.dirname(main.filename)
/Users/ryandevera/anaconda/lib/python2.7/site-packages/sacred/dependencies.pyc in create(filename)
61 if not filename or not os.path.exists(filename):
62 raise ValueError('invalid filename or file not found "{}"'
---> 63 .format(filename))
64
65 mainfile = get_py_file_if_possible(os.path.abspath(filename))
ValueError: invalid filename or file not found "None"
I am not sure why this error is raised. The documentation does not say anything about setting up an Experiment file prior to running the code. Any help would be greatly appreciated!
The traceback indicates that the Experiment constructor inspects its caller's globals to find the file in which it is defined.
Thus, to make the example work, place the example code into a file and run that file directly (see the sketch below).
If you are using IPython, you could also try the %%python cell magic, which effectively captures the code you give it into a file before running it in a separate Python process.
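For example, a sketch of the file-based approach using the tutorial's hello-world pattern (the filename is arbitrary):

# hello_world.py -- run with `python hello_world.py` so Experiment can find __file__
from sacred import Experiment

ex = Experiment("hello_world")

@ex.automain
def main():
    print("Hello world!")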
According to the docs, if you're in IPython/Jupyter, you can allow the Experiment to run in a non-reproducible interactive environment:
ex = Experiment('jupyter_ex', interactive=True)
https://sacred.readthedocs.io/en/latest/experiment.html#run-the-experiment
The docs say it nicely (TL;DR: sacred checks this for you and fails in order to warn you)
Warning
By default, Sacred experiments will fail if run in an interactive
environment like a REPL or a Jupyter Notebook. This is an intended
security measure since in these environments reproducibility cannot be
ensured. If needed, this safeguard can be deactivated by passing
interactive=True to the experiment like this:
ex = Experiment('jupyter_ex', interactive=True)
Setting interactive=True doesn't work if you run the notebook as a script through ipython.
$ ipython code.ipynb
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
Cell In[1], line 1
----> 1 ex = Experiment("image_classification", interactive=True)
2 ex.observers.append(NeptuneObserver(run=neptune_run))
File ~\miniconda3\envs\py38\lib\site-packages\sacred\experiment.py:119, in Experiment.__init__(self, name, ingredients, interactive, base_dir, additional_host_info, additional_cli_options, save_git_info)
117 elif name.endswith(".pyc"):
118 name = name[:-4]
--> 119 super().__init__(
120 path=name,
121 ingredients=ingredients,
122 interactive=interactive,
123 base_dir=base_dir,
124 _caller_globals=caller_globals,
125 save_git_info=save_git_info,
126 )
127 self.default_command = None
128 self.command(print_config, unobserved=True)
File ~\miniconda3\envs\py38\lib\site-packages\sacred\ingredient.py:75, in Ingredient.__init__(self, path, ingredients, interactive, _caller_globals, base_dir, save_git_info)
69 self.save_git_info = save_git_info
70 self.doc = _caller_globals.get("__doc__", "")
71 (
72 self.mainfile,
73 self.sources,
74 self.dependencies,
---> 75 ) = gather_sources_and_dependencies(
76 _caller_globals, save_git_info, self.base_dir
77 )
78 if self.mainfile is None and not interactive:
79 raise RuntimeError(
80 "Defining an experiment in interactive mode! "
81 "The sourcecode cannot be stored and the "
82 "experiment won't be reproducible. If you still"
83 " want to run it pass interactive=True"
84 )
File ~\miniconda3\envs\py38\lib\site-packages\sacred\dependencies.py:725, in gather_sources_and_dependencies(globs, save_git_info, base_dir)
723 def gather_sources_and_dependencies(globs, save_git_info, base_dir=None):
724 """Scan the given globals for modules and return them as dependencies."""
--> 725 experiment_path, main = get_main_file(globs, save_git_info)
727 base_dir = base_dir or experiment_path
729 gather_sources = source_discovery_strategies[SETTINGS["DISCOVER_SOURCES"]]
File ~\miniconda3\envs\py38\lib\site-packages\sacred\dependencies.py:596, in get_main_file(globs, save_git_info)
594 main = None
595 else:
--> 596 main = Source.create(globs.get("__file__"), save_git_info)
461 return Source(main_file, get_digest(main_file), repo, commit, is_dirty)
File ~\miniconda3\envs\py38\lib\site-packages\sacred\dependencies.py:382, in get_py_file_if_possible(pyc_name)
380 if pyc_name.endswith((".py", ".so", ".pyd")):
381 return pyc_name
--> 382 assert pyc_name.endswith(".pyc")
383 non_compiled_file = pyc_name[:-1]
384 if os.path.exists(non_compiled_file):
sacred==0.8.2
