UnicodeDecodeError from piping .txt file with cat - python

I am using `cat test.txt |` to pipe the lines of that file into my Python program. However, I am getting a UnicodeDecodeError:
Traceback
Traceback (most recent call last):
File "/home/sys_bio_ctgdq/sthe-admin/python3.5/lib/python3.5/runpy.py", line 170, in _run_module_as_main
"__main__", mod_spec)
File "/home/sys_bio_ctgdq/sthe-admin/python3.5/lib/python3.5/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/nfsdata/DSCluster/home/bli1/qe-trinity-functional/qe/trinity/hdfs2datacase.py", line 102, in <module>
sys.exit(main(sys.argv))
File "/nfsdata/DSCluster/home/bli1/qe-trinity-functional/qe/trinity/hdfs2datacase.py", line 72, in main
for rawline in fileinput.input(args.targets):
File "/home/sys_bio_ctgdq/sthe-admin/python3.5/lib/python3.5/fileinput.py", line 265, in __next__
line = self.readline()
File "/home/sys_bio_ctgdq/sthe-admin/python3.5/lib/python3.5/fileinput.py", line 370, in readline
self._buffer = self._file.readlines(self._bufsize)
File "/tmp/python3_bli1/lib/python3.5/codecs.py", line 321, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xae in position 1971: invalid start byte
Line 72 is the for loop line
Here is how I handle the input stream:
# Iterate over every input line, asking fileinput to decode files as UTF-8.
# NOTE: per the traceback above, the decode error is raised while fileinput
# fetches the next line (in the `for` statement itself), which is outside the
# try block below.
for rawline in fileinput.input(args.targets, openhook=fileinput.hook_encoded("utf-8")):
    try:
        jobj = json.loads(rawline)
    except UnicodeDecodeError:
        # UnicodeDecodeError is a subclass of ValueError, so it has to be
        # listed first or this handler can never run.
        log.warn("Funky characters on input line %d", line_count)
    except ValueError:
        log.warn("invalid json on input line %d", line_count)
    else:
        if case_obj is not None:
            case_str = json.dumps(case_obj, sort_keys=True, indent=4)
            # Emit the cases as one JSON array: "[" before the first element,
            # "," before every later one.
            if first_case:
                print("[")
                first_case = False
            else:
                print(",")
            print(case_str)

Related

anaconda navigator stuck on loading applications

(base) C:\Windows\System32>anaconda-navigator
2022-10-25 21:01:52,124 - ERROR init.global_exception_logger:19
'utf-8' codec can't decode byte 0xbb in position 0: invalid start byte
Traceback (most recent call last):
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\main_window_init_.py", line 497, in setup
self.post_setup(conda_data=output)
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\main_window_init_.py", line 525, in post_setup
self.tab_home.setup(conda_data)
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\tabs\home.py", line 253, in setup
self.update_applications()
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\tabs\home.py", line 292, in update_applications
self.api.process_apps(self._applications, prefix=self.current_prefix).values(),
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\anaconda_api.py", line 561, in process_apps
collected_applications: external_apps.ApplicationCollection = external_apps.get_applications(
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps_init.py", line 49, in get_applications
apps: typing.Sequence[typing.Union[BaseApp, AppPatch]] = config_utils.load_configuration(context=context)
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\config_utils.py", line 217, in load_configuration
return apply_configuration(
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\config_utils.py", line 198, in apply_configuration
addition: typing.Union[None, base.BaseApp, base.AppPatch] = base.BaseApp.parse_configuration(
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\base.py", line 233, in parse_configuration
return target_cls._parse_configuration( # pylint: disable=protected-access
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\base.py", line 458, in _parse_configuration
result: BaseInstallableApp = BaseInstallableApp(
File "", line 17, in init
self.attrs_post_init()
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\base.py", line 378, in attrs_post_init
for location in self._detector(context=context):
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\bundle\vscode_utils.py", line 58, in call
stdout, _, _ = conda_launch_utils.run_process([application.executable, '--version'])
File "D:\anaconda3\lib\site-packages\anaconda_navigator\utils\conda\launch.py", line 45, in run_process
stdout = ansi_utlils.escape_ansi(raw_stdout.decode())
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbb in position 0: invalid start byte
Can anyone help me solve this issue?

Python Logging UnicodeEncodeError

Logging error: I cannot print the ids to the log.
--- Logging error ---
Traceback (most recent call last):
File "C:\Anaconda3\envs\NLP_Research\lib\logging\__init__.py", line 1028, in emit
stream.write(msg + self.terminator)
File "C:\Anaconda3\envs\NLP_Research\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\uff1a' in position 80: character maps to <undefined>
Call stack:
File "preprocess.py", line 173, in
test_out = get_out(processor, './input/data/test.txt', args, id2label, 'test')
File "preprocess.py", line 149, in get_out
out = convert_examples_to_features(examples, args.max_seq_len, args.bert_dir)
File "preprocess.py", line 119, in convert_examples_to_features
tokenizer=tokenizer,
File "preprocess.py", line 93, in convert_bert_example
logger.info(f"ids:{ids}")
logger.info(f"ids:{ids}")
Message: 'ids:[165, 175, 73, 79]'
Arguments: ()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 125: invalid continuation byte

I am reading in a file with 50000 rows and values:
2.711569 2.295490 4.141002
...
numpy.loadtxt does a good job of reading them and doing some calculations. Once I call my function:
c = setupc(2,n) # setups of cov matrix
with n = 499999
it leads me to the dispatcher:
def _compile_for_args(self, *args, **kws):
"""
For internal use. Compile a specialized version of the function
for the given *args* and *kws*, and return the resulting callable.
"""
assert not kws
def error_rewrite(e, issue_type):
"""
Rewrite and raise Exception `e` with help supplied based on the
specified issue_type.
"""
if config.SHOW_HELP:
help_msg = errors.error_extras[issue_type]
e.patch_message('\n'.join((str(e).rstrip(), help_msg)))
if config.FULL_TRACEBACKS:
raise e
else:
reraise(type(e), e, None)
argtypes = []
for a in args:
if isinstance(a, OmittedArg):
argtypes.append(types.Omitted(a.value))
else:
argtypes.append(self.typeof_pyval(a))
try:
return self.compile(tuple(argtypes))
and gives the error:
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "C:\Anaconda3\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Anaconda3\lib\site-packages\spyder_kernels\console\__main__.py", line 11, in <module>
start.main()
File "C:\Anaconda3\lib\site-packages\spyder_kernels\console\start.py", line 318, in main
kernel.start()
File "C:\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 583, in start
self.io_loop.start()
File "C:\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 153, in start
self.asyncio_loop.run_forever()
File "C:\Anaconda3\lib\asyncio\base_events.py", line 538, in run_forever
self._run_once()
File "C:\Anaconda3\lib\asyncio\base_events.py", line 1782, in _run_once
handle._run()
File "C:\Anaconda3\lib\asyncio\events.py", line 100, in _run
self._loop.call_exception_handler(context)
File "C:\Anaconda3\lib\asyncio\base_events.py", line 1647, in call_exception_handler
exc_info=True)
File "C:\Anaconda3\lib\logging\__init__.py", line 1407, in error
self._log(ERROR, msg, args, **kwargs)
File "C:\Anaconda3\lib\logging\__init__.py", line 1514, in _log
self.handle(record)
File "C:\Anaconda3\lib\logging\__init__.py", line 1524, in handle
self.callHandlers(record)
File "C:\Anaconda3\lib\logging\__init__.py", line 1594, in callHandlers
lastResort.handle(record)
File "C:\Anaconda3\lib\logging\__init__.py", line 894, in handle
self.emit(record)
File "C:\Anaconda3\lib\logging\__init__.py", line 1033, in emit
self.handleError(record)
File "C:\Anaconda3\lib\logging\__init__.py", line 947, in handleError
traceback.print_exception(t, v, tb, None, sys.stderr)
File "C:\Anaconda3\lib\traceback.py", line 104, in print_exception
type(value), value, tb, limit=limit).format(chain=chain):
File "C:\Anaconda3\lib\traceback.py", line 521, in __init__
self._load_lines()
File "C:\Anaconda3\lib\traceback.py", line 533, in _load_lines
self.__context__._load_lines()
File "C:\Anaconda3\lib\traceback.py", line 533, in _load_lines
self.__context__._load_lines()
File "C:\Anaconda3\lib\traceback.py", line 533, in _load_lines
self.__context__._load_lines()
[Previous line repeated 14 more times]
File "C:\Anaconda3\lib\traceback.py", line 531, in _load_lines
frame.line
File "C:\Anaconda3\lib\traceback.py", line 285, in line
self._line = linecache.getline(self.filename, self.lineno).strip()
File "C:\Anaconda3\lib\linecache.py", line 16, in getline
lines = getlines(filename, module_globals)
File "C:\Anaconda3\lib\linecache.py", line 47, in getlines
return updatecache(filename, module_globals)
File "C:\Anaconda3\lib\linecache.py", line 137, in updatecache
lines = fp.readlines()
File "C:\Anaconda3\lib\codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc3 in position 125: invalid continuation byte
I hope somebody can help me
EDIT:
Function setupc:
def setupc(m, n):
    """Build and return the ``m`` x ``m`` matrix ``c``.

    Entry ``c[i-1, j-1]`` is computed for the 1-based indices
    ``i, j = 1..m`` from ``min(i, j)``, ``i``, ``j`` and ``n`` by the
    three-term expression below. The third term is only active when
    ``i + j - n - 2 >= 0`` (Heaviside step).

    Parameters:
        m: number of rows and columns of the result.
        n: value used in the denominators and in the Heaviside condition.

    Returns:
        A NumPy float array of shape ``(m, m)``.
    """
    c = np.zeros((m, m))
    for i in range(1, m + 1):
        for j in range(1, m + 1):
            # Heaviside
            if (i + j - n - 2 >= 0):
                heav = 1.0
            else:
                heav = 0.0
            c[i-1,j-1] = (
                2.*min(i,j) * (1.+3.*i*j - min(i,j)**2) / (n - min(i,j) +1)
                + (min(i,j)**2 - min(i,j)**4) / ((n-i+1.)*(n-j+1.))
                + heav * ((n+1.-i-j)**4 - (n+1.-i-j)**2) / ((n-i+1.)*(n-j+1.))
            )
    return c

UnicodeEncodeError: Ascii codec can't encode character u2581 in position 0: ordinal not in range(128)

Traceback (most recent call last): File "train.py", line 18, in
tf.app.run(main=nmt.main, argv=[os.getcwd() + '\nmt\nmt\nmt.py'] + unparsed) File
"/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py",
line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough)) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/nmt.py", line 551, in
main
run_main(FLAGS, default_hparams, train_fn, inference_fn) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/nmt.py", line 544, in
run_main
train_fn(hparams, target_session=target_session) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 271, in
train
sample_tgt_data) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 142, in
run_full_eval
sample_src_data, sample_tgt_data) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 55, in
run_sample_decode
infer_model.batch_size_placeholder, summary_writer) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 454, in
_sample_decode
utils.print_out(b" src: " + utils.format_sentence(src_data[decode_id], hparams.subword_option))
File
"/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/utils/misc_utils.py",
line 193, in format_sentence
sentence = format_spm_text(sentence) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/utils/misc_utils.py",
line 181, in format_spm_text
return u"".join(format_text(symbols).decode("utf-8").split()).replace( File
"/usr/lib/python2.7/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True) UnicodeEncodeError: 'ascii' codec can't encode character u'\u2581' in
position 0: ordinal not in range(128)
Traceback (most recent call last): File "train.py", line 18, in
tf.app.run(main=nmt.main, argv=[os.getcwd() + '\nmt\nmt\nmt.py'] + unparsed) File
"/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py",
line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough)) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/nmt.py", line 551, in
main
run_main(FLAGS, default_hparams, train_fn, inference_fn) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/nmt.py", line 544, in
run_main
train_fn(hparams, target_session=target_session) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 271, in
train
sample_tgt_data) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 142, in
run_full_eval
sample_src_data, sample_tgt_data) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 55, in
run_sample_decode
infer_model.batch_size_placeholder, summary_writer) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 454, in
_sample_decode
utils.print_out(b" src: " + utils.format_sentence(src_data[decode_id], hparams.subword_option))
File
"/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/utils/misc_utils.py",
line 193, in format_sentence
sentence = format_spm_text(sentence) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/utils/misc_utils.py",
line 181, in format_spm_text
return u"".join(format_text(symbols).decode("utf-8").split()).replace( File
"/usr/lib/python2.7/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True) UnicodeEncodeError: 'ascii' codec can't encode character u'\u2581' in
position 0: ordinal not in range(128)
I am getting an error in these lines of code:
def format_spm_text(symbols):
    """Decode a text in SPM (https://github.com/google/sentencepiece)
    format."""
    # Drop all whitespace between pieces, turn the U+2581 marker into a
    # space, strip the ends, and return the result encoded as UTF-8.
    return u"".join(format_text(symbols).decode("utf-8").split()).replace(
        u"\u2581", u" ").strip().encode("utf-8")
I am trying to train a chatbot by running a file called 'train.py'. I use the command 'sudo python train.py', and my current Python version on Ubuntu is 3.6. On my local macOS machine the exact same code seems to work fine, but I am running Python 2.7 there.
Try out this:
def format_spm_text(symbols):
    """Decode a text in SPM (https://github.com/google/sentencepiece) format.

    Variant that decodes the output of ``format_text`` with the
    "unicode-escape" codec instead of "utf-8".
    """
    return u"".join(format_text(symbols).decode("unicode-escape").split()).replace(
        u"\u2581", u" ").strip().encode("utf-8")

MemoryError when I use to_categorical

I am using an LSTM for multi-class classification, with y_train as my label input.
The code is below:
y_train = yuantrain['LOCF']
labels = to_categorical(np.array(y_train),286)
The error message is:
Traceback (most recent call last):
File "<ipython-input-55-4ae3f21f520f>", line 1, in <module>
labels = to_categorical(np.array(y_train))
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\utils\np_utils.py",
line 24, in to_categorical
categorical = np.zeros((n, num_classes))
MemoryError
Below are the first 5 values of y_train:
In [65]:
y_train[0:5]
Out[65]:
0 620245
1 282
2 500004
3 620193
4 60119
Name: LOCF, dtype: int64
Here is what I have tried:
# Label processing
# Convert the labels to a binary (one-hot) form, and think about how to decode that binary form
labels = yuantrain["LOCF"].drop_duplicates()
# labels holds the unique label values found in y_train
num_labels = len(labels) # number of unique labels
# Identity matrix: row k is the one-hot vector paired with the k-th unique label
one_hot = np.zeros((num_labels, num_labels), int)
np.fill_diagonal(one_hot, 1)
label_dict = dict(zip(labels, one_hot))
y_train = yuantrain['LOCF']
# Replace every label value with its one-hot row
y_train = y_train.apply(lambda y: label_dict[y])
But this does not feel easy to me, because I then have to decode the labels in my own way.
When I use my own one-hot encoding, Keras also raises an error like the one below:
Train...
ERROR (theano.gof.opt): SeqOptimizer apply <theano.tensor.opt.FusionOptimizer object at 0x000000000D87B0F0>
ERROR (theano.gof.opt): Traceback:
ERROR (theano.gof.opt): Traceback (most recent call last):
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\opt.py", line 235, in apply
sub_prof = optimizer.optimize(fgraph)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\opt.py", line 87, in optimize
ret = self.apply(fgraph, *args, **kwargs)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\tensor\opt.py", line 7289, in apply
new_outputs = self.optimizer(node)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\tensor\opt.py", line 7122, in local_fuse
tv = gof.op.get_test_value(ii)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\op.py", line 987, in get_test_value
return PureOp._get_test_value(v_var)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\op.py", line 580, in _get_test_value
detailed_err_msg = utils.get_variable_trace_string(v)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\utils.py", line 134, in get_variable_trace_string
return sio.getvalue()
File "C:\ProgramData\Anaconda2\lib\StringIO.py", line 271, in getvalue
self.buf += ''.join(self.buflist)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 27: ordinal not in range(128)
ERROR (theano.gof.opt): SeqOptimizer apply <theano.tensor.opt.FusionOptimizer object at 0x000000000D87B0F0>
ERROR (theano.gof.opt): Traceback:
ERROR (theano.gof.opt): Traceback (most recent call last):
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\opt.py", line 235, in apply
sub_prof = optimizer.optimize(fgraph)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\opt.py", line 87, in optimize
ret = self.apply(fgraph, *args, **kwargs)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\tensor\opt.py", line 7289, in apply
new_outputs = self.optimizer(node)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\tensor\opt.py", line 7122, in local_fuse
tv = gof.op.get_test_value(ii)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\op.py", line 987, in get_test_value
return PureOp._get_test_value(v_var)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\op.py", line 580, in _get_test_value
detailed_err_msg = utils.get_variable_trace_string(v)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\utils.py", line 134, in get_variable_trace_string
return sio.getvalue()
File "C:\ProgramData\Anaconda2\lib\StringIO.py", line 271, in getvalue
self.buf += ''.join(self.buflist)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 27: ordinal not in range(128)
Train on 100000 samples, validate on 77963 samples
Epoch 1/5
Traceback (most recent call last):
File "<ipython-input-67-5ce4b6739b03>", line 1, in <module>
runfile('E:/XIAMENproject/Prediction_Guo/count.py', wdir='E:/XIAMENproject/Prediction_Guo')
File "C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "E:/XIAMENproject/Prediction_Guo/count.py", line 150, in <module>
validation_data=(x_val, y_val))
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\models.py", line 870, in fit
initial_epoch=initial_epoch)
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\engine\training.py", line 1507, in fit
initial_epoch=initial_epoch)
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\engine\training.py", line 1156, in _fit_loop
outs = f(ins_batch)
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\backend\theano_backend.py", line 1196, in __call__
return self.function(*inputs)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\compile\function_module.py", line 805, in __call__
self.maker.inputs[i].variable)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\utils.py", line 134, in get_variable_trace_string
return sio.getvalue()
File "C:\ProgramData\Anaconda2\lib\StringIO.py", line 271, in getvalue
self.buf += ''.join(self.buflist)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 27: ordinal not in range(128)
I remapped the label IDs to contiguous IDs (0, 1, 2, ...) and then used to_categorical.

Categories