Can't export pandas dataframe to excel / encoding - python

I'm unable to export one of my dataframes due to some encoding difficulty.
sjM.dtypes
Customer Name object
Total Sales float64
Sales Rank float64
Visit_Frequency float64
Last_Sale datetime64[ns]
dtype: object
csv export works fine
path = 'c:\\test'
sjM.to_csv(path + '.csv') # Works
but the excel export fails
sjM.to_excel(path + '.xls')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "testing.py", line 338, in <module>
sjM.to_excel(path + '.xls')
File "c:\Anaconda\Lib\site-packages\pandas\core\frame.py", line 1197, in to_excel
excel_writer.save()
File "c:\Anaconda\Lib\site-packages\pandas\io\excel.py", line 595, in save
return self.book.save(self.path)
File "c:\Anaconda\Lib\site-packages\xlwt\Workbook.py", line 662, in save
doc.save(filename, self.get_biff_data())
File "c:\Anaconda\Lib\site-packages\xlwt\Workbook.py", line 637, in get_biff_data
shared_str_table = self.__sst_rec()
File "c:\Anaconda\Lib\site-packages\xlwt\Workbook.py", line 599, in __sst_rec
return self.__sst.get_biff_record()
File "c:\Anaconda\Lib\site-packages\xlwt\BIFFRecords.py", line 76, in get_biff_record
self._add_to_sst(s)
File "c:\Anaconda\Lib\site-packages\xlwt\BIFFRecords.py", line 91, in _add_to_sst
u_str = upack2(s, self.encoding)
File "c:\Anaconda\Lib\site-packages\xlwt\UnicodeUtils.py", line 50, in upack2
us = unicode(s, encoding)
UnicodeDecodeError: 'ascii' codec can't decode byte 0x81 in position 22: ordinal not in range(128)
I know that the problem is coming from the 'Customer Name' column, as after deletion the export to excel works fine.
I've tried following advice from that question (Python pandas to_excel 'utf8' codec can't decode byte), using a function to decode and re-encode the offending column
def changeencode(data):
cols = data.columns
for col in cols:
if data[col].dtype == 'O':
data[col] = data[col].str.decode('latin-1').str.encode('utf-8')
return data
sJM = changeencode(sjM)
sjM['Customer Name'].str.decode('utf-8')
L2-00864 SETIA 2
K1-00279 BERKAT JAYA
L2-00664 TK. ANTO
BR00035 BRASIL JAYA,TK
RA00011 CV. RAHAYU SENTOSA
so the conversion to unicode appears to be successful
sjM.to_excel(path + '.xls')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "c:\Anaconda\Lib\site-packages\pandas\core\frame.py", line 1197, in to_excel
excel_writer.save()
File "c:\Anaconda\Lib\site-packages\pandas\io\excel.py", line 595, in save
return self.book.save(self.path)
File "c:\Anaconda\Lib\site-packages\xlwt\Workbook.py", line 662, in save
doc.save(filename, self.get_biff_data())
File "c:\Anaconda\Lib\site-packages\xlwt\Workbook.py", line 637, in get_biff_data
shared_str_table = self.__sst_rec()
File "c:\Anaconda\Lib\site-packages\xlwt\Workbook.py", line 599, in __sst_rec
return self.__sst.get_biff_record()
File "c:\Anaconda\Lib\site-packages\xlwt\BIFFRecords.py", line 76, in get_biff_record
self._add_to_sst(s)
File "c:\Anaconda\Lib\site-packages\xlwt\BIFFRecords.py", line 91, in _add_to_sst
u_str = upack2(s, self.encoding)
File "c:\Anaconda\Lib\site-packages\xlwt\UnicodeUtils.py", line 50, in upack2
us = unicode(s, encoding)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 22: ordinal not in range(128)
Why does it fail, even though the conversion to unicode appears to be successful?
How can I work around this issue to export that dataframe to Excel?
@Jeff
Thanks for pointing me in the right direction.
Steps used:
install xlsxwriter (not bundled with pandas)
sjM.to_excel(path + '.xlsx', sheet_name='Sheet1', engine='xlsxwriter')

You need to use pandas >= 0.13 and the xlsxwriter engine for Excel, which supports native unicode writing. For xlwt, the default engine, support for passing an encoding option will be available in 0.14.
see here for the engine docs.

Related

anaconda navigator stuck on loading applications

(base) C:\Windows\System32>anaconda-navigator
2022-10-25 21:01:52,124 - ERROR init.global_exception_logger:19
'utf-8' codec can't decode byte 0xbb in position 0: invalid start byte
Traceback (most recent call last):
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\main_window_init_.py", line 497, in setup
self.post_setup(conda_data=output)
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\main_window_init_.py", line 525, in post_setup
self.tab_home.setup(conda_data)
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\tabs\home.py", line 253, in setup
self.update_applications()
File "D:\anaconda3\lib\site-packages\anaconda_navigator\widgets\tabs\home.py", line 292, in update_applications
self.api.process_apps(self._applications, prefix=self.current_prefix).values(),
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\anaconda_api.py", line 561, in process_apps
collected_applications: external_apps.ApplicationCollection = external_apps.get_applications(
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps_init.py", line 49, in get_applications
apps: typing.Sequence[typing.Union[BaseApp, AppPatch]] = config_utils.load_configuration(context=context)
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\config_utils.py", line 217, in load_configuration
return apply_configuration(
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\config_utils.py", line 198, in apply_configuration
addition: typing.Union[None, base.BaseApp, base.AppPatch] = base.BaseApp.parse_configuration(
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\base.py", line 233, in parse_configuration
return target_cls._parse_configuration( # pylint: disable=protected-access
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\base.py", line 458, in _parse_configuration
result: BaseInstallableApp = BaseInstallableApp(
File "", line 17, in init
self.attrs_post_init()
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\base.py", line 378, in attrs_post_init
for location in self._detector(context=context):
File "D:\anaconda3\lib\site-packages\anaconda_navigator\api\external_apps\bundle\vscode_utils.py", line 58, in call
stdout, _, _ = conda_launch_utils.run_process([application.executable, '--version'])
File "D:\anaconda3\lib\site-packages\anaconda_navigator\utils\conda\launch.py", line 45, in run_process
stdout = ansi_utlils.escape_ansi(raw_stdout.decode())
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbb in position 0: invalid start byte
can anyone help me to solve this issue?

UnicodeEncodeError: Ascii codec can't encode character u2581 in position 0: ordinal not in range(128)

Traceback (most recent call last): File "train.py", line 18, in
tf.app.run(main=nmt.main, argv=[os.getcwd() + '\nmt\nmt\nmt.py'] + unparsed) File
"/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py",
line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough)) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/nmt.py", line 551, in
main
run_main(FLAGS, default_hparams, train_fn, inference_fn) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/nmt.py", line 544, in
run_main
train_fn(hparams, target_session=target_session) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 271, in
train
sample_tgt_data) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 142, in
run_full_eval
sample_src_data, sample_tgt_data) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 55, in
run_sample_decode
infer_model.batch_size_placeholder, summary_writer) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 454, in
_sample_decode
utils.print_out(b" src: " + utils.format_sentence(src_data[decode_id], hparams.subword_option))
File
"/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/utils/misc_utils.py",
line 193, in format_sentence
sentence = format_spm_text(sentence) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/utils/misc_utils.py",
line 181, in format_spm_text
return u"".join(format_text(symbols).decode("utf-8").split()).replace( File
"/usr/lib/python2.7/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True) UnicodeEncodeError: 'ascii' codec can't encode character u'\u2581' in
position 0: ordinal not in range(128)
Traceback (most recent call last): File "train.py", line 18, in
tf.app.run(main=nmt.main, argv=[os.getcwd() + '\nmt\nmt\nmt.py'] + unparsed) File
"/usr/local/lib/python2.7/dist-packages/tensorflow/python/platform/app.py",
line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough)) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/nmt.py", line 551, in
main
run_main(FLAGS, default_hparams, train_fn, inference_fn) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/nmt.py", line 544, in
run_main
train_fn(hparams, target_session=target_session) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 271, in
train
sample_tgt_data) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 142, in
run_full_eval
sample_src_data, sample_tgt_data) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 55, in
run_sample_decode
infer_model.batch_size_placeholder, summary_writer) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/train.py", line 454, in
_sample_decode
utils.print_out(b" src: " + utils.format_sentence(src_data[decode_id], hparams.subword_option))
File
"/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/utils/misc_utils.py",
line 193, in format_sentence
sentence = format_spm_text(sentence) File "/home/paperspace/Desktop/nmt-chatbot/nmt/nmt/utils/misc_utils.py",
line 181, in format_spm_text
return u"".join(format_text(symbols).decode("utf-8").split()).replace( File
"/usr/lib/python2.7/encodings/utf_8.py", line 16, in decode
return codecs.utf_8_decode(input, errors, True) UnicodeEncodeError: 'ascii' codec can't encode character u'\u2581' in
position 0: ordinal not in range(128)
I am getting an error in these lines of code:
def format_spm_text(symbols):
"""Decode a text in SPM (https://github.com/google/sentencepiece)
format."""
return u"".join(format_text(symbols).decode("utf-8").split()).replace(
u"\u2581", u" ").strip().encode("utf-8")
I am trying to train a chatbot by running a file called 'train.py'. I use the command 'sudo python train.py', and my current Python version on Ubuntu is 3.6. On my local macOS machine the exact same code seems to work fine, but I am running Python 2.7 there.
Try out this:
def format_spm_text(symbols):
return u"".join(format_text(symbols).decode("unicode-escape").split()).replace(
u"\u2581", u" ").strip().encode("utf-8")

error opening xlsx files in python

I am trying to open an xlsx file that is created by another system (and this is the format in which the data always comes, and is not in my control). I tried both openpyxl (v2.3.2) and xlrd (v1.0.0) (as well as pandas (v0.20.1) read_excel and pd.ExcelFile(), both of which are using xlrd, and so may be moot), and I am running into errors; plus not finding answers from my searches. Any help is appreciated.
xlrd code:
import xlrd
workbook = xlrd.open_workbook(r'C:/Temp/Data.xlsx')
Error:
Traceback (most recent call last):
File "<ipython-input-3-9e5d87f720d0>", line 2, in <module>
workbook = xlrd.open_workbook(r'C:/Temp/Data.xlsx')
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\__init__.py", line 422, in open_workbook
ragged_rows=ragged_rows,
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 833, in open_workbook_2007_xml
x12sheet.process_stream(zflo, heading)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 548, in own_process_stream
self_do_row(elem)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 685, in do_row
self.sheet.put_cell(rowx, colx, None, float(tvalue), xf_index)
ValueError: could not convert string to float:
openpyxl code:
import openpyxl
wb = openpyxl.load_workbook(r'C:/Temp/Data.xlsx')
Error:
Traceback (most recent call last):
File "<ipython-input-2-6083ad2bc875>", line 1, in <module>
wb = openpyxl.load_workbook(r'C:/Temp/Data.xlsx')
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\reader\excel.py", line 234, in load_workbook
parser.parse()
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\reader\worksheet.py", line 106, in parse
dispatcher[tag_name](element)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\reader\worksheet.py", line 243, in parse_row_dimensions
self.parse_cell(cell)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\reader\worksheet.py", line 188, in parse_cell
value = _cast_number(value)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\cell\read_only.py", line 23, in _cast_number
return long(value)
ValueError: invalid literal for int() with base 10: ' '
pandas code:
import pandas as pd
df = pd.read_excel(r'C:/Temp/Data.xlsx', sheetname='Sheet1')
Error:
Traceback (most recent call last):
File "<ipython-input-5-b86ec98a4e9e>", line 2, in <module>
df = pd.read_excel(r'C:/Temp/Data.xlsx', sheetname='Sheet1')
File "C:\Program Files\Anaconda3\lib\site-packages\pandas\io\excel.py", line 200, in read_excel
io = ExcelFile(io, engine=engine)
File "C:\Program Files\Anaconda3\lib\site-packages\pandas\io\excel.py", line 257, in __init__
self.book = xlrd.open_workbook(io)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\__init__.py", line 422, in open_workbook
ragged_rows=ragged_rows,
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 833, in open_workbook_2007_xml
x12sheet.process_stream(zflo, heading)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 548, in own_process_stream
self_do_row(elem)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 685, in do_row
self.sheet.put_cell(rowx, colx, None, float(tvalue), xf_index)
ValueError: could not convert string to float:
For what it's worth, here is an example snippet of the input file:
I am guessing that the errors are coming from the first row having blanks beyond the first column, because the errors vanish when I delete the first two rows. I cannot skip the first two rows, because I want to extract the value in cell A1. I would also like to force the values read to be string type, and will later convert them to float with error checking. Thanks!
===========
Update(Aug 9 10AM EDT): Using Charlie's suggestion, was able to open excel file in read only mode; and was able to read most of the contents - but still running into an error somewhere.
new code (sorry it is not very pythonic - still a newbie):
wb = openpyxl.load_workbook(r'C:/Temp/Data.xlsx', read_only=True)
ws = wb['Sheet1']
ws.max_row = ws.max_column = None
i=1
for row in ws.rows:
for cell in row:
if i<2000:
i += 1
try:
print(i, cell.value)
except:
print("error")
Error:
Traceback (most recent call last):
File "<ipython-input-65-2e8f3cf2294a>", line 2, in <module>
for row in ws.rows:
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\worksheet\read_only.py", line 125, in get_squared_range
yield tuple(self._get_row(element, min_col, max_col))
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\worksheet\read_only.py", line 165, in _get_row
value, data_type, style_id)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\cell\read_only.py", line 36, in __init__
self.value = value
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\cell\read_only.py", line 132, in value
value = _cast_number(value)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\cell\read_only.py", line 23, in _cast_number
return long(value)
ValueError: invalid literal for int() with base 10: ' '
=========
Update2 (10:35AM): When I read the file without ws.max_row and ws.max_column set to None, the code read just one column, without errors. The value in cell A66 is "Generated from:". But when I read the file with ws.max_row and ws.max_column set to None, this particular cell causes trouble. However, I can read all the other cells before it, and that will work fine for me right now. Thanks, @Charlie.
Sounds like the source file is probably corrupt and contains cells with empty strings that are typed as numbers. You might be able to use openpyxl's read-only mode to skip the first two rows.
If your program works after you delete the first two rows, then let's skip them. Try using skiprows to ignore the first 2 rows that are blank or are headers. You can use the parse method from pandas.
xls = pd.read_excel('C:/Temp/Data.xlsx')
df = xls.parse('Sheet1', skiprows=2) #assuming your data is on sheet1.

MemoryError when I use to_categorical

When I use an LSTM for multi-class classification, I use y_train as my label input.
The code is below:
y_train = yuantrain['LOCF']
labels = to_categorical(np.array(y_train),286)
The error message:
Traceback (most recent call last):
File "<ipython-input-55-4ae3f21f520f>", line 1, in <module>
labels = to_categorical(np.array(y_train))
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\utils\np_utils.py",
line 24, in to_categorical
categorical = np.zeros((n, num_classes))
MemoryError
Below are the first 5 values of y_train:
In [65]:
y_train[0:5]
Out[65]:
0 620245
1 282
2 500004
3 620193
4 60119
Name: LOCF, dtype: int64
What I have tried:
# biao qian chu li
# 处理标签为二进制,以及思考二进制的解码
labels = yuantrain["LOCF"].drop_duplicates()
#labels is the y_train's unique label
num_labels = len(labels) # (all of the unique labels)
one_hot = np.zeros((num_labels, num_labels), int)
np.fill_diagonal(one_hot, 1)
label_dict = dict(zip(labels, one_hot))
y_train = yuantrain['LOCF']
y_train = y_train.apply(lambda y: label_dict[y])
But this is not easy for me, because I then have to decode the labels in my own way.
When I use my own one-hot encoding, Keras also gives an error like the one below:
Train...
ERROR (theano.gof.opt): SeqOptimizer apply <theano.tensor.opt.FusionOptimizer object at 0x000000000D87B0F0>
ERROR (theano.gof.opt): Traceback:
ERROR (theano.gof.opt): Traceback (most recent call last):
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\opt.py", line 235, in apply
sub_prof = optimizer.optimize(fgraph)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\opt.py", line 87, in optimize
ret = self.apply(fgraph, *args, **kwargs)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\tensor\opt.py", line 7289, in apply
new_outputs = self.optimizer(node)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\tensor\opt.py", line 7122, in local_fuse
tv = gof.op.get_test_value(ii)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\op.py", line 987, in get_test_value
return PureOp._get_test_value(v_var)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\op.py", line 580, in _get_test_value
detailed_err_msg = utils.get_variable_trace_string(v)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\utils.py", line 134, in get_variable_trace_string
return sio.getvalue()
File "C:\ProgramData\Anaconda2\lib\StringIO.py", line 271, in getvalue
self.buf += ''.join(self.buflist)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 27: ordinal not in range(128)
ERROR (theano.gof.opt): SeqOptimizer apply <theano.tensor.opt.FusionOptimizer object at 0x000000000D87B0F0>
ERROR (theano.gof.opt): Traceback:
ERROR (theano.gof.opt): Traceback (most recent call last):
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\opt.py", line 235, in apply
sub_prof = optimizer.optimize(fgraph)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\opt.py", line 87, in optimize
ret = self.apply(fgraph, *args, **kwargs)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\tensor\opt.py", line 7289, in apply
new_outputs = self.optimizer(node)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\tensor\opt.py", line 7122, in local_fuse
tv = gof.op.get_test_value(ii)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\op.py", line 987, in get_test_value
return PureOp._get_test_value(v_var)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\op.py", line 580, in _get_test_value
detailed_err_msg = utils.get_variable_trace_string(v)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\utils.py", line 134, in get_variable_trace_string
return sio.getvalue()
File "C:\ProgramData\Anaconda2\lib\StringIO.py", line 271, in getvalue
self.buf += ''.join(self.buflist)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 27: ordinal not in range(128)
Train on 100000 samples, validate on 77963 samples
Epoch 1/5
Traceback (most recent call last):
File "<ipython-input-67-5ce4b6739b03>", line 1, in <module>
runfile('E:/XIAMENproject/Prediction_Guo/count.py', wdir='E:/XIAMENproject/Prediction_Guo')
File "C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile
execfile(filename, namespace)
File "C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py", line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "E:/XIAMENproject/Prediction_Guo/count.py", line 150, in <module>
validation_data=(x_val, y_val))
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\models.py", line 870, in fit
initial_epoch=initial_epoch)
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\engine\training.py", line 1507, in fit
initial_epoch=initial_epoch)
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\engine\training.py", line 1156, in _fit_loop
outs = f(ins_batch)
File "C:\ProgramData\Anaconda2\lib\site-packages\keras\backend\theano_backend.py", line 1196, in __call__
return self.function(*inputs)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\compile\function_module.py", line 805, in __call__
self.maker.inputs[i].variable)
File "C:\ProgramData\Anaconda2\lib\site-packages\theano\gof\utils.py", line 134, in get_variable_trace_string
return sio.getvalue()
File "C:\ProgramData\Anaconda2\lib\StringIO.py", line 271, in getvalue
self.buf += ''.join(self.buflist)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe8 in position 27: ordinal not in range(128)
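I remapped the label IDs to consecutive IDs (0, 1, 2, ...) and then used to_categorical. This helps because to_categorical allocates a zero array of shape (n, num_classes), and when num_classes is inferred from raw label values as large as 620245, that array is too big to fit in memory.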

python - How to handle "old" dates when transfering data to excel

I have the dataframe where one of the columns contains date strings. I first convert it to datetime with:
mydf['Desk Date'] = pd.to_datetime(mydf['Desk Date'])
and then drop the dataframe to excel with
Range('A1').value = mydf
I get the following error:
Traceback (most recent call last):
File "C:\Program Files (x86)\Python271\lib\site-packages\IPython\core\interactiveshell.py", line 3035, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-111-6c6f5ea1ff17>", line 1, in <module>
Import.ImportFWD(test_path)
File "C:\Users\jastrzem\Downloads\pyWFP\Import.py", line 42, in ImportFWD
Range('A1').value = mydf
File "C:\Program Files (x86)\Python271\lib\site-packages\xlwings\main.py", line 818, in value
self.row1, self.col1, row2, col2), data)
File "C:\Program Files (x86)\Python271\lib\site-packages\xlwings\_xlwindows.py", line 151, in set_value
xl_range.Value = data
File "C:\Program Files (x86)\Python271\lib\site-packages\win32com\client\dynamic.py", line 560, in __setattr__
self._oleobj_.Invoke(entry.dispid, 0, invoke_type, 0, value)
com_error: (-2147352567, 'Exception occurred.', (0, None, None, None, 0, -2146827284), None)
One of the dates is Timestamp('1899-01-31 00:00:00')
which I think is the reason for the error.
I tried to use np.where to substitute all values before year 2000 to NaN, but with no luck.
f = lambda x: x.year
mydf['Desk Date'] = np.where(pd.DataFrame(mydf['Desk Date']).applymap(f) > 2000, pd.to_datetime(mydf['Desk Date'], format='%D/%M/%Y'),np.nan)
How can I fix the above command or alternatively how should I handle dates that are "not transferable" to excel?
Thanks!
[EDIT]:
I tried to use to_excel method but with no luck either. The code I put at the end of my function:
writer = pd.ExcelWriter('test7.xlsx', engine='xlsxwriter')
mydf.to_excel(writer, sheet_name = 'Sheet1')
writer.close()
it creates the file but it's empty. I get the following error:
Traceback (most recent call last):
File "C:\Program Files (x86)\Python271\lib\site-packages\IPython\core\interactiveshell.py", line 3035, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-26-6c6f5ea1ff17>", line 1, in <module>
Import.ImportFWD(test_path)
File "C:\Users\jastrzem\Downloads\pyWFP\Import.py", line 44, in ImportFWD
writer.close()
File "C:\Program Files (x86)\Python271\lib\site-packages\pandas\io\excel.py", line 623, in close
return self.save()
File "C:\Program Files (x86)\Python271\lib\site-packages\pandas\io\excel.py", line 1298, in save
return self.book.close()
File "C:\Program Files (x86)\Python271\lib\site-packages\xlsxwriter\workbook.py", line 295, in close
self._store_workbook()
File "C:\Program Files (x86)\Python271\lib\site-packages\xlsxwriter\workbook.py", line 518, in _store_workbook
xml_files = packager._create_package()
File "C:\Program Files (x86)\Python271\lib\site-packages\xlsxwriter\packager.py", line 140, in _create_package
self._write_shared_strings_file()
File "C:\Program Files (x86)\Python271\lib\site-packages\xlsxwriter\packager.py", line 280, in _write_shared_strings_file
sst._assemble_xml_file()
File "C:\Program Files (x86)\Python271\lib\site-packages\xlsxwriter\sharedstrings.py", line 53, in _assemble_xml_file
self._write_sst_strings()
File "C:\Program Files (x86)\Python271\lib\site-packages\xlsxwriter\sharedstrings.py", line 83, in _write_sst_strings
self._write_si(string)
File "C:\Program Files (x86)\Python271\lib\site-packages\xlsxwriter\sharedstrings.py", line 110, in _write_si
self._xml_si_element(string, attributes)
File "C:\Program Files (x86)\Python271\lib\site-packages\xlsxwriter\xmlwriter.py", line 122, in _xml_si_element
self.fh.write("""<si><t%s>%s</t></si>""" % (attr, string))
File "C:\Program Files (x86)\Python271\lib\codecs.py", line 694, in write
return self.writer.write(data)
File "C:\Program Files (x86)\Python271\lib\codecs.py", line 357, in write
data, consumed = self.encode(object, self.errors)
UnicodeDecodeError: 'ascii' codec can't decode byte 0x94 in position 26: ordinal not in range(128)
The error is not because of the old date, but because you are trying to throw a whole dataframe at a single cell.
Instead, use the to_excel method.
Excel will not accept dates before 1900. My workaround is to replace "old" dates with np.nan since I know they are data errors anyway.
mydf['Desk Date'] = pd.to_datetime(mydf['Desk Date'])
dates_list = list(mydf['Desk Date'])
dates_list = [x if x.year > 1900 else np.nan for x in dates_list ]
mydf['Desk Date'] = dates_list

Categories