I'm using xlrd, xlwt and openpyxl for Excel file manipulation.
I have the method below, which goes over 30 files in a folder and changes a specific column for each row in each file (the files have 5, 15, 17, etc. rows).
The method below can be run over and over, and each file will be updated accordingly.
But,
once I edit a file (any of the files, and the edit can be as small as changing one cell from A to a, for instance), the file can't be read by xlrd any more, and I get the exception below.
Any idea why?
Exception:
Traceback (most recent call last):
File "/home/ohad/automationProj/automation-linux/0_master_code_prep.py", line 154, in
sanity_run_all_change()
File "/home/ohad/automationProj/automation-linux/0_master_code_prep.py", line 68, in sanity_run_all_change
ExcelWorkBook1 = open_workbook(config.STDFOLDER + '%s.xlsx'%suitename, on_demand=True)
File "/usr/local/lib/python2.7/dist-packages/xlrd/init.py", line 422, in open_workbook
ragged_rows=ragged_rows,
File "/usr/local/lib/python2.7/dist-packages/xlrd/xlsx.py", line 794, in open_workbook_2007_xml
x12sheet.process_stream(zflo, heading)
File "/usr/local/lib/python2.7/dist-packages/xlrd/xlsx.py", line 534, in own_process_stream
self.do_dimension(elem)
File "/usr/local/lib/python2.7/dist-packages/xlrd/xlsx.py", line 568, in do_dimension
rowx, colx = cell_name_to_rowx_colx(last_cell_ref)
File "/usr/local/lib/python2.7/dist-packages/xlrd/xlsx.py", line 91, in cell_name_to_rowx_colx
assert 0 <= colx < X12_MAX_COLS
AssertionError
Code:
def fullregression_run_all_change():
    """Set every test of every full-regression suite to 'RUN'.

    Suite names are read from the second column (index 1) of the first sheet
    of ``STD_SUITES.xlsx``; the entry "Nightly Full Regression Run" is
    skipped.  For each remaining suite, ``<STDFOLDER><suite>.xlsx`` is opened
    with openpyxl, column 8 is set to 'RUN' from row 4 down to the last row
    of the active sheet, and the file is saved in place.
    """
    # FUll regression suites go over and change testes to Run
    print("Go over Sanity suites and Change all Test to Run position")
    index_book = open_workbook(
        config.UI_Suites_Location + 'STD_SUITES.xlsx', on_demand=True)
    try:
        index_sheet = index_book.sheet_by_index(0)
        # col_values() is the public accessor; the private _cell_values
        # attribute is not needed to read one column.
        suite_names = [
            str(name) for name in index_sheet.col_values(1)
            if name != "Nightly Full Regression Run"
        ]
    finally:
        index_book.release_resources()
    print("Full regression suites count is %s" % len(suite_names))

    first_update_row = 4
    for suite_name in suite_names:
        dest = config.STDFOLDER + suite_name + ".xlsx"
        wb = load_workbook(filename=dest)
        ws = wb.active
        # The row count comes from openpyxl itself.  Opening the same file a
        # second time with xlrd only to read nrows was redundant, and it is
        # the call that raised AssertionError on edited files.
        for row in range(first_update_row, ws.max_row + 1):
            ws.cell(row=row, column=8).value = 'RUN'
        wb.save(dest)
In my case the error was
assert 1 <= nr <= self.utter_max_rows AssertionError
when reading an .xls file in Python.
See my answer at the link below and change the MAX-COLUMNS limit in sheet.py accordingly.
AssertionError with pandas when reading excel
I think your problem will be solved... :)
Related
What I want to do is to enter a string value into the excel sheet in two columns automatically when the user enters a value.
# Build an .xls workbook with one sheet and write the employee name and the
# prediction result into it, then save it as 'xlwt example.xls'.
# NOTE(review): indentation was lost in this listing, so which statements
# belong to the loop body and to each if/else branch cannot be determined here.
wb1 = xlwt.Workbook()
sheet1 = wb1.add_sheet("Sheet 1", cell_overwrite_ok=True)
# iq is used below as a ROW index (name column), pq as a COLUMN index (result).
iq = 1
pq = 2
for i in range(723):
# Name goes to row iq, column 0.
sheet1.write(iq, 0, Employee_Name)
if Prediction_result == "{['No']}":
resultt = 'No'
iq = iq + 1
else:
resultt = 'Yes'
pq = pq + 1
# NOTE(review): this writes to row 0 and advances the column index pq. The
# traceback below shows xlwt rejecting column index 256 ("not an int in
# range(256)"), so pq presumably should select the row, not the column --
# confirm the intended layout.
sheet1.write(0, pq, resultt)
wb1.save('xlwt example.xls')
In the previous mode I was able to save the values into the sheet, but it was saving in the same cell, so I wanted it to be saved in the cell below the previous one, while doing that I got the following error:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Users\parth\AppData\Local\Programs\Python\Python39\lib\tkinter\__init__.py", line 1892, in __call__
return self.func(*args)
File "C:\Work\churn1\main.py", line 80, in values
sheet1.write(0, pq, resultt)
File "C:\Work\churn1\venv\lib\site-packages\xlwt\Worksheet.py", line 1088, in write
self.row(r).write(c, label, style)
File "C:\Work\churn1\venv\lib\site-packages\xlwt\Row.py", line 230, in write
self.__adjust_bound_col_idx(col)
File "C:\Work\churn1\venv\lib\site-packages\xlwt\Row.py", line 73, in __adjust_bound_col_idx
raise ValueError("column index (%r) not an int in range(256)" % arg)
ValueError: column index (256) not an int in range(256)
Python version = 3.9
I am almost at the finish line, but I'm not done yet! I created and compiled all of the messages in order to have a site with 2 languages, and I received this error when running the server: AttributeError: module 'locale' has no attribute 'normalize'.
Can someone please help me?
Traceback (most recent call last):
File "/Users/ionutcohen/Dropbox/PycharmProjects/chn/manage.py", line 15, in <module>
execute_from_command_line(sys.argv)
File "/Users/ionutcohen/Dropbox/PycharmProjects/chn/venv/lib/python3.6/site-packages/django/core/management/__init__.py", line 371, in execute_from_command_line
utility.execute()
File "/Users/ionutcohen/Dropbox/PycharmProjects/chn/venv/lib/python3.6/site-packages/django/core/management/__init__.py", line 306, in execute
parser = CommandParser(None, usage="%(prog)s subcommand [options] [args]", add_help=False)
File "/Users/ionutcohen/Dropbox/PycharmProjects/chn/venv/lib/python3.6/site-packages/django/core/management/base.py", line 47, in __init__
super().__init__(**kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/argparse.py", line 1633, in __init__
self._positionals = add_group(_('positional arguments'))
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/gettext.py", line 606, in gettext
return dgettext(_current_domain, message)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/gettext.py", line 570, in dgettext
codeset=_localecodesets.get(domain))
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/gettext.py", line 505, in translation
mofiles = find(domain, localedir, languages, all=True)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/gettext.py", line 477, in find
for nelang in _expand_lang(lang):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/gettext.py", line 206, in _expand_lang
loc = locale.normalize(loc)
AttributeError: module 'locale' has no attribute 'normalize'
Process finished with exit code 1
This is how my locale folder looks like:
This is the function from gettext.py refered to the error. Seems the first line has the error:
def _expand_lang(loc):
    """Expand a locale name into the list of names to try, most specific first.

    *loc* is normalized with ``locale.normalize`` and split into language,
    territory (``_XX``), codeset (``.XXX``) and modifier (``@xxx``).  Every
    combination of the components that are actually present is generated,
    and the list is returned with the full name first and the bare language
    last.
    """
    loc = locale.normalize(loc)
    COMPONENT_CODESET = 1 << 0
    COMPONENT_TERRITORY = 1 << 1
    COMPONENT_MODIFIER = 1 << 2
    # split up the locale into its base components
    mask = 0
    # The modifier is introduced by '@' (e.g. 'sr_RS.UTF-8@latin'); searching
    # for '#' never matched, so the modifier stayed glued to the codeset.
    pos = loc.find('@')
    if pos >= 0:
        modifier = loc[pos:]
        loc = loc[:pos]
        mask |= COMPONENT_MODIFIER
    else:
        modifier = ''
    pos = loc.find('.')
    if pos >= 0:
        codeset = loc[pos:]
        loc = loc[:pos]
        mask |= COMPONENT_CODESET
    else:
        codeset = ''
    pos = loc.find('_')
    if pos >= 0:
        territory = loc[pos:]
        loc = loc[:pos]
        mask |= COMPONENT_TERRITORY
    else:
        territory = ''
    language = loc
    ret = []
    for i in range(mask + 1):
        if not (i & ~mask):  # if all components for this combo exist ...
            val = language
            if i & COMPONENT_TERRITORY:
                val += territory
            if i & COMPONENT_CODESET:
                val += codeset
            if i & COMPONENT_MODIFIER:
                val += modifier
            ret.append(val)
    # Combinations were built from least to most specific; callers get the
    # most specific name first.
    ret.reverse()
    return ret
Later Edit: I've deleted the init files and now i got this error:
File "/Users/ionutcohen/Dropbox/PycharmProjects/chn/manage.py", line 8, in <module>
from django.core.management import execute_from_command_line
File "/Users/ionutcohen/Dropbox/PycharmProjects/chn/venv/lib/python3.6/site-packages/django/core/management/__init__.py", line 12, in <module>
from django.core.management.base import (
File "/Users/ionutcohen/Dropbox/PycharmProjects/chn/venv/lib/python3.6/site-packages/django/core/management/base.py", line 7, in <module>
from argparse import ArgumentParser
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/argparse.py", line 93, in <module>
from gettext import gettext as _, ngettext
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/gettext.py", line 409
advance to next entry in the seek tables
^
SyntaxError: invalid syntax
Process finished with exit code 1
I had the same. In PyCharm I had made a directory "locale" (with a __init__.py for no reason whatsoever) for my internationalisation and got this message:
AttributeError: module 'locale' has no attribute 'normalize'
This is due to the fact it is looking for a Python module "locale" but that is masked by your Django directory "locale".
Either remove the __init__.py and/or rename your directory "locale". I did both... "To stitch something twice is to stitch it well", as we say in Holland.
It seems that everywhere on the net people use "locale" as the directory for their internationalization. Hmm... it now seems to me a bit like making a class with the name "Class" or a SQL table with the name "Table". I have seen it done, and at times it works, but I would never recommend it.
I am trying to open an xlsx file that is created by another system (and this is the format in which the data always comes, and is not in my control). I tried both openpyxl (v2.3.2) and xlrd (v1.0.0) (as well as pandas (v0.20.1) read_excel and pd.ExcelFile(), both of which are using xlrd, and so may be moot), and I am running into errors; plus not finding answers from my searches. Any help is appreciated.
xlrd code:
import xlrd
# Open the workbook with xlrd. On this input file the call raises the
# "could not convert string to float" ValueError shown in the traceback below.
workbook = xlrd.open_workbook(r'C:/Temp/Data.xlsx')
Error:
Traceback (most recent call last):
File "<ipython-input-3-9e5d87f720d0>", line 2, in <module>
workbook = xlrd.open_workbook(r'C:/Temp/Data.xlsx')
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\__init__.py", line 422, in open_workbook
ragged_rows=ragged_rows,
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 833, in open_workbook_2007_xml
x12sheet.process_stream(zflo, heading)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 548, in own_process_stream
self_do_row(elem)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 685, in do_row
self.sheet.put_cell(rowx, colx, None, float(tvalue), xf_index)
ValueError: could not convert string to float:
openpyxl code:
import openpyxl
# Load the workbook with openpyxl (default, non-read-only mode). On this input
# file the call raises the "invalid literal for int()" ValueError shown below.
wb = openpyxl.load_workbook(r'C:/Temp/Data.xlsx')
Error:
Traceback (most recent call last):
File "<ipython-input-2-6083ad2bc875>", line 1, in <module>
wb = openpyxl.load_workbook(r'C:/Temp/Data.xlsx')
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\reader\excel.py", line 234, in load_workbook
parser.parse()
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\reader\worksheet.py", line 106, in parse
dispatcher[tag_name](element)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\reader\worksheet.py", line 243, in parse_row_dimensions
self.parse_cell(cell)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\reader\worksheet.py", line 188, in parse_cell
value = _cast_number(value)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\cell\read_only.py", line 23, in _cast_number
return long(value)
ValueError: invalid literal for int() with base 10: ' '
pandas code:
import pandas as pd
# Read Sheet1 through pandas. The traceback below shows pandas delegating to
# xlrd.open_workbook, so it fails with the same ValueError as the xlrd attempt.
df = pd.read_excel(r'C:/Temp/Data.xlsx', sheetname='Sheet1')
Error:
Traceback (most recent call last):
File "<ipython-input-5-b86ec98a4e9e>", line 2, in <module>
df = pd.read_excel(r'C:/Temp/Data.xlsx', sheetname='Sheet1')
File "C:\Program Files\Anaconda3\lib\site-packages\pandas\io\excel.py", line 200, in read_excel
io = ExcelFile(io, engine=engine)
File "C:\Program Files\Anaconda3\lib\site-packages\pandas\io\excel.py", line 257, in __init__
self.book = xlrd.open_workbook(io)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\__init__.py", line 422, in open_workbook
ragged_rows=ragged_rows,
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 833, in open_workbook_2007_xml
x12sheet.process_stream(zflo, heading)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 548, in own_process_stream
self_do_row(elem)
File "C:\Program Files\Anaconda3\lib\site-packages\xlrd\xlsx.py", line 685, in do_row
self.sheet.put_cell(rowx, colx, None, float(tvalue), xf_index)
ValueError: could not convert string to float:
For what its worth, here is an example snippet of the input file:
I am guessing that the errors come from the first row having blanks beyond the first column, because the errors vanish when I delete the first two rows. I cannot skip the first two rows, because I want to extract the value in cell A1. I would also like to force the values read to be of string type, and will later convert them to float with error checking. Thanks!
===========
Update(Aug 9 10AM EDT): Using Charlie's suggestion, was able to open excel file in read only mode; and was able to read most of the contents - but still running into an error somewhere.
new code (sorry it is not very pythonic - still a newbie):
# Open the workbook in openpyxl's read-only mode and print cell values from
# Sheet1 together with a running counter i.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the if / try statements below cannot be determined here.
wb = openpyxl.load_workbook(r'C:/Temp/Data.xlsx', read_only=True)
ws = wb['Sheet1']
# Clear the sheet's recorded dimensions before iterating.
ws.max_row = ws.max_column = None
i=1
for row in ws.rows:
for cell in row:
# i < 2000 caps how far the counter (and presumably the output) goes.
if i<2000:
i += 1
try:
print(i, cell.value)
# NOTE(review): a bare except also swallows KeyboardInterrupt/SystemExit.
# The traceback below is raised from "for row in ws.rows", i.e. outside this
# try, so this handler does not catch the reported ValueError.
except:
print("error")
Error:
Traceback (most recent call last):
File "<ipython-input-65-2e8f3cf2294a>", line 2, in <module>
for row in ws.rows:
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\worksheet\read_only.py", line 125, in get_squared_range
yield tuple(self._get_row(element, min_col, max_col))
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\worksheet\read_only.py", line 165, in _get_row
value, data_type, style_id)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\cell\read_only.py", line 36, in __init__
self.value = value
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\cell\read_only.py", line 132, in value
value = _cast_number(value)
File "C:\Program Files\Anaconda3\lib\site-packages\openpyxl\cell\read_only.py", line 23, in _cast_number
return long(value)
ValueError: invalid literal for int() with base 10: ' '
=========
Update2 (10:35AM): when I read the file without setting ws.max_row and ws.max_column to None, the code read just one column, without errors. The value in cell A66 is "Generated from:". But when I read the file with ws.max_row and ws.max_column set to None, this particular cell causes trouble. I can read all the other cells before it, though, and that will work fine for me right now. Thanks, @Charlie.
It sounds like the source file is probably corrupt and contains cells with empty strings that are typed as numbers. You might be able to use openpyxl's read-only mode to skip the first two rows.
If your program works after you delete the first two rows, then let's skip them. Try using skiprows to ignore the first 2 rows, which are blank or are headers. You can use the parse method from pandas.
# pd.ExcelFile exposes .parse(); pd.read_excel returns a DataFrame, which has
# no parse method, so the original first line could not feed the second.
xls = pd.ExcelFile('C:/Temp/Data.xlsx')
df = xls.parse('Sheet1', skiprows=2) #assuming your data is on sheet1.
I am trying to append the values from one sheet row by row to a new workbook. My code works when I run it on a small test file, but when I run it on my target file it returns an error when saving.
Here is my code:
from openpyxl import load_workbook
from openpyxl import Workbook
# Copy every row of the 'Ottawa' sheet into a fresh workbook and save it.
wb = load_workbook(filename='RM Activity-Pricing Report - 2014-05-31.xlsm',keep_vba=False, data_only=True)
ws_Ottawa = wb.get_sheet_by_name('Ottawa')
wb2 = Workbook()
ws2 = wb2.create_sheet()
for row in ws_Ottawa.iter_rows():
    # Append plain values, not the source Cell objects: a Cell carries style
    # references into its own workbook, which do not exist in wb2 and make
    # wb2.save() fail with "IndexError: list index out of range".
    ws2.append([cell.value for cell in row])
wb2.save('new_big_file.xlsx')
The output error I get in Spyder (python 3.5) is:
Traceback (most recent call last):
File "<ipython-input-22-171ffbcd4891>", line 1, in <module>
runfile('Z:/Revenue Management Report/ExtractPromoData.py', wdir='Z:/Revenue Management Report')
File "C:\Anaconda3-64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 699, in runfile
execfile(filename, namespace)
File "C:\Anaconda3-64\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 88, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "Z:/Revenue Management Report/ExtractPromoData.py", line 35, in <module>
wb2.save('new_big_file4.xlsx')
File "C:\Anaconda3-64\lib\site-packages\openpyxl\workbook\workbook.py", line 298, in save
save_workbook(self, filename)
File "C:\Anaconda3-64\lib\site-packages\openpyxl\writer\excel.py", line 198, in save_workbook
writer.save(filename, as_template=as_template)
File "C:\Anaconda3-64\lib\site-packages\openpyxl\writer\excel.py", line 181, in save
self.write_data(archive, as_template=as_template)
File "C:\Anaconda3-64\lib\site-packages\openpyxl\writer\excel.py", line 87, in write_data
self._write_worksheets(archive)
File "C:\Anaconda3-64\lib\site-packages\openpyxl\writer\excel.py", line 114, in _write_worksheets
write_worksheet(sheet, self.workbook.shared_strings,
File "C:\Anaconda3-64\lib\site-packages\openpyxl\writer\worksheet.py", line 233, in write_worksheet
write_rows(xf, worksheet)
File "C:\Anaconda3-64\lib\site-packages\openpyxl\writer\lxml_worksheet.py", line 59, in write_rows
if cell.value is None and not cell.has_style:
File "C:\Anaconda3-64\lib\site-packages\openpyxl\cell\cell.py", line 306, in value
if value is not None and self.is_date:
File "C:\Anaconda3-64\lib\site-packages\openpyxl\cell\cell.py", line 351, in is_date
if self.data_type == "n" and self.number_format != "General":
File "C:\Anaconda3-64\lib\site-packages\openpyxl\styles\styleable.py", line 49, in __get__
return coll[idx - 164]
IndexError: list index out of range
I do not get an error when I use my code on a smaller test .xlsx file.
Possible reasons for the problem that I suspect are:
1)input file is .xlsm
2)input file has columns from A to CI
3)input file is password protected (but since the error is in saving this does not seem like it should be an issue)
Taking into account what Charlie said, this is my work-around
from openpyxl import load_workbook
from openpyxl import Workbook
# Work-around: copy only the cell VALUES of the 'Ottawa' sheet into a new
# workbook, so no style references from the source workbook are carried over.
wb = load_workbook(filename='RM Activity-Pricing Report - 2014-5-31.xlsm',keep_vba=False, data_only=True)#,guess_types=True)
ws_Ottawa = wb.get_sheet_by_name('Ottawa')
wb2 = Workbook()
ws2 = wb2.create_sheet()
# One list of values per source row; replaces the hand-maintained counter
# that was used to index into new_rows.
new_rows = [[cell.value for cell in rrow] for rrow in ws_Ottawa.iter_rows()]
for wrow in new_rows:
    ws2.append(wrow)
wb2.save('new_big_file4.xlsx')
print("ALL DONE")
You quite simply cannot do what you are trying to do. Unfortunately, the way data is stored within the file formats means that much relevant information is not stored with the cell but using reference to the workbook object. These obviously differ from workbook to workbook which is why you see errors when saving: the number format you want to use doesn't exist in the new file.
I am trying to automate a report using openpyxl and am in the early stages. I have just set up the first part whereby i copy and paste a range of cells from one workbook to another. However when i go to save the workbook i am pasting into it returns a key error. It also corrupts my template file and renders it un-openable.
I have found a chap who has had the same problem however his was not resolved. Someone has instructed him to use version 1.8.5 however i could only find upto 1.8.2.
ERROR: Failure: KeyError (-8937945243006069197)
----------------------------------------------------------------------
Traceback (most recent call last):
File "//anaconda/lib/python2.7/site-packages/nose/loader.py", line 413, in loadTestsFromName
addr.filename, addr.module)
File "//anaconda/lib/python2.7/site-packages/nose/importer.py", line 47, in importFromPath
return self.importFromDir(dir_path, fqname)
File "//anaconda/lib/python2.7/site-packages/nose/importer.py", line 94, in importFromDir
mod = load_module(part_fqname, fh, filename, desc)
File "/Users/joegavin/Dropbox/Python/projects/project pymur/tests/sandbox_tests.py", line 53, in <module>
wb0.save('template.xlsx')
File "//anaconda/lib/python2.7/site-packages/openpyxl/workbook.py", line 265, in save
save_workbook(self, filename)
File "//anaconda/lib/python2.7/site-packages/openpyxl/writer/excel.py", line 187, in save_workbook
writer.save(filename)
File "//anaconda/lib/python2.7/site-packages/openpyxl/writer/excel.py", line 170, in save
self.write_data(archive)
File "//anaconda/lib/python2.7/site-packages/openpyxl/writer/excel.py", line 98, in write_data
self._write_worksheets(archive, shared_string_table, self.style_writer)
File "//anaconda/lib/python2.7/site-packages/openpyxl/writer/excel.py", line 128, in _write_worksheets
style_writer.get_style_by_hash()))
File "//anaconda/lib/python2.7/site-packages/openpyxl/writer/worksheet.py", line 98, in write_worksheet
write_worksheet_cols(doc, worksheet, style_table)
File "//anaconda/lib/python2.7/site-packages/openpyxl/writer/worksheet.py", line 209, in write_worksheet_cols
col_def['style'] = str(style_table[hash(columndimension.style_index)])
KeyError: -8937945243006069197
Here is all my code so far.
from openpyxl import load_workbook
from openpyxl import workbook
########################### CLASSES ####################################
class Institution(object):
    """Plain record of one institution's usage figures."""

    def __init__(self, name, usage_hours, percentage_share,
                 target_percentage, number_of_projects):
        """Store the five constructor arguments as same-named attributes."""
        (self.name,
         self.usage_hours,
         self.percentage_share,
         self.target_percentage,
         self.number_of_projects) = (name,
                                     usage_hours,
                                     percentage_share,
                                     target_percentage,
                                     number_of_projects)
class Project(object):
    """Plain record of one project's share figures."""

    def __init__(self, project_name, project_code, percentage_share,
                 percentage_share_machine):
        """Store the four constructor arguments as same-named attributes."""
        self.project_name = project_name
        # project_code was accepted but silently dropped before.
        self.project_code = project_code
        self.percentage_share = percentage_share
        self.percentage_share_machine = percentage_share_machine
############################ Setup ########################################
# Load the workbook the figures are read from (wb0) and the workbook they
# are copied into (wb1, loaded from the template).
wb0 = load_workbook('figures.xlsx')
wb1 = load_workbook('template.xlsx')
# Navigating to the worksheet where we harvest the data
ws0 = wb0.worksheets[0]
# Navigating to the worksheet we will copy data to
ws1 = wb1.worksheets[0]
# Source and destination cell ranges (B1:B100 on each sheet).
project_codes0 = ws0["B1":"B100"]
project_codes1 = ws1['B1':'B100']
#cells1 = cells0
#for i in range(0, len(cells0[0])):
#for cell in cells0:
#print cell[i].value
# Save the destination workbook (wb1).  Saving wb0 here wrote the figures
# workbook over template.xlsx, replacing the template's own contents.
wb1.save('template.xlsx')
As per OP's comment
The problem was resolved by updating to the newest version 2.02