Modify Named Table in Excel File with Python openpyxl - python

I want to add columns to an existing table in an excel file.
Therefore I wan't to use python and the openpyxl library.
Right now I use a class when it is initialising, it is connecting to the file.
Afterwards I call the check_for_column function and when the column is not existing it should create it. And in the end of the script I save the file.
import os
from openpyxl import load_workbook
from openpyxl.worksheet.table import Table, TableColumn, range_boundaries
from openpyxl.utils.cell import get_column_letter
class ExcelHandler:
_wb_name = None
_table = None
_wb = None
def __init__(self):
self._wb_name = os.getenv('EXCEL_FULLPATH')
self._wb = load_workbook(filename=self._wb_name, keep_vba=True)
sheet = self._wb['DataInbox']
self._table = sheet.tables['WebPageForms']
return
def check_for_column(self, column_name):
if not column_name in self._table.column_names:
lst_ids = [my_object.id for my_object in self._table.tableColumns]
new_id = lst_ids[-1]+1
# change range of table
min_col, min_row, max_col, max_row = range_boundaries(
self._table.ref)
max_col += 1
mx = get_column_letter(max_col)
mn = get_column_letter(min_col)
self._table.ref = '{}{}:{}{}'.format(mn, min_row, mx, max_row)
# add column to table
tc = TableColumn(id=new_id, name=column_name)
self._table.tableColumns.append(tc)
return
def save_wb(self):
self._wb.save(self._wb_name)
return
The code runs fine as shown. Although when I then try to open the file with excel it gives me an alert saying:
We found a problem with some content in ’file.xlsm’. Do you want us to try to recover as much as we can? If you trust the source of this workbook, click Yes.
This is the repair result of excel when I press yes
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<recoveryLog xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"><logFileName>Repair Result to file.xml</logFileName><summary>Errors were detected in file ’*path*/file.xlsm’</summary><repairedRecords summary="Following is a list of repairs:"><repairedRecord>Repaired Records: Table from /xl/tables/table1.xml part (Table)</repairedRecord></repairedRecords></recoveryLog>
I would highly appreciate If anyone could help me

Ok, I found the problem why the excel file is corrupt, my bad.
when I create the column in the table, I also have to write the name in the respective cell:
def check_for_column(self, column_name):
***
# write name in cell of new column header
self._ws.cell(row=min_row, column=max_col).value = column_name
***
return
If I add this to the code, my table is modified correctly

Related

How do I convert a spreadsheet read with OpenPyXL into XlsxWriter?

TL; DR
How do I load a spreadsheet template using OpenPyXL
workbook = openpyxl.load_workbook('template.xlsx')
then create a new one and fill the rows data with XlsxWriter?
xls_workbook = xlsxwriter.Workbook('proposal.xlsx')
# Missing step
xls_workbook = workbook.add_worksheet(workbook)
xls_workbook.write(CELL_COST, cost)
Details
I have a spreadsheet with several tabs with a quite complex formatting (used as a sales proposal), so there is a lot of formatting that must be preserved.
So, instead of programatically create the spreadsheet, I have to:
Open the template for reading using OpenPyXL
Extract the template and save it into a template.py file
Create the sales pitch spreadsheets using the template, creating new tables with XlsxWriter.
Is there a simple way of extracting the template or do really I need recreate the whole layout programmatically?
I have run into the same "problem" and have not found much about it. So finally I code a little function to copy the template sheet and some styling.
It's 2 years late but I hope it helps someone who comes to this question.
def openpyxl_to_xlsxwriter(openpyxl_workbook, xlsxwriter_workbook):
template_sheets = openpyxl_workbook.get_sheet_names()
# Copy all the sheets from the openpyxl template to the xlsxwriter workbook.
for sheet_name in template_sheets:
xlsxwriter_workbook.add_worksheet(sheet_name)
for sheet in template_sheets:
openpyxl_active_sheet = openpyxl_workbook.get_sheet_by_name(sheet)
xlsxwriter_active_sheet = xlsxwriter_workbook.get_worksheet_by_name(sheet)
for row in openpyxl_active_sheet.rows:
for cell in row:
# # Copy some STYLES # #
# Copy font color and Background color
cell_style = {}
theme = cell.fill.start_color.theme
tint = cell.fill.start_color.tint
font_theme = cell.font.color.theme
font_tint = cell.font.color.tint
font_color = theme_and_tint_to_rgb(openpyxl_workbook, font_theme, font_tint)
cell_style['font_color'] = font_color
try:
background_color = theme_and_tint_to_rgb(openpyxl_workbook, theme, tint)
cell_style['bg_color'] = background_color
except:
pass
cell_format = xlsxwriter_workbook.add_format(cell_style)
# Copy border
border_left = cell.border.left.border_style
border_right = cell.border.right.border_style
border_top = cell.border.top.border_style
border_bottom = cell.border.bottom.border_style
if border_left:
cell_format.set_left()
if border_right:
cell_format.set_right()
if border_top:
cell_format.set_top()
if border_bottom:
cell_format.set_bottom()
# Copy Cell Width and Height
cell_height = openpyxl_active_sheet.row_dimensions[cell.row].height
cell_width = openpyxl_active_sheet.column_dimensions[cell.column_letter].width
column_index = cell.column - 1
xlsxwriter_active_sheet.set_column(column_index, column_index, cell_width)
row_index = cell.row - 1
xlsxwriter_active_sheet.set_row(row_index, cell_height)
# Copy Cell Value
xlsxwriter_active_sheet.write(cell.coordinate, cell.value, cell_format)

Adding decimal point, additional row w/ headers and rearranging columns in pythron script

I have the following code below which takes a specific type of .xml file and converts to a more reader friendly .csv file.
You can see what the reader friendly .csv looks like in the two linked .csv file output links below.
A few things I would like to add/change, but need help with altering my code to do so.
1.) I'd like to add one decimal position to column H so instead of reading say 20, I want it to read as 2.0. I'd also like to add one decimal position to any row that has CF in it, i.e. cell AE1, AG1, AI1, etc.
2.) I'd like to insert a row above row 1 where I can type in row headers. For example, H1 reads as T2. I want to insert a row above H1 that will say Accel./Decel. Time [s]. I'd like to be able to do this for every row in the spreadsheet.
3.) I'd like to rearrange the column order of the outputted .csv file. I'd like to move columns AD - BQ to where column I is.
Any help with any of these would be greatly appreciated. Thanks!
.csv file output
.csv file output1
from lxml import etree
import os
import pandas as pd
import collections
ProcessRecipe = collections.namedtuple('ProcessRecipe', ['recipe_number', 'recipe_steps'])
class File:
def __init__(self, fp):
fp_wo_quotes = fp.replace('"', '')
if os.path.isfile(fp_wo_quotes):
self.fp = fp_wo_quotes
else:
self.fp = None
#property
def parent_folder(self):
return os.path.dirname(self.fp)
def new_fp(self, file_name):
return os.path.join(self.parent_folder, file_name)
class MPCProcessRecipe:
RECIPE_TYPE = 'MPCProcessRecipe'
def __init__(self, fp):
"""
constructor
:param fp: absolute file path to a recipe file
"""
self.tree = etree.parse(fp)
#staticmethod
def get_columns(step_data):
"""
Collect attributes from a step data element for CSV column header
:param step_data: step data element
:return: column names as a list of strings
"""
_columns = ['Step']
for data_element in step_data.xpath('./Data'):
# <Data name="Repeat">0</Data> // without index
# <Data name="MR" index="1"/> // with index
_name = data_element.get('name')
_index = data_element.get('index')
column_name = _name + _index if _index else _name
_columns.append(column_name)
# print('columns', _columns)
return _columns
#staticmethod
def get_data(step_data_list):
data = []
for step_data in step_data_list:
name = step_data.tag
step = step_data.get('step')
values = []
for _data_element in step_data.xpath('./Data'):
_text = _data_element.text
if _text:
values.append(_text)
else:
values.append('')
datum = [name + step] + values
# print('datum', datum)
data.append(datum)
return data
#property
def process_recipe_list(self):
"""
Loop through <Recipe> elements and populate DataFrames
:return: list of ProcessRecipe namedtuple
"""
pr_list = []
for recipe in self.tree.xpath('/Recipes/Recipe'):
# <Recipe number="994" type="MPCProcessRecipe">
data =[]
if recipe.get('type') == self.RECIPE_TYPE:
recipe_number = recipe.get('number')
# <PreStepData step="1">
# <StepData step="1">
pre_step_data_list = recipe.xpath('./RecipeData/PreStepData')
step_data_list = recipe.xpath('./RecipeData/StepData')
# create columns from the first entry of StepData
columns = MPCProcessRecipe.get_columns(step_data_list[0])
if len(pre_step_data_list) > 0:
data += MPCProcessRecipe.get_data(pre_step_data_list)
data += MPCProcessRecipe.get_data(step_data_list)
df = pd.DataFrame(data=data, columns=columns)
pr = ProcessRecipe(recipe_number, df)
pr_list.append(pr)
return pr_list
def convert_xml_files_to_csv_files():
fp_input = input('D&D a recipe file: ')
file_obj = File(fp_input)
print('File Path:', file_obj.fp)
rcp = MPCProcessRecipe(file_obj.fp)
pr_list = rcp.process_recipe_list
for pr in pr_list:
out_fp = file_obj.new_fp(pr.recipe_number + '.csv')
pr.recipe_steps.to_csv(out_fp)
if __name__ == '__main__':
convert_xml_files_to_csv_files()

multiple value from mysql to reportlab

i have an issue that i can't show my second record in mysql table. The report just showing 1 record in a row and the second one isn't show on pdf file. i'm using reportlab for report generator on python 2.7
this is my code that i can' fix yet :
def nilaisql():
rpt = raw_input("input NPM : ")
sql = "select nama, tanggal, jamMasuk from t_"+rpt
curs.execute(sql)
result = curs.fetchall()
for row in result:
c = canvas.Canvas("Sampelone.pdf")
c.drawString(250, 700, str(row[0]))
c.save()
os.system("Sampelone.pdf")
this is my record on mysql. I want to show the second row record but the pdf just showing the first row record
it should showing the second row record
and this is the result on my pdf file
i'm getiing stuck in here and if you know something i'm really grateful that you can share the solution in here
for row in result:
c = canvas.Canvas("Sampelone.pdf")
c.drawString(250, 700, str(row[0]))
c.save()
what your code snippet is doing is, creating a new file and writing the content of your variable row into the pdf file and c.save saves it. In the next iteration, this same file is recreated which is blank and the original file is overwritten by this blank file and the content of row is printed that is why you will always see only first row record in it.
This should work fine. Increment or Decrement the value of y according to your use and document height.
c = canvas.Canvas("Sampelone.pdf") #creates a pdf
for row in result:
c.drawString(250, y, str(row[0])) #writes data at given co-ordinates
c.save() #saves the pdf
This way you can see all row records in the pdf.
But this practice is not considered good, you should always put your data in flowable like this. .
from reportlab.lib import styles
from reportlab.platypus import SimpleDocTemplate, Paragraph
def nilaisql():
pdfname = 'mydoc.pdf'
doc = SimpleDocTemplate(
pdfname
)
style = styles["Normal"]
story = []
rpt = raw_input("input NPM : ")
sql = "select nama, tanggal, jamMasuk from t_" + rpt
curs.execute(sql)
result = curs.fetchall()
for row in result:
story.append(Paragraph(row, style))
doc.build(
story
)
os.system("Sampelone.pdf")
Read more about flowables in reportlab-userguide

extend xlsxwriter.Worksheet() class

I currently override the xlsxwriter.Workbook, called rpt.Workbook. Would like to add method to xlsxwriter.Worksheet(), but since xlsxwriter.Workbook() imports Worksheet,not sure how this can be done without major convulsions. Currently, I have to pass the worksheet object as an argument.
Seems like I need to write override methods for xlsxwriter.Workbook() to point to a custom class of xlsxwriter.Worksheet() , but can't figure out how.
Here is the current override rpt.Workbook() being used:
####################################################################
class Workbook(xlsxwriter.Workbook):
####################################################################
"""\nSpreadsheet class provides methods to build a spreadsheet.
"""
####################################################################
def __init__(self,filename=None, options={}):
####################################################################
try:
filename = rpt.fname(filename)
except FileNotFoundError as err:
log.error(err)
return False
log.info("Initializing excel file " + filename)
super().__init__(filename,options)
####################################################################
def add_tab(self,name=None,data=None,header=None,
srow=0,scol=0,autocol=True):
####################################################################
"""\nProvides method to add_worksheet and add_table in 1 call.
Required Attribute args:
name = TAB name
header = list of header names
data = list of lists for spreadsheet contents
Optional Attribute args:
srow = starting row for table, default 0
scol = starting col for table, default 0
autocol = True/False, auto set the column sizes
add_tab also adds the worksheet.header attribute to
allow the set_col_by_name function to work
"""
if not data:
log.warning("data=[][] required")
return None
if not header:
log.warning("header=[] required")
return False
columns = []
for field in header:
columns.append({ 'header' : field })
worksheet = self.add_worksheet(name)
worksheet.header = header
tableinfo= {
'data' : data,
'columns' : columns
}
lastcol = scol + (len(header) - 1)
lastrow = srow + (len(data) + 1)
worksheet.add_table(srow,scol,lastrow,lastcol,tableinfo)
#if autocol:
#self.auto_set_columns(worksheet=worksheet,data=data,scol=scol)
worksheet.freeze_panes(0,1)
return worksheet
####################################################################
def auto_set_columns(self,worksheet=None,data=None,header=None,scol=0):
####################################################################
"""\nDetermines the max length of each column and then set
that column width.
Required Attribute args:
worksheet = worksheet object
data = list of lists data
Optional Attribute args:
scol = Column start
header = row of headers for data list of lists.
If header not specified, worksheet
must have been created with self.add_tab()
"""
if not header and worksheet.header:
header = worksheet.header
## table = [] list of lists, combine header and data
table = []
table.append(header) for row in data:
table.append(row)
ziptable = list(zip (*table))
colsizes = []
for idx,val in enumerate(table[0]):
size = max(len(s) for s in ziptable[idx])
colnum = idx + scol
log.debug("Setting column => {} col size => {} => {}".format(colnum,val,size))
worksheet.set_column(colnum,colnum,size)
I want to add a method to xlsxwriter.Worksheet() called auto_set_columns(). Currently I have to pass the worksheet object (worksheet=worksheet) as an object to get this to work.I would like to utilize worksheet.auto_set_columns() instead. Currently auto_set_columns() is a method of rpt.Workbook.
Would like auto_set_columns() to be an extended method of xlsxwriter.Worksheet.
The script side utilization currently looks like this and works:
excelfile = nashomes + '/nas_homes.xlsx'
spreadsheet = rpt.Workbook(excelfile)
worksheet = spreadsheet.add_tab(name='Nas Homes',data=hrpt.data,header=hrpt.header)
spreadsheet.auto_set_columns(worksheet=worksheet,data=hrpt.data,scol=0)
What I desire, notice the last line changes:
excelfile = nashomes + '/nas_homes.xlsx'
spreadsheet = rpt.Workbook(excelfile)
worksheet = spreadsheet.add_tab(name='Nas Homes',data=hrpt.data,header=hrpt.header)
worksheet.auto_set_columns(data=hrpt.data,scol=0)
Goal desired here is that worksheet object (which is xlsxwriter.Worksheet() ) can have an extended "auto_set_columns" method. However, since the worksheet object is created from a add_worksheet() method in the xlsxwriter.Workbook() class, I can't figure out how to extend xlsxwriter.Worksheet() without major override methods to xlsxwriter.Workbook() also. How can I get xlsxwriter.Workbook() to reference my extended xlsxwriter.Worksheet() when Workbook.add_worksheet() creates the Worksheet object?
How about monkey patching the worksheet before returning it from add_tab?
First create the standalone function outside of any class definitions:
import types
def auto_set_columns(self,data=None,header=None,scol=0):
if not header and self.header:
header = self.header
## table = [] list of lists, combine header and data
table = []
table.append(header)
for row in data:
table.append(row)
ziptable = list(zip (*table))
colsizes = []
for idx,val in enumerate(table[0]):
size = max(len(s) for s in ziptable[idx])
colnum = idx + scol
print "Setting column => {} col size => {} => {}".format(colnum,val,size)
self.set_column(colnum,colnum,size)
And then inside your Worksheet.add_tab function, patch in the method before returning:
....
worksheet.freeze_panes(0,1)
worksheet.auto_set_columns = types.MethodType( auto_set_columns, worksheet )
return worksheet
Now you should be able to run:
worksheet = spreadsheet.add_tab(name='Nas Homes',data=hrpt.data,header=hrpt.header)
worksheet.auto_set_columns(data=hrpt.data,scol=0)

pandas: Writing to an existing excel file (xlsx) using to_excel

I have a simple use case for df.to_excel() that I'm struggling with. I want to write to a specific worksheet tab (let's call it "Data") of an existing XLSX workbook, which could be referenced by formulas and pivots on other tabs.
I've tried to modify ExcelWriter in two ways but both produce errors from openpyxl.
Read an existing sheet using get_sheet_by_name (This errors: "NotImplementedError: use 'iter_rows()' instead".)
Create a new sheet using create_sheet. (This errors:"ReadOnlyWorkbookException: Cannot create new sheet in a read-only workbook")
df=DataFrame()
from openpyxl.reader.excel import load_workbook
book = load_workbook('my_excel_file.xlsx', use_iterators=True) # Assume my_excel_file.xlsx contains a sheet called 'Data'
class temp_excel_writer(ExcelWriter): # I need this to inherit the other methods of ExcelWriter in io/parsers.py
def __init__(self, path, book):
self.book=book
test_sheet=self.book.create_sheet(title='Test') # This errors: ReadOnlyWorkbookException
self.use_xlsx = True
self.sheet_names=self.book.get_sheet_names()
self.actual_sheets=self.book.worksheets
self.sheets={}
for i,j in enumerate(self.sheet_names):
self.sheets[j] = (self.actual_sheets[i],1)
self.cur_sheet = None
self.path = save
my_temp_writer=temp_excel_writer('my_excel_file.xlsx', book)
df.to_excel(my_temp_writer, sheet_name='Data')
Any thoughts? Am I missing something obvious? I'm still in pandas 7.2
When you load your workbook with use_iterators=True, it then _set_optimized_read() on the Workbook object, which cause it to be loaded read-only.
Thus, with the following code :
from openpyxl.reader.excel import load_workbook
book = load_workbook('t.xlsx', use_iterators=False) # Assume t.xlsx contains ['Data', 'Feuil2', 'Feuil3']
print book.get_sheet_names()
class temp_excel_writer():
def __init__(self, path, book):
self.book=book
test_sheet=self.book.create_sheet(title='Test') # No exception here now
self.book.save(path)
self.use_xlsx = True
self.sheet_names=self.book.get_sheet_names()
print self.sheet_names
self.actual_sheets=self.book.worksheets
self.sheets={}
for i,j in enumerate(self.sheet_names):
self.sheets[j] = (self.actual_sheets[i],1)
self.cur_sheet = None
self.path = path # I had to modify this line also
my_temp_writer = temp_excel_writer('my_excel_file.xlsx', book)
It create a file named my_excel_file.xlsx and the following output :
['Data', 'Feuil2', 'Feuil3']
['Data', 'Feuil2', 'Feuil3', 'Test']
Hope it helps

Categories