Copy excel sheet from one worksheet to another in Python - python

All I want to do is copy a worksheet from an excel workbook to another excel workbook in Python.
I want to maintain all formatting (coloured cells, tables etc.)
I have a number of excel files and I want to copy the first sheet from all of them into one workbook. I also want to be able to update the main workbook if changes are made to any of the individual workbooks.
It's a code block that will run every few hours and update the master spreadsheet.
I've tried pandas, but it doesn't maintain formatting and tables.
I've tried openpyxl to no avail
I thought xlwings code below would work:
import xlwings as xw
wb = xw.Book('individual_files\\file1.xlsx')
sht = wb.sheets[0]
new_wb = xw.Book('Master Spreadsheet.xlsx')
new_wb.sheets["Sheet1"] = sht
But I just get the error:
----> 4 new_wb.sheets["Sheet1"] = sht
AttributeError: __setitem__
"file1.xlsx" above is an example first excel file.
"Master Spreadsheet.xlsx" is my master spreadsheet with all individual files.

In the end I did this:
def copyExcelSheet(sheetName):
read_from = load_workbook(item)
#open(destination, 'wb').write(open(source, 'rb').read())
read_sheet = read_from.active
write_to = load_workbook("Master file.xlsx")
write_sheet = write_to[sheetName]
for row in read_sheet.rows:
for cell in row:
new_cell = write_sheet.cell(row=cell.row, column=cell.column,
value= cell.value)
write_sheet.column_dimensions[get_column_letter(cell.column)].width = read_sheet.column_dimensions[get_column_letter(cell.column)].width
if cell.has_style:
new_cell.font = copy(cell.font)
new_cell.border = copy(cell.border)
new_cell.fill = copy(cell.fill)
new_cell.number_format = copy(cell.number_format)
new_cell.protection = copy(cell.protection)
new_cell.alignment = copy(cell.alignment)
write_sheet.merge_cells('C8:G8')
write_sheet.merge_cells('K8:P8')
write_sheet.merge_cells('R8:S8')
write_sheet.add_table(newTable("table1","C10:G76","TableStyleLight8"))
write_sheet.add_table(newTable("table2","K10:P59","TableStyleLight9"))
write_to.save('Master file.xlsx')
read_from.close
With this to check if the sheet already exists:
#checks if sheet already exists and updates sheet if it does.
def checkExists(sheetName):
book = load_workbook("Master file.xlsx") # open an Excel file and return a workbook
if sheetName in book.sheetnames:
print ("Removing sheet",sheetName)
del book[sheetName]
else:
print ("No sheet ",sheetName," found, will create sheet")
book.create_sheet(sheetName)
book.save('Master file.xlsx')
with this to create new tables:
def newTable(tableName,ref,styleName):
tableName = tableName + ''.join(random.choices(string.ascii_uppercase + string.digits + string.ascii_lowercase, k=15))
tab = Table(displayName=tableName, ref=ref)
# Add a default style with striped rows and banded columns
tab.tableStyleInfo = TableStyleInfo(name=styleName, showFirstColumn=False,showLastColumn=False, showRowStripes=True, showColumnStripes=True)
return tab

Adapted from this solution, but note that in my (limited) testing (and as observed in the other Q&A), this does not support the After parameter of the Copy method, only Before. If you try to use After, it creates a new workbook instead.
import xlwings as xw
wb = xw.Book('individual_files\\file1.xlsx')
sht = wb.sheets[0]
new_wb = xw.Book('Master Spreadsheet.xlsx')
# copy this sheet into the new_wb *before* Sheet1:
sht.api.Copy(Before=new_wb.sheets['Sheet1'].api)
# now, remove Sheet1 from new_wb
new_wb.sheets['Sheet1'].delete()

This can be done using pywin32 directly. The Before or After parameter needs to be provided (see the api docs), and the parameter needs to be a worksheet <object>, not simply a worksheet Name or index value. So, for example, to add it to the end of an existing workbook:
def copy_sheet_within_excel_file(excel_filename, sheet_name_or_number_to_copy):
excel_app = win32com_client.gencache.EnsureDispatch('Excel.Application')
wb = excel_app.Workbooks.Open(excel_filename)
wb.Worksheets[sheet_name_or_number_to_copy].Copy(After=wb.Worksheets[wb.Worksheets.Count])
new_ws = wb.ActiveSheet
return new_ws
As most of my code runs on end-user machines, I don't like to make assumptions whether Excel is open or not so my code determines if Excel is already open (see GetActiveObject), as in:
try:
excel_app = win32com_client.GetActiveObject('Excel.Application')
except com_error:
excel_app = win32com_client.gencache.EnsureDispatch('Excel.Application')
And then I also check to see if the workbook is already loaded (see Workbook.FullName). Iterate through the Application.Workbooks testing the FullName to see if the file is already open. If so, grab that wb as your wb handle.
You might find this helpful for digging around the available Excel APIs directly from pywin32:
def show_python_interface_modules():
os.startfile(os.path.dirname(win32com_client.gencache.GetModuleForProgID('Excel.Application').__file__))

Related

updating and saving excel file using openpyxl and then reading it gives none values for formula based column

I am trying to update an excel sheet using openpyxl. When reading a updated formula based cell I am getting None output. The updates are not getting saved even though I have used openpyxl save command.
import openpyxl
# data_only=False to upadate excel file
def write_cell(data_only):
wb_obj = openpyxl.load_workbook("mydata.xlsx", data_only=data_only)
sheet_obj = wb_obj["Sheet1"]
sheet_obj = wb_obj.active
sheet_obj.cell(row = 1, column = 1).value = 8
wb_obj.save(filename="mydata.xlsx")
# data_only=True to read excel file"
def read_cell(data_only):
wb_obj = openpyxl.load_workbook("mydata.xlsx", data_only=data_only)
sheet = wb_obj["Sheet1"]
# Formula at column 2 : =A1*5
val = sheet.cell(row = 1, column = 2).value
return val
write_cell(False)
print(read_cell(True))
Actual Output -> None
Expected output -> 40
There are two solutions to this:
If you refer the documentation, it is mentioned that you can either have the formula or the value from formula. If you modify a file with formulae then you must pass it through some kind of application such as Excel and save it again which will now update the value of the formula. You won't get the none as the output now if you try to read the value of the cell containing formula.
Another solution is to open the excel file and save it from the script itself after saving it using openpyxl:
from win32com.client import Dispatch
import openpyxl
def write_cell(data_only):
wb_obj = openpyxl.load_workbook("mydata.xlsx", data_only=data_only)
sheet_obj = wb_obj["Sheet1"]
sheet_obj = wb_obj.active
sheet_obj.cell(row = 1, column = 1).value = 8
wb_obj.save(filename="mydata.xlsx")
open_save("mydata.xlsx")
def open_save(filename):
"""Function to open and save the excel file"""
xlApp = Dispatch("Excel.Application")
xlApp.Visible = False
xlBook = xlApp.Workbooks.Open(filename)
xlBook.Save()
xlBook.Close()

Openpyxl worksheet does not exist

I'm attempting to write to a cell using openpyxl, but am getting error "Worksheet Sheet1 does not exist."
def CreateWorkbook(workbook_path):
workbook = openpyxl.Workbook()
workbook.save(workbook_path)
return workbook_path
def CreateSheet(workbook, sheet_name):
workbook.create_sheet(sheet_name)
workbook.save(workbook_path)
return sheet_name
def WriteCell(workbook, sheet_name, cell, cell_data):
worksheet = workbook[sheet_name]
worksheet[cell] = cell_data
return
workbook = CreateWorkbook('workbook1.xlsx')
sheet = CreateSheet(workbook, 'Sheet1')
WriteCell(workbook, sheet, 'A1', 'testing')
This code isn't really good and has multiple errors. For example, in CreateWorkbook, you create a workbook, and then save it. However, after, in CreateSheet, you don't actually, reopen the file. The file must be re opened after every save. This is how I would fix those errors:
import openpyxl
def CreateWorkbook(workbook_path):
workbook = openpyxl.Workbook()
workbook.save(workbook_path)
return workbook_path
def CreateSheet(workbook, sheet_name):
wb = openpyxl.load_workbook(workbook)
wb.create_sheet(sheet_name)
wb.save(workbook)
return sheet_name
def WriteCell(workbook, sheet_name, cell, cell_data):
wb = openpyxl.load_workbook(workbook)
worksheet = wb[sheet_name]
worksheet[cell] = cell_data
wb.save(workbook)
return
workbook = CreateWorkbook('workbook1.xlsx')
sheet = CreateSheet(workbook, 'Sheet1')
WriteCell(workbook, sheet, 'A1', 'testing')
Maybe you should post the complete class code, there are some things that don't really make sense like for instance in:
def CreateSheet(workbook, sheet_name):
workbook.create_sheet(sheet_name)
workbook.save(workbook_path)
return sheet_name
when you return sheet_name - this is basically the same value as you give in the input, it's pointless. Secondly, you use workbook_path as a parameter, I assume it's a class attribute, otherwise it's not clear how you have it in the method.
Also in:
def CreateWorkbook(workbook_path):
workbook = openpyxl.Workbook()
workbook.save(workbook_path)
return workbook_path
you return the same input argument workbook_path which again dosn't make sense and in the code below you use the workbook_path as workbook. This here might be the error, but there are a few so might be also sth else.
For your problem try to debug the code, eventually print out at every method a "proof" that the object created was actually created.

How can I iterate over worksheets in win32com?

I generate an xlsx file with lots of sheets and I want to take me at specific position when I open it manually with Excel. This function does the job but for one sheet only. How can I apply it to all of the sheets in workbook?
import win32com.client
def select_cell():
xl = win32com.client.gencache.EnsureDispatch('Excel.Application')
wb = xl.Workbooks.Open(r'H:\Files\1.xlsx')
ws = xl.ActiveSheet
ws.Range('B100').Select()
wb.Close(True)
xl.Quit()
select_cell()
I want to make something like this:
import win32com.client
def select_cell():
xl = win32com.client.gencache.EnsureDispatch('Excel.Application')
wb = xl.Workbooks.Open(r'H:\Files\1.xlsx')
for ws in wb.Worksheets():
ws.Range('B100').Select()
wb.Close(True)
xl.Quit()
select_cell()
In order to be taken to specific cell in newly generated document it is necessary to have both of these expressions executed:
ws.Range('k100').Value = 1
ws.Range('k100').Select()
To do it in every sheet of the workbook:
def select_cell():
xl = win32com.client.gencache.EnsureDispatch('Excel.Application')
wb = xl.Workbooks.Open(r'H:\Files1.xlsx')
for sh in wb.Sheets:
xl.Worksheets(sh.Name).Activate()
ws = xl.ActiveSheet
ws.Range('k100').Value = 1
ws.Range('k100').Select()
wb.Close(True)
xl.Quit()
The code above will take you to K100 on every worksheet in the book.
Your script did nothing at all when I tested it.
The script below worked fine, based on my test.
import xlsxwriter
# Create an new Excel file and add a worksheet.
workbook = xlsxwriter.Workbook('C:\\Users\\Excel\\Desktop\\book1.xlsx')
worksheet = workbook.add_worksheet()
# Widen the first column to make the text clearer.
worksheet.set_column('A:A', 20)
# Add a bold format to use to highlight cells.
bold = workbook.add_format({'bold': True})
# Write some simple text.
worksheet.write('A1', 'Hello')
# Text with formatting.
worksheet.write('A2', 'World', bold)
# Write some numbers, with row/column notation.
worksheet.write(2, 0, 123)
worksheet.write(3, 0, 123.456)
workbook.close()

Activate second worksheet with openpyxl

I am trying to activate multiple excel worksheets and write to both multiple sheets within both workbook(s) using python and openpyxl. I am able to load the second workbook f but I am unable to append cell G2 of my second workbook with the string Recon
from openpyxl import Workbook, load_workbook
filename = 'sda_2015.xlsx'
wb = Workbook()
ws = wb.active
ws['G1'] = 'Path'
ws.title = 'Main'
adf = "Dirty Securities 04222015.xlsx"
f = "F:\\ana\\xlmacro\\" + adf
wb2 = load_workbook(f)
"""
wb22 = Workbook(wb2)
ws = wb22.active
ws['G1'] = "Recon2"
ws.title = 'Main2'
"""
print wb2.get_sheet_names()
wb.save(filename)
I commented out the code which is broken
Update
I adjusted my code with the below answer. The value in cell H1 is written onto wb2 in column H, but for some reason the column is hidden. I have adjusted the column to other columns but still I have seen the code hide multiple columns. There are also occurences when the code executes and titles ws2 as Main21 but the encoded value is Main2
from openpyxl import Workbook, load_workbook
filename = 'sda_2015.xlsx'
wb1 = Workbook()
ws1 = wb1.active
ws1['G1'] = 'Path'
ws1.title = 'Main'
adf = "Dirty Securities 04222015.xlsx"
f = "F:\\ana\\xlmacro\\" + adf
wb2 = load_workbook(f)
ws2 = wb2.active
ws2['H1'] = 'Recon2'
ws2.title = 'Main2'
print wb2.get_sheet_names()
wb1.save(filename)
wb2.save(f)
If you have two workbooks open, wb1 and wb2, you'll also need different names for the various worksheets: ws1 = wb1.active and ws2 = wb2.active.
If you're working with a file with macros, you'll need to set the keep_vba flag to True when opening it in order to preserve the macros.
I had experienced the same thing with hidden cells. Eventually, I unpacked the Excel file and looked at the raw XML to find out that not all of the columns had a dimension for width. Those without a width were being by Excel.
A quick fix is to do something like this...
for col in 'ABCDEFG':
if not worksheet.column_dimensions[col].width:
worksheet.column_dimensions[col].width = 10

python write to EXCEL cell range (NumPy?)

I am trying to write a matrix prepared with NumPy to an EXCEL file. I need to specify a range of cells in which the matrix must be written.
I need to write the matrix to cells A4:Z512 in sheet 4 of the EXCEL file.
Now, the standard EXCEL file has 3 sheets, so I need to first add a 4th sheet and then write the matrix to it.
Is there a way to do this in python 2.7? Is it possible to do this with pure NumPy or not?
I have not used NumPy, so I am not sure if you can manipulate excel files. But for working on excel files I recommend using the win32com library. Below is some code I have used in the past to make win32com API easier to use. Feel free to use the code yourself. Hope this helps!
import win32com.client as win32
excel = win32.gencache.EnsureDispatch('Excel.Application')
def openExcel(makeExcelVisible=True):
excel.Visible = makeExcelVisible
def closeExcel():
excel.Application.Quit()
class ExcelFile(object):
# opens up a workbook to work on, not selecting a file name creates a new one
def __init__(self, fileName=None):
if fileName == None:
self.wb = excel.Workbooks.Add()
else:
self.wb = excel.Workbooks.Open(fileName)
self.ws = None
def addWorksheet(self):
# adds a new worksheet to the workbook and makes it the current worksheet
self.ws = self.wb.Worksheets.Add()
def selectWorksheet(self, worksheetName):
# selects a worksheet to work on
self.ws = self.wb.Worksheets(worksheetName)
def renameWorksheet(self, worksheetName):
# renames current worksheet
self.ws.Name = worksheetName
def save(self):
# saves the workbook
self.wb.Save()
def saveAs(self, fileName):
# saves the workbook with the file name
self.wb.SaveAs(fileName)
def close(self):
# closes the workbook
self.wb.Close()
def insertIntoCell(self, cellRow, cellCol, data):
self.ws.Cells(cellRow,cellCol).Value = data
def clearCell(self, cellRow, cellCol):
self.ws.Cells(cellRow,cellCol).Value = None
Here is an example of how to use the code above. It creates a new excel file, renames the worksheets, adds information into the first cell on each worksheet and saves the file as "test.xlsx". Default save location is your home directory.
worksheets = ["Servers", "Printers", "Drives", "IT Phones"]
information = ["Server WS", "Printer WS", "Driver WS", "IT Phone WS"]
def changeCells(information):
excelFile = ExcelFile()
for index in xrange(len(worksheets)):
sheetNumber = index + 1
if sheetNumber == 4:
excelFile.addWorksheet()
excelFile.selectWorksheet("Sheet%d" % sheetNumber)
excelFile.renameWorksheet(worksheets[index])
excelFile.insertIntoCell(1,1,information[index])
excelFile.saveAs("test.xlsx")
excelFile.close()
openExcel()
changeCells(information)
closeExcel()
Also, I would recommend looking at the API for win32com yourself. It's a very nice and useful library.
I put together the actual code you would need for entering your matrix on Sheet4 to A4:Z512.
def addMatrix(matrix):
# use ExcelFile("fileName.xlsx") if you need to add to a specific file
excelFile = ExcelFile()
excelFile.addWorksheet()
# default excel files only have 3 sheets so had to add one
excelFile.selectWorksheet("Sheet4")
# xrange(4,513) since end index is exclusive
for row in xrange(4,513):
# 1 for A, 26 for Z
for col in xrange(1,27):
mRow = row - 4
mCol = col - 1
excelFile.insertIntoCell(row, col, matrix[mRow][mCol])
excelFile.saveAs("test.xlsx")
excelFile.close()
matrix = list()
for row in xrange(509):
matrix.append([])
for col in xrange(26):
matrix[row].append(0)
# the matrix is now filled with zeros
# use openExcel(False) to run faster, it won't open a window while running
openExcel()
addMatrix(matrix)
closeExcel()

Categories