Merging two .xlsx files

Merging two .xlsx files - python

I copy the content from a .xlsx file to another .xlsx file.
Openpyxl can't handle headerimages, so i create a .xlsx File with xlsxwriter including the header image and then copy the content with Openpyxl to the second .xlsx file. Works fine but openpyxl delete the headerimage. How to keep the Image in the header?
This is my code so far:
import openpyxl as xl
from openpyxl.styles import Font, PatternFill, Alignment
from openpyxl import load_workbook
import xlsxwriter
logo = "logo.jpg"
########################################################
## Create new sysfile with xlsxwriter
########################################################
workbook = xlsxwriter.Workbook('new_sysfile.xlsx')
#Insert Worksheets
worksheet1 = workbook.add_worksheet('Sheet1')
worksheet2 = workbook.add_worksheet('Sheet2')
worksheet3 = workbook.add_worksheet('Sheet3')
cell_format = workbook.add_format()
cell_format.set_font_name('Arial')
cell_format.set_font_size('11')
worksheet1.set_landscape()
worksheet2.set_landscape()
worksheet3.set_landscape()
header1 = '&L&G' + '&R hText '
worksheet1.set_margins(top=1)
worksheet1.set_header(header1, {'image_left': logo})
worksheet2.set_margins(top=1)
worksheet2.set_header(header1, {'image_left': logo})
worksheet3.set_margins(top=1)
worksheet3.set_header(header1, {'image_left': logo})
workbook.close()
#############################################################
# opening the source excel file
sourcefile = "sysfile2.xlsx"
wb1 = xl.load_workbook(sourcefile)
ws1 = wb1["Sheet1"]
ws2 = wb1["Sheet2"]
ws3 = wb1["Sheet3"]
# opening the destination excel file
dest_file = "new_sysfile.xlsx"
wb2 = xl.load_workbook(dest_file)
ws_dest1 = wb2["Sheet1"]
ws_dest2 = wb2["Sheet2"]
ws_dest3 = wb2["Sheet3"]
# some formatting
# calculate total number of rows and
mr1 = ws1.max_row
mc1 = ws1.max_column
mr2 = ws2.max_row
mc2 = ws2.max_column
mr3 = ws3.max_row
mc3 = ws3.max_column
# copying the cell values from source
for i in range(1, mr1 + 1):
for j in range(1, mc1 + 1):
# reading cell value from source excel file
c = ws1.cell(row=i, column=j)
# writing the read value to destination excel file
ws_dest1.cell(row=i, column=j).value = c.value
# SECOND SHEET
for i in range(1, mr2 + 1):
for j in range(1, mc2 + 1):
# reading cell value from source excel file
c = ws2.cell(row=i, column=j)
# writing the read value to destination excel file
ws_dest2.cell(row=i, column=j).value = c.value
# THIRD SHEET
for i in range(1, mr3 + 1):
for j in range(1, mc3 + 1):
# reading cell value from source excel file
c = ws3.cell(row=i, column=j)
# writing the read value to destination excel file
ws_dest3.cell(row=i, column=j).value = c.value
ws2.sheet_properties.pageSetUpPr.fitToPage = True
ws2.page_setup.fitToWidth = True
# (some formatting)
ws_dest1.sheet_properties.pageSetUpPr.fitToPage = True
ws_dest1.page_setup.fitToHeight = False
ws_dest2.sheet_properties.pageSetUpPr.fitToPage = True
ws_dest2.page_setup.fitToHeight = False
ws_dest3.sheet_properties.pageSetUpPr.fitToPage = True
ws_dest3.page_setup.fitToHeight = False
wb2.save(str(dest_file))
I hope someone have a solution.
Thank you.

Related

Python openpyxl add image to excel file

if i separate the code only for add image and only for copy the format all can work normally.But when i combine it and it's always come out : UserWarning: wmf image format is not supported so the image is being dropped.How to solve it ?
import openpyxl
import copy
from openpyxl.utils import get_column_letter
# It is not required for one to create a workbook on
# filesystem, therefore creating a virtual workbook
wrkb = openpyxl.Workbook()
# Number of sheets in the workbook (1 sheet in our case)
ws = wrkb.worksheets[0]
# Adding a row of data to the worksheet (used to
# distinguish previous excel data from the image)
path = "/Users/LEON/OneDrive/Desktop/copy.xlsx"
save_path = "/Users/LEON/OneDrive/Desktop/paste.xlsx"
wb = openpyxl.load_workbook(path)
wb2 = openpyxl.Workbook()
sheetnames = wb.sheetnames
for sheetname in sheetnames:
print(sheetname)
sheet = wb[sheetname]
sheet2 = wb2.create_sheet(sheetname)
# tab颜色
sheet2.sheet_properties.tabColor = sheet.sheet_properties.tabColor
# 开始处理合并单元格形式为“(<CellRange A1：A4>,)，替换掉(<CellRange 和 >,)' 找到合并单元格
wm = list(sheet.merged_cells)
if len(wm) > 0:
for i in range(0, len(wm)):
cell2 = str(wm[i]).replace('(<CellRange ', '').replace('>,)', '')
sheet2.merge_cells(cell2)
for i, row in enumerate(sheet.iter_rows()):
sheet2.row_dimensions[i+1].height = sheet.row_dimensions[i+1].height
for j, cell in enumerate(row):
sheet2.column_dimensions[get_column_letter(j+1)].width = sheet.column_dimensions[get_column_letter(j+1)].width
sheet2.cell(row=i + 1, column=j + 1, value=cell.value)
# 设置单元格格式
source_cell = sheet.cell(i+1, j+1)
target_cell = sheet2.cell(i+1, j+1)
target_cell.fill = copy.copy(source_cell.fill)
if source_cell.has_style:
target_cell._style = copy.copy(source_cell._style)
target_cell.font = copy.copy(source_cell.font)
target_cell.border = copy.copy(source_cell.border)
target_cell.fill = copy.copy(source_cell.fill)
target_cell.number_format = copy.copy(source_cell.number_format)
target_cell.protection = copy.copy(source_cell.protection)
target_cell.alignment = copy.copy(source_cell.alignment)
# A wrapper over PIL.Image, used to provide image
# inclusion properties to openpyxl library
img = openpyxl.drawing.image.Image('hlrb_image.png')
# The Coordinates where the image would be pasted
# (an image could span several rows and columns
# depending on it's size)
img.anchor = 'A2'
# Adding the image to the worksheet
# (with attributes like position)
ws.add_image(img)
# Saving the workbook created under the name of out.xlsx
wrkb.save('out.xlsx')
i need a logo top there and copy format detail down there.If i separate run the code,it will delete the sheet.Like only the logo there or only the copy format there dont hv logo.

Save copied data to different excel files

I'm trying to save different parts of copied data from one excel file, to the correspond new excel file, but I'm getting everything on one new file.
What am I doing wrong?
For example, in the picture I want to have 4 new excel files, with the names AAA, BBB, CCC and DDD.
The separation is every time a blank row appears.
The data to be copied will be from that row until the next blank row. (all columns)
Thanks
wb1 = xl.load_workbook(xref_file)
ws1 = wb1.worksheets[0]
mr = ws1.max_row
mc = ws1.max_column
for row in range(1,ws1.max_row):
if(ws1.cell(row,1).value is None):
wb2 = openpyxl.Workbook()
ws2 = wb2.active
conveyor_name = ws1["A1"].value
conveyor_name.split(' -')
conveyor_name = conveyor_name.split(' -')[0]
filename = conveyor_name + ".xlsx"
destination_file = os.path.join(destination,filename)
wb2.save(destination_file)
# copying the cell values from source
# excel file to destination excel file
for i in range (1, mr+1):
for j in range (1, mc+1):
# reading cell value from source excel file
c = ws1.cell(row = i, column = j)
# writing the read value to destination excel file
ws2.cell(row = i, column = j).value = c.value
wb2.save(destination_file)
ws1.delete_rows(1,mr)
wb1.save(xref_file)

Edit 1:
The changes that I have made to your code are that I have additional conditions which check whether an empty row is present or not, if it is an empty row then it will set the new_sheet variable to true. If it is not empty and if new_sheet is true then a new sheet is created, else a loop starts which copies the content to the new sheet.
Hence the updated code should be as follows:
wb1 = openpyxl.load_workbook(destination + "/" + xref_file)
ws1 = wb1.worksheets[0]
mr = ws1.max_row
mc = ws1.max_column
new_sheet = True
# for row in range(1,ws1.max_row):
row = 1
while row < ws1.max_row:
if(ws1.cell(row,1).value is not None):
if new_sheet == True:
wb2 = openpyxl.Workbook()
ws2 = wb2.active
conveyor_name = ws1["A" + str(row)].value
conveyor_name = conveyor_name.split()[0]
filename = conveyor_name + ".xlsx"
destination_file = os.path.join(destination,filename)
print(destination_file)
wb2.save(destination_file)
new_sheet = False
row = row + 1
else:
# copying the cell values from source
# excel file to destination excel file
for i in range (row, mr+1):
if ws1.cell(i,1).value is None:
break
for j in range (1, mc+1):
# reading cell value from source excel file
c = ws1.cell(row = i, column = j)
# writing the read value to destination excel file
ws2.cell(row = i - row + 1, column = j).value = c.value
row = i
wb2.save(destination_file)
else:
new_sheet = True
row = row + 1
Edit 0: There are several optimizations and errors that can be observed. Since I don't know the nature of the data in excel sheet, some of these may not apply to you
wb1 = xl.load_workbook(xref_file) ws1 = wb1.worksheets[0]
mr = ws1.max_row
mc = ws1.max_column
for row in range(1,ws1.max_row):
if(ws1.cell(row,1).value is None):
wb2 = openpyxl.Workbook()
ws2 = wb2.active
If the above If condition is true then there shouldn't be any value in A1. Further, if there should be a value in A1 then you may want to check the values from the second row to pass the if condition, hence row should have values starting from 2 to ws1.max_row
conveyor_name = ws1["A1"].value
This below line doesn't make much sense because you are again using the same value in the line after that
conveyor_name.split(' -') # this is not required
conveyor_name = conveyor_name.split(' -')[0]
filename = conveyor_name + ".xlsx"
destination_file = os.path.join(destination,filename)
wb2.save(destination_file)
# copying the cell values from source
# excel file to destination excel file
for i in range (1, mr+1):
for j in range (1, mc+1):
# reading cell value from source excel file
c = ws1.cell(row = i, column = j)
# writing the read value to destination excel file
ws2.cell(row = i, column = j).value = c.value
wb2.save(destination_file)
ws1.delete_rows(1,mr) wb1.save(xref_file)

Python/openpyxl - Is there a way to copy a worksheet from one workbook to another with all properties (exact copy)

I have researched through several similar threads of discussion on this forum and tried several things recommended, but I am not able to get all properties of the source worksheet copied over. Here's my code and I see that column widths and a few other things are not coped over. Would have been great if openpyxl implemented a function to copy a worksheet with all its attributes.
def copy_worksheet(src_xl, dest_xl, src_ws, dest_ws):
import openpyxl as xl
from copy import copy
# opening the source excel file
wb1 = xl.load_workbook(src_xl)
sheet_names = wb1.sheetnames
index = sheet_names.index(src_ws)
ws1 = wb1.worksheets[index]
# opening the destination excel file
wb2 = xl.load_workbook(dest_xl)
sheet_names = wb2.sheetnames
try:
index = sheet_names.index(dest_ws)
except:
ws2 = wb2.create_sheet(dest_ws)
else:
ws2 = wb2.worksheets[index]
# calculate total number of rows and
# columns in source excel file
mr = ws1.max_row
mc = ws1.max_column
# copying the cell values from source
# excel file to destination excel file
for i in range (1, mr + 1):
for j in range (1, mc + 1):
# reading cell value from source excel file
c = ws1.cell(row = i, column = j)
cell = c
# writing the read value to destination excel file
ws2.cell(row = i, column = j).value = c.value
new_cell = ws2.cell(row = i, column = j)
new_cell.font = copy(cell.font)
new_cell.border = copy(cell.border)
new_cell.fill = copy(cell.fill)
new_cell.number_format = copy(cell.number_format)
new_cell.protection = copy(cell.protection)
new_cell.alignment = copy(cell.alignment)
# saving the destination excel file
wb2.save(str(dest_xl))

This seems to do the job - to set the column widths:
from openpyxl.utils import get_column_letter
for i in range(ws1.max_column):
ws2.column_dimensions[get_column_letter(i+1)].width = ws1.column_dimensions[get_column_letter(i+1)].width

How to write multiple sheets into a new excel, without overwriting each other?

I'm trying to write multiple excels' column A into a new excel's column A (assuming all the excels have one worksheet each.) I've written some code, which can write one excel's column A into the new excel's column A; but if there are multiple excels, the new excel's column A will be overwritten multiple times. So how could I just add all the column As to the new excel sheet one after another without overwriting each other?
Below are my code:
import os, openpyxl
path = os.getcwd()
def func(file):
for file in os.listdir(path):
if file.endswith('.xlsx'):
wb = openpyxl.load_workbook(file)
sheet = wb.active
colA = sheet['A']
wb = openpyxl.Workbook()
r = 1
for i in colA:
sheet = wb.active
sheet.cell(row=r, column=1).value = i.value
r += 1
wb.save('new.xlsx')
func(file)
Thank you so much!!

you could proceed for example as:
import os, openpyxl
path = os.getcwd()
def func(outputFile):
c = 0
#create output workbook
wbOut = openpyxl.Workbook()
sheetOut = wbOut.active
for fName in os.listdir(path):
if fName.endswith('.xlsx'):
c += 1 #move to the next column in output
wb = openpyxl.load_workbook(fName)
sheet = wb.active #input sheet
#for r in range(1, sheet.max_row+1):
# sheetOut.cell(row=r, column=c).value = sheet.cell(row = r, column = 1).value
for r, cell in enumerate(sheet['A']):
sheetOut.cell(row = r+1, column = c).value = cell.value
wbOut.save(outputFile)
#"concatenate" all columns A into one single column
def funcAppend(outputFile):
wbOut = openpyxl.Workbook()
sheetOut = wbOut.active
r = 1
for fName in os.listdir(path):
if fName.endswith('.xlsx'):
wb = openpyxl.load_workbook(fName)
sheet = wb.active
for cell in sheet['A']:
sheetOut.cell(row = r, column = 1).value = cell.value
r += 1
wbOut.save(outputFile)
func('test.xlsx')

write xlsx file using openpyxl module in python

I am having trouble writing to excel file using openpyxl module. So far I am able to write this code
from openpyxl.workbook import Workbook
import datetime
header = [u'Name', u'Email', u'Mobile', u'Current location',]
new_data = [
[u'name1', u'email1#yahoo.com', 9929283421.0, u'xxxx'],
[u'name2', u'email2#xyz.com', 9994191988.0, u'xxxx']
]
wb = Workbook()
cur_date = str(datetime.date.today())
log_file = "%s/%s_%s_errorlog.xlsx" % (settings.MEDIA_ROOT,
os.path.splitext(file_name)[0],
cur_date)
log_csv = wb.worksheets[0]
for i in range(1, len(header) + 1):
log_csv.cell(row = 1 ,column = i).value = header[i - 1]
wb.save(log_file)
error_count = 0
for each_row in new_data:
error_count += 1
for i in range(1, len(each_row) + 1):
log_csv.cell(row = error_count ,column = i).value = each_row[i - 1]
wb.save(log)
File is created, but it is corrupted and I am not able to open it with the excel file reader (LibreOffice) provided by the OS (ubuntu). Also the contents of the file are not readable. Not sure what I am doing wrong

from openpyxl.workbook import Workbook
header = [u'Name', u'Email', u'Mobile', u'Current location',]
new_data = [[u'name1', u'email1#yahoo.com', 9929283421.0, u'xxxx'],
[u'name2', u'email2#xyz.com', 9994191988.0, u'xxxx']]
wb = Workbook()
dest_filename = 'empty_book.xlsx'
ws1 = wb.active
ws1.title = "range names"
ws1.append(header)
for row in new_data:
ws1.append(row)
wb.save(filename = dest_filename)
I am able to write the content to xlsx like above.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Merging two .xlsx files - python

Related

Python openpyxl add image to excel file

Save copied data to different excel files

Python/openpyxl - Is there a way to copy a worksheet from one workbook to another with all properties (exact copy)

How to write multiple sheets into a new excel, without overwriting each other?

write xlsx file using openpyxl module in python

Categories

Resources