Save copied data to different excel files - python

I'm trying to save different parts of the data copied from one Excel file to the corresponding new Excel files, but I'm getting everything in one new file.
What am I doing wrong?
For example, in the picture I want to have 4 new excel files, with the names AAA, BBB, CCC and DDD.
The separation is every time a blank row appears.
The data to be copied will be from that row until the next blank row. (all columns)
Thanks
wb1 = xl.load_workbook(xref_file)
ws1 = wb1.worksheets[0]
mr = ws1.max_row
mc = ws1.max_column
for row in range(1,ws1.max_row):
if(ws1.cell(row,1).value is None):
wb2 = openpyxl.Workbook()
ws2 = wb2.active
conveyor_name = ws1["A1"].value
conveyor_name.split(' -')
conveyor_name = conveyor_name.split(' -')[0]
filename = conveyor_name + ".xlsx"
destination_file = os.path.join(destination,filename)
wb2.save(destination_file)
# copying the cell values from source
# excel file to destination excel file
for i in range (1, mr+1):
for j in range (1, mc+1):
# reading cell value from source excel file
c = ws1.cell(row = i, column = j)
# writing the read value to destination excel file
ws2.cell(row = i, column = j).value = c.value
wb2.save(destination_file)
ws1.delete_rows(1,mr)
wb1.save(xref_file)

Edit 1:
The changes I have made to your code add conditions that check whether a row is empty. If the row is empty, the new_sheet variable is set to True. If the row is not empty and new_sheet is True, a new sheet (workbook) is created; otherwise a loop starts that copies the content to the new sheet.
Hence the updated code should be as follows:
# Split the first worksheet into one workbook per section, where sections
# are separated by rows whose column-A cell is empty.
# Uses `destination` (output folder) and `xref_file` (source file name),
# which must be defined before this snippet runs.
wb1 = openpyxl.load_workbook(os.path.join(destination, xref_file))
ws1 = wb1.worksheets[0]
mr = ws1.max_row
mc = ws1.max_column
# True while the next non-empty row should start a new output workbook.
new_sheet = True
row = 1
# NOTE: the loop stops before the last row, so a section that *starts* on
# the very last row is not exported; rows of a running section are still
# copied up to and including the last row by the inner loop.
while row < mr:
    if ws1.cell(row, 1).value is None:
        # Blank row: the current section is over.
        new_sheet = True
        row += 1
    elif new_sheet:
        # First row of a section: the first word of column A names the file.
        # This naming row itself is not copied into the new workbook.
        wb2 = openpyxl.Workbook()
        ws2 = wb2.active
        conveyor_name = ws1["A" + str(row)].value.split()[0]
        destination_file = os.path.join(destination, conveyor_name + ".xlsx")
        print(destination_file)
        wb2.save(destination_file)
        new_sheet = False
        row += 1
    else:
        # Copy every row of the section until a blank row (or the end of
        # the sheet) is reached.
        for i in range(row, mr + 1):
            if ws1.cell(i, 1).value is None:
                break
            for j in range(1, mc + 1):
                # `i - row + 1` re-bases the section so it starts at row 1
                # of the destination sheet.
                ws2.cell(row=i - row + 1, column=j).value = ws1.cell(row=i, column=j).value
        # Continue the outer scan at the row where copying stopped.
        row = i
        wb2.save(destination_file)
Edit 0: There are several optimizations and errors that can be observed. Since I don't know the nature of the data in excel sheet, some of these may not apply to you
wb1 = xl.load_workbook(xref_file) ws1 = wb1.worksheets[0]
mr = ws1.max_row
mc = ws1.max_column
for row in range(1,ws1.max_row):
if(ws1.cell(row,1).value is None):
wb2 = openpyxl.Workbook()
ws2 = wb2.active
If the above if condition is true for row 1, then A1 is empty and there is no value to read from it. If A1 is supposed to contain a value, you may want to start checking from the second row, so row should run from 2 to ws1.max_row.
conveyor_name = ws1["A1"].value
The line below has no effect: its result is discarded, and the same split is performed again on the line after it.
conveyor_name.split(' -') # this is not required
conveyor_name = conveyor_name.split(' -')[0]
filename = conveyor_name + ".xlsx"
destination_file = os.path.join(destination,filename)
wb2.save(destination_file)
# copying the cell values from source
# excel file to destination excel file
for i in range (1, mr+1):
for j in range (1, mc+1):
# reading cell value from source excel file
c = ws1.cell(row = i, column = j)
# writing the read value to destination excel file
ws2.cell(row = i, column = j).value = c.value
wb2.save(destination_file)
ws1.delete_rows(1,mr) wb1.save(xref_file)

Related

How do I get user input into an excel spreadsheet via input() either in a csv or xlsx spreadsheet?

So far, I have been able to access csv and xlsx files in python, but I am unsure how to put in user inputs input() to add data to the spreadsheet.
I would also want this input() to only be enterable once per day but for different columns in my spreadsheet. (this is a separate issue)
Here is my code so far, first for csv, second for xlsx, I don't need both just either will do:
# writing to a CSV file
import csv
def main():
    """Create EdProjDBeg.csv with a header and one all-zero row, then run the updater on it."""
    csv_path = "EdProjDBeg.csv"
    columns = ("Ans1", "Ans2", "Ans3")
    initial_rows = [(0, 0, 0)]
    writer(columns, initial_rows, csv_path, "write")
    updater(csv_path)
def writer(header, data, filename, option):
    """Write ``data`` to the CSV file ``filename``, replacing its contents.

    Args:
        header: Column names, written as the first row.
        data: Rows to write: sequences for ``"write"``, dicts keyed by the
            names in ``header`` for ``"update"``.
        filename: Path of the CSV file to (over)write.
        option: ``"write"`` for plain rows or ``"update"`` for dict rows.
            Any other value prints a message and leaves the file untouched.
    """
    if option not in ("write", "update"):
        # Validate before opening: mode "w" truncates the file, so an
        # unknown option must be rejected first to avoid wiping it.
        print("Option is not known")
        return
    with open(filename, "w", newline="") as csvfile:
        if option == "write":
            row_writer = csv.writer(csvfile)
            row_writer.writerow(header)
            row_writer.writerows(data)
        else:
            # list() so that any iterable of names (e.g. dict keys) works.
            dict_writer = csv.DictWriter(csvfile, fieldnames=list(header))
            dict_writer.writeheader()
            dict_writer.writerows(data)
# Updating the CSV files with new data
def updater(filename):
    """Reset the ``Ans2`` value of the first data row in ``filename`` to 0.

    The file is read completely, changed in memory and then rewritten via
    ``writer(..., "update")``. A file without any data rows is left as is.

    Args:
        filename: Path of the CSV file to update in place.
    """
    with open(filename, newline="") as file:
        rows = list(csv.DictReader(file))
    # The read handle is closed here, before the same file is rewritten.
    if not rows:
        return
    rows[0]['Ans2'] = 0
    writer(list(rows[0].keys()), rows, filename, "update")
# Reading and updating xlsx files
import openpyxl
# Open the workbook and print its sheet names and the value of cell B3 on
# the 'Customer1' sheet.
theFile = openpyxl.load_workbook(r'C:\Users\joe_h\OneDrive\Documents\Data Analysis STUDYING\Excel\EdProjDBeg.xlsx')
print(theFile.sheetnames)
currentsheet = theFile['Customer1']
print(currentsheet['B3'].value)
# The same file is loaded a second time; `ws` is its active sheet.
wb = openpyxl.load_workbook(r'C:\Users\joe_h\OneDrive\Documents\Data Analysis STUDYING\Excel\EdProjDBeg.xlsx')
ws = wb.active
i = 0
cell_val = ''
# Finds which row is blank first
# NOTE(review): cell_val is '' at this point, so the condition is already
# false on entry and the loop body never runs.
while cell_val != '':
    # NOTE(review): `i` is an int, so 'A' + i adds a str and an int; it also
    # starts at 0, which would address "A0".
    cell_val = ws['A' + i].value
    i += 1
# Modify Sheet, Starting With Row i
# The workbook is saved before the prompt; the text read into `x` is not
# written to any cell in this snippet.
wb.save(r'C:\Users\joe_h\OneDrive\Documents\Data Analysis STUDYING\Excel\EdProjDBeg.xlsx')
x = input('Prompt: ')
This works for inputting data into an xlsx file.
Just use:
ws['A1'] = "data"
to input into cell A1
See code below for example using your original code:
# Append one user-entered value to the first empty cell of column A of the
# sheet named 'Sheet' in sample.xlsx.
wb = openpyxl.load_workbook('sample.xlsx')
print(wb.sheetnames)
currentsheet = wb['Sheet']
ws = currentsheet
# Alternative: ws = wb.active  (uses the workbook's active sheet instead)
i = 0
cell_val = ''
# Find the first blank row: openpyxl returns None for an empty cell.
while cell_val is not None:
    # Rows are 1-based (A1 is the first cell), so advance before reading.
    i += 1
    cell_val = ws['A' + str(i)].value
    print(cell_val)
x = input('Prompt: ')
# Set column A of the first blank row to the user input.
ws['A' + str(i)] = x
# Save the spreadsheet.
wb.save("sample.xlsx")
Also just made a few edits to your original while loop in the above code:
When a cell is blank, 'None' is returned
A1 is the first cell on the left, not A0 (moved i += 1 above finding value of cell)
Converted variable 'i' to a string when accessing the cell
See https://openpyxl.readthedocs.io/en/stable/ for the full documentation

Merging two .xlsx files

I copy the content from a .xlsx file to another .xlsx file.
Openpyxl can't handle header images, so I create an .xlsx file with xlsxwriter that includes the header image and then copy the content into that second .xlsx file with openpyxl. This works fine, except that openpyxl deletes the header image. How can I keep the image in the header?
This is my code so far:
import openpyxl as xl
from openpyxl.styles import Font, PatternFill, Alignment
from openpyxl import load_workbook
import xlsxwriter
logo = "logo.jpg"
sheet_titles = ('Sheet1', 'Sheet2', 'Sheet3')

########################################################
## Step 1: create the new sysfile with xlsxwriter
########################################################
workbook = xlsxwriter.Workbook('new_sysfile.xlsx')
new_sheets = [workbook.add_worksheet(title) for title in sheet_titles]

# A format is registered on the workbook but not applied to any cell here.
cell_format = workbook.add_format()
cell_format.set_font_name('Arial')
cell_format.set_font_size('11')

# Header: image on the left (&L&G), text on the right (&R).
header1 = '&L&G' + '&R hText '
for new_sheet in new_sheets:
    new_sheet.set_landscape()
    new_sheet.set_margins(top=1)
    new_sheet.set_header(header1, {'image_left': logo})
workbook.close()

#############################################################
# Step 2: open the source and destination files with openpyxl
sourcefile = "sysfile2.xlsx"
wb1 = xl.load_workbook(sourcefile)
source_sheets = [wb1[title] for title in sheet_titles]

dest_file = "new_sysfile.xlsx"
wb2 = xl.load_workbook(dest_file)
dest_sheets = [wb2[title] for title in sheet_titles]

# Step 3: copy the cell values of each source sheet's used range into the
# sheet of the same name in the destination workbook.
for src_sheet, dst_sheet in zip(source_sheets, dest_sheets):
    last_row = src_sheet.max_row
    last_col = src_sheet.max_column
    for row_no in range(1, last_row + 1):
        for col_no in range(1, last_col + 1):
            src_cell = src_sheet.cell(row=row_no, column=col_no)
            dst_sheet.cell(row=row_no, column=col_no).value = src_cell.value

# Note: these two settings are applied to Sheet2 of the *source* workbook,
# which this script never saves.
source_sheets[1].sheet_properties.pageSetUpPr.fitToPage = True
source_sheets[1].page_setup.fitToWidth = True

# Step 4: page setup on the destination sheets, then save.
for dst_sheet in dest_sheets:
    dst_sheet.sheet_properties.pageSetUpPr.fitToPage = True
    dst_sheet.page_setup.fitToHeight = False
wb2.save(str(dest_file))
I hope someone has a solution.
Thank you.

Python/openpyxl - Is there a way to copy a worksheet from one workbook to another with all properties (exact copy)

I have read through several similar threads on this forum and tried several of the recommendations, but I am not able to get all properties of the source worksheet copied over. Here's my code; I see that column widths and a few other things are not copied over. It would have been great if openpyxl implemented a function to copy a worksheet with all its attributes.
def copy_worksheet(src_xl, dest_xl, src_ws, dest_ws):
    """Copy cell values and basic cell styles from one workbook's sheet into another workbook.

    For every cell in the source sheet's used range the value, font,
    border, fill, number format, protection and alignment are copied to the
    same position in the destination sheet. Sheet-level settings such as
    column widths are not copied.

    Args:
        src_xl: Path of the source workbook.
        dest_xl: Path of the destination workbook; it is loaded, modified
            and saved back to the same path.
        src_ws: Name of the worksheet to read from the source workbook.
        dest_ws: Name of the worksheet to write to; created if missing.

    Raises:
        ValueError: If ``src_ws`` is not a sheet of the source workbook.
    """
    import openpyxl as xl
    from copy import copy

    # Open the source workbook and pick the sheet by name. Looking sheets up
    # by name avoids indexing `worksheets` with a position taken from
    # `sheetnames`, which differ when the workbook contains chartsheets.
    wb1 = xl.load_workbook(src_xl)
    if src_ws not in wb1.sheetnames:
        raise ValueError(f"{src_ws!r} is not a sheet of {src_xl!r}")
    ws1 = wb1[src_ws]

    # Open the destination workbook; reuse the target sheet or create it.
    wb2 = xl.load_workbook(dest_xl)
    if dest_ws in wb2.sheetnames:
        ws2 = wb2[dest_ws]
    else:
        ws2 = wb2.create_sheet(dest_ws)

    # Used range of the source sheet.
    mr = ws1.max_row
    mc = ws1.max_column

    for i in range(1, mr + 1):
        for j in range(1, mc + 1):
            cell = ws1.cell(row=i, column=j)
            new_cell = ws2.cell(row=i, column=j)
            new_cell.value = cell.value
            # Style objects are copied so the two workbooks do not share them.
            new_cell.font = copy(cell.font)
            new_cell.border = copy(cell.border)
            new_cell.fill = copy(cell.fill)
            new_cell.number_format = cell.number_format  # plain string
            new_cell.protection = copy(cell.protection)
            new_cell.alignment = copy(cell.alignment)

    # Save the destination workbook.
    wb2.save(str(dest_xl))
This seems to do the job - to set the column widths:
from openpyxl.utils import get_column_letter
# Mirror the width of every source column (1 .. max_column) onto the column
# with the same letter in the destination sheet.
for col_index in range(1, ws1.max_column + 1):
    letter = get_column_letter(col_index)
    ws2.column_dimensions[letter].width = ws1.column_dimensions[letter].width

How to write multiple sheets into a new excel, without overwriting each other?

I'm trying to write multiple excels' column A into a new excel's column A (assuming all the excels have one worksheet each.) I've written some code, which can write one excel's column A into the new excel's column A; but if there are multiple excels, the new excel's column A will be overwritten multiple times. So how could I just add all the column As to the new excel sheet one after another without overwriting each other?
Below are my code:
import os, openpyxl
path = os.getcwd()
def func(file):
    # Copies column A of each .xlsx file in `path` into column A of 'new.xlsx'.
    # NOTE(review): the nesting below is assumed — confirm against the
    # original post.
    # The `file` parameter is immediately rebound by the loop variable.
    for file in os.listdir(path):
        if file.endswith('.xlsx'):
            wb = openpyxl.load_workbook(file)
            sheet = wb.active
            colA = sheet['A']
            # `wb` is rebound to a brand-new empty workbook for this file ...
            wb = openpyxl.Workbook()
            # ... and writing always restarts at row 1.
            r = 1
            for i in colA:
                sheet = wb.active
                sheet.cell(row=r, column=1).value = i.value
                r += 1
            # Every file is saved under the same fixed name.
            wb.save('new.xlsx')
# NOTE(review): `file` is not assigned anywhere in this snippet before this call.
func(file)
Thank you so much!!
you could proceed for example as:
import os, openpyxl
path = os.getcwd()
def func(outputFile):
    """Write column A of every .xlsx file in ``path`` side by side into ``outputFile``.

    The n-th input workbook fills column n of a new single-sheet workbook.
    Input files are visited in sorted name order. ``outputFile`` itself and
    Excel lock files (``~$name.xlsx``) are skipped, so the function can be
    re-run in the same folder.

    Args:
        outputFile: Path of the workbook to create (overwritten if present).
    """
    # Create the output workbook.
    wbOut = openpyxl.Workbook()
    sheetOut = wbOut.active
    out_abs = os.path.abspath(outputFile)
    c = 0
    for fName in sorted(os.listdir(path)):
        if not fName.endswith('.xlsx') or fName.startswith('~$'):
            continue
        full_name = os.path.join(path, fName)
        if os.path.abspath(full_name) == out_abs:
            # Do not read back the result of a previous run.
            continue
        c += 1  # move to the next column in the output
        sheet = openpyxl.load_workbook(full_name).active  # input sheet
        # sheet['A'] yields the cells of column A from top to bottom.
        for r, cell in enumerate(sheet['A'], start=1):
            sheetOut.cell(row=r, column=c).value = cell.value
    wbOut.save(outputFile)
#"concatenate" all columns A into one single column
def funcAppend(outputFile):
    """Stack column A of every .xlsx file in ``path`` into column A of ``outputFile``.

    The columns are appended one below the other in sorted file-name order.
    ``outputFile`` itself and Excel lock files (``~$name.xlsx``) are
    skipped, so the function can be re-run in the same folder.

    Args:
        outputFile: Path of the workbook to create (overwritten if present).
    """
    wbOut = openpyxl.Workbook()
    sheetOut = wbOut.active
    out_abs = os.path.abspath(outputFile)
    r = 0  # last output row written so far
    for fName in sorted(os.listdir(path)):
        if not fName.endswith('.xlsx') or fName.startswith('~$'):
            continue
        full_name = os.path.join(path, fName)
        if os.path.abspath(full_name) == out_abs:
            # Do not read back the result of a previous run.
            continue
        sheet = openpyxl.load_workbook(full_name).active
        for cell in sheet['A']:
            r += 1
            sheetOut.cell(row=r, column=1).value = cell.value
    wbOut.save(outputFile)
func('test.xlsx')

Excel File and Sheet Location

I have a list of excel files and their corresponding sheet number. I need python to go to those sheets and find out the cell location for a particular content. Thanks to "alecxe", I used the following code and it worked well.
import xlrd
# Print the (row, column) index of every cell equal to `value`, looking in
# one named sheet per workbook. Python 2 syntax (print statement).
value = 'Avg.'
# Workbook paths and, position by position, the sheet to search in each one.
fn = ('C:/ab1.xls', 'C:/ab2.xls','C:/ab3.xls','C:/ab4.xls','C:/ab5.xls',)
sn = ('505840', '505608', '430645', '505464', '505084')
for name, sheet_name in zip(fn, sn):
    book = xlrd.open_workbook(name)
    sheet = book.sheet_by_name(sheet_name)
    # Scan every cell of the sheet; indices are 0-based.
    for row in range(sheet.nrows):
        for column in range(sheet.ncols):
            if sheet.cell(row,column).value == value:
                print row, column
Later I wanted to make changes and instead of writing down the filename and sheetnumber, I wanted python to grab them from an excel sheet. But the program is not printing anything. Can anyone show me where I made the mistake? Highly appreciate your comment!
import xlrd
import glob
import os
# Same search as before, but the file names and sheet names are read from
# the first sheet of an index workbook. Python 2 syntax (print statement).
# Initial search text; note that `value` is rebound in the loops below.
value = 'Avg.'
sheetnumber = []
filename = []
xlfile = "C:\\Users\\tsengineer\\Desktop\\Joydip Trial\\Simple.xls"
workbook = xlrd.open_workbook(xlfile)
sheet = workbook.sheet_by_index(0)
# Column index 17 of every row is collected as a file name.
# NOTE(review): this loop assigns to `value`, replacing the 'Avg.' search
# text set above.
for row in range(sheet.nrows):
    value = str(sheet.cell_value(row, 17))
    filename.append(value)
# Column index 15 of every row is collected as a sheet name.
# NOTE(review): `value` is rebound again; after this loop it holds the last
# string read from column index 15.
for row in range(sheet.nrows):
    value = str(sheet.cell_value(row, 15))
    sheetnumber.append(value)
fn = tuple(filename)
sn = tuple(sheetnumber)
for name, sheet_name in zip(fn, sn):
    book = xlrd.open_workbook(name)
    sheet = book.sheet_by_name(sheet_name)
    for row in range(sheet.nrows):
        for column in range(sheet.ncols):
            # Compares against the current `value`, which is no longer 'Avg.'.
            if sheet.cell(row,column).value == value:
                print row, column
For some reason the loop is definitely not working, as I am getting two empty lists as output. Any thoughts?
import xlrd
# Variant that collects the matching row and column indices in two lists
# and prints them at the end. Python 2 syntax (print statement).
# Initial search text; note that `value` is rebound in the loops below.
value = 'Avg.'
sheetnumber = []
filename = []
rowlist = []
columnlist = []
xlfile = "C:/Users/Joyd/Desktop/Experiment/Simple_1.xls"
workbook = xlrd.open_workbook(xlfile)
sheet = workbook.sheet_by_index(0)
# Column index 17 of every row is collected as a file name.
# NOTE(review): this loop assigns to `value`, replacing the 'Avg.' search
# text set above.
for row in range(sheet.nrows):
    value = str(sheet.cell_value(row, 17))
    filename.append(value)
# Column index 15 of every row is collected as a sheet name.
# NOTE(review): `value` is rebound again; after this loop it holds the last
# string read from column index 15.
for row in range(sheet.nrows):
    value = str(sheet.cell_value(row, 15))
    sheetnumber.append(value)
fn = tuple(filename)
sn = tuple(sheetnumber)
for fname, sname in zip(fn, sn):
    book = xlrd.open_workbook(fname)
    sheet = book.sheet_by_name(sname)
    for row in range(sheet.nrows):
        for column in range(sheet.ncols):
            # Compares against the current `value`, which is no longer 'Avg.'.
            if sheet.cell(row,column).value == value:
                rowlist.append(row)
                columnlist.append(column)
# The lists are printed once, after all workbooks have been scanned.
print rowlist
print columnlist

Categories