Understanding for loop with xlwt - python

[enter image description here] I would like to perform a calculation with xlwt. I'm new to coding and a bit confused about using a for loop to repeat the formula in the following columns; please help me understand how to use a for loop for the calculation below.
Please refer to the image: I need a for loop to fill the rest of the columns, so that I do not have to type 'w_sheet.write(1,2,xlwt.Formula('A2-B2'))' multiple times.
import xlrd
import xlwt
from xlutils.copy import copy
# Open the source workbook read-only with xlrd, then make a writable copy of it.
# The reader must be bound to rb before copy(rb) can use it.
rb = xlrd.open_workbook('Subtract.xls')
wb = copy(rb)
w_sheet = wb.get_sheet(0)
# write() takes 0-based (row, column): column index 2 is column C, and row
# index n is spreadsheet row n + 1, so each formula refers to its own row.
w_sheet.write(1,2,xlwt.Formula('A2-B2'))
w_sheet.write(2,2,xlwt.Formula('A3-B3'))
w_sheet.write(3,2,xlwt.Formula('A4-B4'))
w_sheet.write(4,2,xlwt.Formula('A5-B5'))
wb.save('Subtract.xls')

Hi, try this and see if you can understand it :D
import xlrd
import xlwt
from xlutils.copy import copy
import os
file_path = "test.xls"
# formatting_info=True asks xlrd to load cell formatting so the copy keeps it.
rb = xlrd.open_workbook(file_path, formatting_info=True)
wb = copy(rb)

# Visit every sheet of the source workbook and write into the writable sheet
# at the same position, so each sheet gets formulas for its own rows.
for sheet_index, sheet in enumerate(rb.sheets()):
    w_sheet = wb.get_sheet(sheet_index)
    # Row 0 is skipped; 0-based row n is spreadsheet row n + 1.
    for row in range(1, sheet.nrows):
        # Spreadsheet row number used inside the formula text
        index = row + 1
        # Build the formula, e.g. 'A2-B2'
        operation = 'A' + str(index) + '-B' + str(index)
        # Column index 2 is column C
        w_sheet.write(row, 2, xlwt.Formula(operation))
        # Show what is being written so the loop is easy to follow
        print("index = " + str(index) + " operation = " + operation)

# Save next to the input as <file_path>.out<extension>, e.g. test.xls.out.xls
wb.save(file_path + '.out' + os.path.splitext(file_path)[-1])

Related

adjust column width size using openpyxl

I'm facing trouble in adjusting column width of the below excel file. I'm using this code.
from openpyxl.utils import get_column_letter
# Drop column 'A', which holds the pandas index numbers.
ws.delete_cols(1)
# Give every remaining column the width of its longest stringified cell value.
for col_cells in ws.iter_cols():
    letter = get_column_letter(col_cells[0].column)
    text_lengths = [len(str(c.value)) for c in col_cells]
    # ws.column_dimensions[letter].bestFit = True  # tried this, but the result is the same
    ws.column_dimensions[letter].width = max(text_lengths)
Excel sheet:
The output I'm getting:
Something like this should manage the deletion of column A using Openpyxl
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
path = 'col_width.xlsx'
wb = load_workbook(path)
ws = wb['Sheet1']

# Unmerge every merged range first, remembering where each one should be
# re-created once column A has been removed (one column to the left).
remerge_cells_list = []
# Iterate over a snapshot: unmerging changes ws.merged_cells while we loop,
# and a snapshot works whether `ranges` is a list or a set.
for current_merge in list(ws.merged_cells.ranges):
    new_max_col = current_merge.max_col - 1
    # A range that started in column A keeps its left edge in the new column A.
    new_min_col = max(current_merge.min_col - 1, 1)
    single_cell = (new_min_col == new_max_col
                   and current_merge.min_row == current_merge.max_row)
    # Skip ranges that vanish with column A or shrink to a single cell.
    if new_max_col >= 1 and not single_cell:
        remerge_cells_list.append(
            get_column_letter(new_min_col) + str(current_merge.min_row) + ':'
            + get_column_letter(new_max_col) + str(current_merge.max_row))
    print("Removing merge: " + current_merge.coord)
    ws.unmerge_cells(current_merge.coord)

print("\nDeleting column A\n")
ws.delete_cols(1)  # remove col 'A' which has pandas index no.

# Set the column width dimensions
for column in ws.iter_cols():
    name = get_column_letter(column[0].column)
    # Empty cells count as zero characters instead of len("None") == 4.
    new_col_length = max(
        0 if cell.value is None else len(str(cell.value)) for cell in column)
    ws.column_dimensions[name].width = new_col_length + 2  # extra bit for padding

# Re-merge the cells from the remerge_cells_list
for merge in remerge_cells_list:
    print("Add adjusted cell merge: " + merge)
    ws.merge_cells(merge)

wb.save('col_width_out.xlsx')
After many hours of research finally, I found it.
NOTE : In the below code, sheet is the worksheet name. Usually in the documentation, we can see it as ws. Please don't forget to change the worksheet name.
# Importing the necessary modules
try:
from openpyxl.cell import get_column_letter
except ImportError:
from openpyxl.utils import get_column_letter
from openpyxl.utils import column_index_from_string
from openpyxl import load_workbook
import openpyxl
from openpyxl import Workbook
# Size each column from its longest stringified cell value, scaled by 1.23.
for cells in sheet.columns:
    longest = max(map(len, (str(c.value) for c in cells)))
    letter = get_column_letter(cells[0].column)
    # Columns whose longest value is empty text are left untouched.
    if longest <= 0:
        continue
    sheet.column_dimensions[letter].width = longest*1.23
UPDATE : This code doesn't work for all, but don't hesitate to try it..

updating cell's value in excel file using openpyxl without over writing with python

I want to update a cell in an Excel file without overwriting it: the new data should be added to the same cell.
The following code is an example: the old value is "RAI" and I want to add "ANKIT" without overwriting it, so that the cell's value becomes: "RAI", "ANKIT"
# import openpyxl module
import openpyxl
# Start from a brand-new workbook and use its active sheet.
wb = openpyxl.Workbook()
sheet = wb.active
# Fetch cell A1 and set its value in a single call.
c1 = sheet.cell(row=1, column=1, value="ANKIT")
wb.save("C:\\Users\\user\\Desktop\\demo.xlsx")
Try this:
# Add "ANKIT" on a new line after whatever the cell already holds.
existing = c1.value
if existing is None or existing == "":
    # Nothing to keep, so no leading separator is needed.
    c1.value = "ANKIT"
else:
    # str() keeps numeric or other non-text contents instead of failing on "+".
    c1.value = str(existing) + "\n" + "ANKIT"

How to implement the iterative way to change the filename reading and how to combine result into single excel file

I am new to Python. I have a task: for every Excel file (1.xlsx to 350.xlsx, around 350 files, all contained in a single folder, Videos) I have to find some of the values below. I have written the following code and it works fine, but it is time-consuming because I have to change the file name manually for every iteration. At the end of the process I also have to combine the processed data from all 350 Excel files into a single Excel file, but my code overwrites the output on each iteration. Please help me resolve this problem.
import pandas as pd

data12 = pd.read_excel(r'C:\Users\Videos\1.xlsx')
gxt = data12.iloc[:, 0]
gyan = data12.iloc[:, 1]
# Row slices of the second column; named int_part so the builtin `int`
# is not shadowed.
int_part = gyan.iloc[98:197]
comp = gyan.iloc[197:252]
seg = gyan.iloc[252:319]
A = max(int_part)
B = max(comp)
C = min(comp)
D = max(seg)
# NOTE(review): the snippet referenced an undefined `stat`; it is presumably
# this Series of the four summary values - confirm.
stat = pd.Series([A, B, C, D])
frame_data = [gyan, comp, seg, stat]
result = pd.concat(frame_data)
result.to_excel("output.xlsx", sheet_name='modify_data', index=False)
thank you for helping.
Please check below code:
import pandas as pd
import numpy as np
import openpyxl
from openpyxl import load_workbook, Workbook
import os
# Output workbook path and the worksheet that receives the combined rows.
# Raw strings are required: in a normal literal "\U" starts a unicode escape,
# which is a SyntaxError on Python 3.
output = r'C:\Users\Videos\output.xlsx'
worksheet = 'Sheet'

# Always build the result in a fresh workbook and write into one named sheet;
# an existing output file is replaced when the workbook is saved.
wb = Workbook()
ws = wb.active
ws.title = worksheet

# If file not present at location, then create one
if os.path.isfile(output):
    print('File Present')
else:
    print('Creatted New file')
    wb.save(output)

# Loop for all 350 files
for i in range(1, 351):
    print('File {}:'.format(i))
    data12 = pd.read_excel(r'C:\Users\Videos\{}.xlsx'.format(i))
    gxt = data12.iloc[:, 0]
    gyan = data12.iloc[:, 1]
    # Row slices of the second column; int_part avoids shadowing builtin int.
    int_part = gyan.iloc[8:19]
    comp = gyan.iloc[19:25]
    seg = gyan.iloc[25:31]
    # Summary values; they are not part of frame_data yet, so they do not
    # reach the output - add `s` to frame_data if they should.
    A = max(int_part)
    B = max(comp)
    C = min(comp)
    D = max(seg)
    s = pd.Series([A, B, C, D])
    frame_data = [gyan, comp, seg]
    result = pd.DataFrame(pd.concat(frame_data))
    result_rows = result.to_numpy().tolist()
    print('Total rows = ', len(result_rows))
    # Append this file's rows below everything collected so far.
    for row in result_rows:
        ws.append(row)

# One save after the loop writes the combined data once instead of rewriting
# the growing file 350 times.
wb.save(output)
This will help to run through all 350 files and save it to output file.
Also make changes to frame_data accordingly. I hope this works for you.
The following code gives you all the files in a folder:
from os import listdir

# listdir returns the names of the entries in the folder.
filenames = listdir(r'C:\Users\Videos')
# Number the output workbooks from 1 so each input gets its own file.
for count, file in enumerate(filenames, start=1):
    print(file)
    # ... read `file` and build `frame_data` for it here ...
    # At the end of each iteration, write this file's result to its own workbook.
    output = "output-" + str(count) + ".xlsx"
    result = pd.concat(frame_data)
    result.to_excel(output, sheet_name='modify_data', index=False)
For the master file, you can save the data in a pandas dataframe and keep appending each file in the loop.

comparing one column in two spreadsheets in openpyxl

I am making an excel comparing program but I seem to be stuck. I want to compare two excel files in a spreadsheet. Here is my code:
import openpyxl
# Load both versions of the server list and take each workbook's active sheet.
wb = openpyxl.load_workbook('C:\\Users\\Bill\\Desktop\\CK_Server_list_0.1.xlsx')
ws = wb.active
# The assignment must stay on one line; a bare "wb1 =" is a syntax error.
wb1 = openpyxl.load_workbook('C:\\Users\\Bill\\Desktop\\CK_Server_list_0.2.xlsx')
ws1 = wb1.active
# NOTE(review): the original indentation was lost; the nesting below is a
# best-effort reconstruction - confirm against the author's script.
# Walk the first column of the first sheet; each x is a tuple of cells.
for x in ws.iter_cols(max_col=1):
    for cell in x:
        print(cell.value, cell.coordinate)
        for row1 in ws1.iter_cols(min_col=1):
            # x is the whole column tuple here, not a row number, so passing
            # it as row= triggers the reported tuple/int comparison error.
            if row1[0].value != ws.cell(row=x, column=1).value:
                print(str(row1[0].value) + ' is not equal to ' + str(ws.cell(row=x, column=1).value + ' ' + str(ws.cell(row=x, column=1).coordinate)))
And every time I run this it gives me an error saying that tuple() < int(). Can anyone fix this problem? Any help would be appreciated.
This error pops up because your variable x contains a tuple of cell objects at the time when the line if row1[0].value != ws.cell(row=x, column=1).value: gets executed. The input argument row requires an int value instead.
I think that a good approach for your problem would be to use for loops in combination with zip statements (more on zip here):
import openpyxl
# Open both versions of the server list and grab their active sheets.
wb = openpyxl.load_workbook('C:\\Users\\Bill\\Desktop\\CK_Server_list_0.1.xlsx')
wb1 = openpyxl.load_workbook('C:\\Users\\Bill\\Desktop\\CK_Server_list_0.2.xlsx')
ws, ws1 = wb.active, wb1.active

# Pair the sheets column by column, then cell by cell; zip stops at the
# shorter side, so only positions present in both sheets are compared.
for left_col, right_col in zip(ws.iter_cols(), ws1.iter_cols()):
    for left, right in zip(left_col, right_col):
        if left.value == right.value:
            continue
        # Report the differing values and where the first sheet holds them.
        print(''.join([str(left.value), ' is not equal to ', str(right.value), ' ', str(left.coordinate)]))

AttributeError while trying to sort, loop and write to worksheets using python

I currently have a workbook with 5 worksheets. There is data in columns A - E; every worksheet has data in the same columns, but each worksheet has a different number of rows. I am entering a formula in column F that will go from cell F4 to whatever the last row is in that worksheet.
I am able to loop through the sheets and I am able to create a formula that goes from F4 to the last row; however, I am unable to get the two to work together.
Code
import os
import os.path
import urllib
import xlrd
import xlwt
from xlutils.copy import copy
# NOTE(review): the original indentation was lost; the nesting below is a
# best-effort reconstruction - confirm against the author's script.
fname = "test.xls"
# Not used anywhere else in this snippet.
destination = 'C:\Users\Raj Saha\Google Drive\Python\Excel-Integration'
rb = xlrd.open_workbook(fname,formatting_info=True) #original workbook
r_sheet = rb.sheet_by_index(1) #original worksheet
# Bold style used for the column header written below.
style = xlwt.easyxf('font: bold 1')
wb = copy(rb) #virtual workbook
#sheet = wb.get_sheet(1)
# A range of sheet indexes (plain integers), one per sheet in the workbook.
shxrange = range(rb.nsheets)
sh = rb.sheet_by_name("Data 1")
#print "total rows: %d, rows less headers: %d" % (nrows, rows)
# Each `sheet` here is an int taken from shxrange, not a sheet object, so
# reading sheet.nrows raises the AttributeError shown in the traceback.
for sheet in shxrange:
    nrows = sheet.nrows
    rows = nrows - 4
    for i in range(rows):
        # Formula B<n>-C<n> into column index 5 (column F), starting at F4.
        sheet.write(i+3, 5, xlwt.Formula("B%d-C%d" % (i+4, i+4)))
    # Header text for the formula column, written to cell F3.
    sheet.write(2,5,"CL1-CL2",style)
wb.save(fname)
I get the following error message:
File "C:/formulas_multi_sheets.py", line 31, in
nrows = sheet.nrows
AttributeError: 'int' object has no attribute 'nrows'
I assume the error in line 31 would apply to line 32. I am using python 2.7.
Here, shxrange gives you integers, but what you need is a sheet object. To get the sheet object for every sheet in your workbook:
for sheet_no in shxrange:
    # xlrd sheets are read-only, so take the row count from the xlrd sheet
    # and write through the matching sheet of the writable copy.
    read_sheet = rb.sheet_by_index(sheet_no)
    write_sheet = wb.get_sheet(sheet_no)
    nrows = read_sheet.nrows
    rows = nrows - 4
    # range() of a non-positive count is empty, so short sheets only get the header.
    for i in range(rows):
        # Column index 5 is column F; row index i+3 is spreadsheet row i+4.
        write_sheet.write(i+3, 5, xlwt.Formula("B%d-C%d" % (i+4, i+4)))
    # Header for the formula column in F3.
    write_sheet.write(2, 5, "CL1-CL2", style)
Due to the limitations of xlrd and xlwt, I turned to win32com. Here is the solution that I came up with. I am able to sort data in multiple worksheets and add formulas to multiple worksheets.
Code
import win32com.client as win32
import types
from win32com.client import constants as c
fname = ('C:/Users/test.xls')
xlApp = win32.gencache.EnsureDispatch('Excel.Application')
xlApp.Visible = True
# Numeric values of the Excel constants passed to Range.Sort below.
xlDescending = 2
xlSortColumns = 1
# Indexes of the sheets whose data gets sorted.
sh_range = range(2,6)
wb = xlApp.Workbooks.Open(fname)

# Add the CL1-CL2 column to every sheet: header in F3, formula from F4 down.
for sh in wb.Sheets:
    rangeFrom = 'F4'
    column = 'F'
    lastRow = sh.UsedRange.Rows.Count
    rangeTo = rangeFrom + ':' + column + str(lastRow)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("%r" % rangeTo)
    xlApp.Worksheets(sh.Name).Activate()
    ws = xlApp.ActiveSheet
    ws.Range('f3').Value = "CL1-CL2"
    ws.Range('f4').Formula = "=B4-C4"
    # Only fill down when there are rows below F4; otherwise the destination
    # range would not extend past the source cell.
    if lastRow > 4:
        ws.Range('f4:f4').Select()
        xlApp.Selection.AutoFill(ws.Range(rangeTo),win32.constants.xlFillDefault)

# Sort the data block of each selected sheet on column A, descending.
for i in sh_range:
    xlApp.Sheets(i).Range("A4:E50000").Sort(Key1=xlApp.Sheets(i).Range("A4"), Order1=xlDescending, Orientation=xlSortColumns)

wb.Save()
wb.Close()

Categories