Making my excel formatting python script more efficient - python

###I'm trying to create a program that checks a column in an Excel sheet and moves the rows with specific values to another sheet in the same workbook. Is there a more efficient way to do this, using pandas or something else?###
import time
# The timer is started before openpyxl is imported, so the reported time
# includes the import, the wait for user input and the file I/O.
start_time = time.perf_counter()
import openpyxl

wb = openpyxl.load_workbook("Test.xlsx")
ws = wb.active

column_string = input("Enter Column Letter with Email (A or B or C or leave blank to skip editing):").strip().upper()

if column_string:
    # Row numbers (header excluded) whose cell in the chosen column is empty.
    # Collected up front so the sheet is not modified while it is iterated.
    blank_rows = [cell.row for cell in ws[column_string][1:] if cell.value is None]

    # The extra sheet is created only when there is something to move.
    if blank_rows:
        ws_1 = wb.create_sheet('Linkedin Only')
        # Header row first, then only the rows with an empty cell in the
        # chosen column (instead of copying everything and pruning it).
        for row_number in [1, *blank_rows]:
            ws_1.append([cell.value for cell in ws[row_number]])
        # Delete bottom-up: removing a row shifts the rows below it up by
        # one, which would invalidate the remaining row numbers otherwise.
        for row_number in reversed(blank_rows):
            ws.delete_rows(row_number)

# Saved in both cases, as before (blank input leaves the workbook unedited).
wb.save("Test.xlsx")

end_time = time.perf_counter()
print(end_time - start_time, "seconds")

Related

I lose leading zeros when copy data from dataframe to openpyxl.workbook

I use openpyxl and pandas to fill rows with a color when a specified condition is met. Everything works fine, but some cells lose their leading zeros (e.g. 0345 is written out as 345), which I don't want. How can I keep the exact data?
# Typed read: supplies the column headers and the flag in the last column.
dt = pd.read_excel(file_luu, sheet_name="Sheet1")
# Text read: dtype=str stops pandas from converting digit-only text such as
# "0345" into the number 345, which is where the leading zeros were lost.
# NOTE(review): a cell stored as a *number* and merely displayed with a
# zero-padded number format has no zeros in its value to preserve - confirm
# how the source cells are stored.
dt_text = pd.read_excel(file_luu, sheet_name="Sheet1", dtype=str)
dinhDanh = len(dt.columns) - 1  # zero-based index of the last column (the flag)

wb = load_workbook(file_luu)
print(type(wb))
ws = wb['Sheet1']

# Header row.
for i, header in enumerate(dt.columns):
    ws.cell(row=1, column=i + 1).value = header

# One fill object is enough; it is reused for every highlighted cell.
highlight = PatternFill(start_color='FFD970', end_color='FFD970', fill_type="solid")

for row in range(dt.shape[0]):
    # The flag is a property of the whole row, so evaluate it once per row.
    flagged = dt.iat[row, dinhDanh] == True
    for col in range(dt.shape[1]):
        text = dt_text.iat[row, col]
        target = ws.cell(row + 2, col + 1)  # +2: sheet rows are 1-based and row 1 is the header
        # Missing values are written as a single space, as before.
        target.value = " " if pd.isna(text) else text
        if flagged:
            target.fill = highlight

# Drop the first column, then the flag column: after the first deletion the
# last column has moved from position dinhDanh + 1 to position dinhDanh.
ws.delete_cols(1)
ws.delete_cols(dinhDanh)
wb.save(file_luu)
Copy exactly all characters

openpyxl - Find the maximum length of a cell for each column

This is my first time using openpyxl. I want to find the length of the longest cell value in each column of an Excel sheet. I tried hard to write the code, but the output is per row, and even that doesn't come out correctly. How can I fix it to get what I want? If you know, please reply. Thank you.
import openpyxl

filepath = "test.xlsx"
wb = openpyxl.load_workbook(filepath)
ws = wb.active
max_row = ws.max_row
max_column = ws.max_column

# NOTE(review): the outer loop walks the rows, so one maximum is printed per
# row rather than per column - this is the behaviour the question is about.
for i in range(1, max_row + 1):
    max_length = 0
    for j in range(1, max_column + 1):
        try:
            if len(str(ws.cell(row=i, column=j).value)) > max_length:
                # NOTE(review): len() is applied to the raw value here, not to
                # str(value); for non-string values it raises TypeError, which
                # the bare except below silently discards.
                max_length = len(ws.cell(row=i, column=j).value)
        except:
            pass
    print(max_length)
Well, you can use ws.iter_cols(), as @CharlieClark mentioned in the comments. Here's an example:
# `sht` is the worksheet object (called `ws` in the question's code).
# iter_cols(2, 2) takes min_col and max_col, both inclusive, so this iterates
# column 2 only; to iterate column 7 use iter_cols(7, 7).
columns = sht.iter_cols(2, 2)

maxLen = 0  # length of the longest value seen so far
for col in columns:
    for cellRow in col:  # cellRow is the cell object, not its value
        if cellRow.value is None:
            continue  # empty cells have no length
        # str() first: len() raises TypeError on numbers and dates.
        maxLen = max(maxLen, len(str(cellRow.value)))

Copying same range multiple times from one workbook to another

Per the attached image, I am trying to copy and paste the same data into a different format.
I have figured out the first part of the code but I need help abbreviating the 2nd half after this comment:
"Fills in the concepts per store group step by step"
Currently, this code is not efficient and I would like to have it compressed into just a couple of lines.
Image of desired result (Right hand side):
Here is the code I have cobbled together so far:
import openpyxl as xl

# Raw strings: the backslashes in Windows paths are kept literally instead of
# relying on unrecognised escape sequences such as "\k" or "\D".
filename = r"c:\Users\kevin\Documents\Python Programs\Excel Python\Conceptlist.xlsx"
wb1 = xl.load_workbook(filename)
ws1 = wb1.worksheets[0]

# opening the destination excel file
filename1 = r"c:\Users\kevin\Documents\Python Programs\Excel Python\Conceptlist2.xlsx"
wb2 = xl.load_workbook(filename1)
ws2 = wb2.worksheets[0]

# copying the cell values from source
# excel file to destination excel file
rowctsq = ws1['A1']  # cell A1 of the source sheet; its value bounds the loop
j = 0
while j < rowctsq.value:
    j = j + 3  # row offset of the next repeated group of three rows
    for i in range(3, 6):
        # reading cell value from source excel file
        # Populates the store list repeatedly
        c = ws1.cell(row=i, column=1)
        ws2.cell(row=i, column=1).value = c.value
        ws2.cell(row=i + j, column=1).value = c.value
        # Fills in the concepts per store group step by step
        c = ws1.cell(row=i, column=2)
        ws2.cell(row=i, column=3).value = c.value
        c = ws1.cell(row=i, column=3)
        ws2.cell(row=i + 3, column=3).value = c.value
        c = ws1.cell(row=i, column=4)
        ws2.cell(row=i + 6, column=3).value = c.value
        c = ws1.cell(row=i, column=5)
        ws2.cell(row=i + 9, column=3).value = c.value

# saving the destination excel file
wb2.save(filename1)
Hopefully, I get extra community points for answering my own question! I worked through this and have pretty much gotten to my destination. Here's the code I came up with. Works like a charm. :)
import openpyxl as xl

# Raw strings: the backslashes in Windows paths are kept literally instead of
# relying on unrecognised escape sequences such as "\k" or "\D".
filename = r"c:\Users\kevin\Documents\Python Programs\Excel Python\Conceptlist.xlsx"
wb1 = xl.load_workbook(filename)
ws1 = wb1.worksheets[0]

# opening the destination excel file
filename1 = r"c:\Users\kevin\Documents\Python Programs\Excel Python\Conceptlist2.xlsx"
wb2 = xl.load_workbook(filename1)
ws2 = wb2.worksheets[0]

# copying the cell values from source
# excel file to destination excel file
rowctsq = ws1['A1']  # cell A1 of the source sheet; its value bounds the loop
j = 0  # row offset of the current group in the destination sheet
k = 0  # number of the concept column being copied (source column 1 + k)
while j < rowctsq.value and k < 6:
    j = j + 3
    k = k + 1
    for i in range(3, 6):
        # reading cell value from source excel file
        # Populates store column
        c = ws1.cell(row=i, column=1)
        ws2.cell(row=i + j, column=1).value = c.value
        # Populates concept 'x' column
        c = ws1.cell(row=i, column=1 + k)
        ws2.cell(row=i + j, column=3).value = c.value
        # Populates concept name column (taken from row 2 of the source sheet)
        c = ws1.cell(row=2, column=1 + k)
        ws2.cell(row=i + j, column=2).value = c.value

# saving the destination excel file
wb2.save(filename1)

Openpyxl in python making for loop to merge specific row from many excel files into one

I'm trying to write Python code that takes a specific row out of an Excel sheet and copies it into another workbook. I want to do this for many files (about 1000), so that a specific row from every Excel document is put into a new row in the destination Excel file. How do I modify my code to do this?
Here is what I have right now:
filename = str('c:\\user\\script\\results\\viktor\\ESS_result_DetectorE503B35_Sewage.xlsx')
wb1 = xl.load_workbook(filename)
ws1 = wb1.worksheets[0]

filename1 = "c:\\user\\script\\results\\viktor\\ESS.xlsx"
wb2 = xl.load_workbook(filename1)
ws2 = wb2.active

mr = 2
mc = ws1.max_column

# NOTE(review): `filename` is a single string, so this loop runs once per
# character of the path and repeats the same row-3 copy each time; it does
# not iterate over several files.
for i in filename:
    for j in range(1, mc + 1):
        c = ws1.cell(row=3, column=j)
        ws2.cell(row=3, column=j).value = c.value

wb2.save(str(filename1))
In your case, you shouldn't loop over the characters of the filename string, but over a list of filenames.
filenames = []  # fill in the paths of the workbooks to process
source_xl_filename = r'xxxxxx'  # placeholder: path of the workbook that is read from
source_wb = xl.load_workbook(source_xl_filename)
ws1 = source_wb.worksheets[0]


def batch_save_xl(filename):
    """Copy row 3 of the source sheet ``ws1`` into row 3 of one workbook.

    The workbook at *filename* is opened, row 3 of its active sheet is
    overwritten with the values of row 3 of ``ws1``, and the workbook is
    saved back to the same path.

    NOTE(review): the question asks for the opposite direction (one row from
    each of many files collected into a single destination workbook) -
    confirm which workbook is meant to be the source before using this.
    """
    wb2 = xl.load_workbook(filename)
    ws2 = wb2.active
    for j in range(1, ws1.max_column + 1):
        ws2.cell(row=3, column=j).value = ws1.cell(row=3, column=j).value
    # Save to the path that was opened; `filename1` is not defined in this snippet.
    wb2.save(filename)


for filename in filenames:
    batch_save_xl(filename)

Populate different cells in a column with different values using a "for" loop

I'm trying to populate the first 9 cells in the first column of an Excel spreadsheet with different values. The code as written populates the first 9 cells as expected, BUT instead of filling them with the individual letters of the string in the "j" variable - "a", "b", "c", "d", "e" - it fills all 9 cells with only the last value, "e". How can I make the code iterate through the string assigned to "j" and populate the cells in the spreadsheet with each of its letters?
Python version 3.6,
IDE: Pycharm
Here is the code:
import xlsxwriter

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet()

for h in range(0, 9):  # Cell position generator
    u = 1
    cell_position = (u + h)
    g = "A"
    f = str(cell_position)
    iterated_cell_position = [g + f]  # puts cell positions in a list
    j = "abcde"
    for p in iterated_cell_position:
        # NOTE(review): every letter of j is written to the same cell p, so
        # each write replaces the previous one and only the last letter stays.
        for e in j:
            worksheet.write(p, e)

workbook.close()
Please help me with this?
Thank you.
Your iterated_cell_position is a list with only one element, and the lines
for e in j:
    worksheet.write(p, e)
just write each letter to the same cell. So you write a to the cell, then b, then c, and so on, and only the last letter remains. Try:
import xlsxwriter

workbook = xlsxwriter.Workbook("test.xlsx")
worksheet = workbook.add_worksheet()

j = "abcde"
for h in range(0, 9):  # Cell position generator
    # Pick the letter for this cell; the modulo wraps around to the start of
    # j once h reaches its length (len(j) instead of a hard-coded 5).
    e = j[h % len(j)]
    cell_position = "A{}".format(h + 1)  # A1, A2, ... A9
    worksheet.write(cell_position, e)

workbook.close()

Categories