Replace special character "-" with 0 integer value - python

I am traversing an Excel sheet using openpyxl and copying its contents over to another Excel sheet.
# Question snippet: walks every cell of a sheet and copies its value into another sheet.
# NOTE(review): indentation was lost in this paste; the loop nesting is presumed, not shown.
import openpyxl
NEW_EXCEL_FILE = openpyxl.load_workbook('workbook1.xlsx')
NEW_EXCEL_FILE_WS = NEW_EXCEL_FILE.active
# NOTE(review): `sheet_name` is not defined anywhere in this snippet.
SHEET = NEW_EXCEL_FILE.get_sheet_by_name(sheet_name)
for i,row in enumerate(SHEET.iter_rows()):
for j,col in enumerate(row):
# Passing the int 0 as the replacement is what triggers the TypeError quoted
# below: replace() requires a str as its second argument.
col.value = col.value.replace("-", 0)
# NOTE(review): `NEW_FILE_SHEET` is never assigned in this snippet
# (only NEW_EXCEL_FILE_WS and SHEET are).
NEW_FILE_SHEET.cell(row=i+1,column=j+1).value = col.value
NEW_EXCEL_FILE.save('workbook1.xlsx')
I need to replace the contents of every cell that contains "-" with 0.
When I tried col.value.replace("-", 0), it did not accept the int value.
I am getting the below exception,
TypeError: replace() argument 2 must be str, not int
please help.
Thanks,

You can do this with .cell(row=i, column=j).value and a for loop. Try this:
# Replace every cell whose value is exactly "-" with the integer 0, then save
# the workbook back to the same file.
import openpyxl

NEW_EXCEL_FILE = openpyxl.load_workbook(filename="workbook1.xlsx")
NEW_EXCEL_FILE_WS = NEW_EXCEL_FILE.active

# iter_rows() hands out the cell objects directly, so each cell is fetched once
# instead of being looked up twice by (row, column).
for row in NEW_EXCEL_FILE_WS.iter_rows():
    for cell in row:
        # Compare the whole value: only a lone "-" becomes 0; text that merely
        # contains a dash is left untouched.
        if cell.value == "-":
            cell.value = 0

NEW_EXCEL_FILE.save(filename="workbook1.xlsx")

Related

how to write dynamic lines from pdf to excel

# Question snippet: extracts the text of the first PDF page and tries to write
# one text line per row into column 1 of the active worksheet.
# NOTE(review): indentation was lost in this paste; the three statements after
# `for` are presumably the loop body.
import pdfplumber
import openpyxl
pdf = pdfplumber.open("path")
page = pdf.pages [0]
text = page.extract_text()
lin = text.split("\n")
wb = openpyxl.load_workbook ("path")
ws= wb.active
ws.title = "S1"
u = int(0)
i = int(1)
# Iterating `lin` directly rebinds `u` to each line (a str), so `lin [u]` below
# indexes the list with a str -- the TypeError reported under "Error:".
for u in lin:
ws.cell(row = i, column =1).value = lin [u]
u = u+1
i = i+1
wb.save ("path")
# NOTE(review): no parentheses, so this only references the method and never calls it.
pdf.close
print("Ok!")
Error:
TypeError: list indices must be integers or slices, not str
The error occurs in the for loop.
I split the PDF text into lines, and now I want to write each line to Excel.
Example:
line in specific pdf file:
A
B
C
D
I have every line from the PDF in the variable "lin": for example, lin[0] holds the value A and lin[1] holds the value B.
I want to take the value of lin[0] and write it to cell A1, then take the value of lin[1] and write it to cell A2 in the same column, and so on.
You should use an integer variable u as the index.
i = 1  # first worksheet row to write to
for u in range(len(lin)):
    # range() yields ints, so u is a valid list index here. It needs no
    # initial value and no manual increment: range() supplies the next
    # index on every iteration.
    ws.cell(row=i, column=1).value = lin[u]
    i += 1
You can also replace the for loop with a for-each loop:
# enumerate(..., start=1) supplies the row number alongside each line, so no
# separate counter has to be initialised or incremented by hand.
for i, value in enumerate(lin, start=1):
    ws.cell(row=i, column=1).value = value
In this case you don't need to worry about the index variable.
# Call close() with parentheses; the bare `pdf.close` in the question only
# references the method without calling it.
pdf.close()

I lose leading zeros when copy data from dataframe to openpyxl.workbook

I use openpyxl and pandas to fill rows with a color when a specified condition is met. Everything works fine, but in some cells I lose leading zeros (for example, 0345 is written out as 345), which I don't want. How can I keep the data exactly as it is?
# Question snippet: reads Sheet1 into a DataFrame, writes the values back into
# the workbook as text, and fills cells of rows whose last column is True.
# NOTE(review): indentation was lost in this paste; loop/if nesting is presumed, not shown.
# NOTE(review): `pd`, `load_workbook`, `PatternFill` and `file_luu` are used but
# not imported/defined within this snippet.
dt = pd.read_excel(file_luu, sheet_name="Sheet1")
dt = pd.DataFrame(dt)
# Zero-based position of the last column; read below as the per-row flag.
dinhDanh = len(dt.columns) - 1
wb = load_workbook(file_luu)
print(type(wb))
ws = wb['Sheet1']
# Row 1 receives the DataFrame column labels.
for i in range(0, dt.shape[1]):
ws.cell(row=1, column=i + 1).value = dt.columns[i]
for row in range(dt.shape[0]):
for col in range(dt.shape[1] ):
# Each value is stringified from the DataFrame; a value whose str() is "nan"
# is written as a single space instead.
# NOTE(review): read_excel is called without a dtype, so presumably "0345" has
# already been parsed as a number before str() runs -- confirm.
ws.cell(row + 2, col + 1).value = str(dt.iat[row, col]) if (str(dt.iat[row, col]) != "nan") else " "
if dt.iat[row, dinhDanh] == True:
ws.cell(row + 2, col + 1).fill = PatternFill(start_color='FFD970', end_color='FFD970',
fill_type="solid") # used hex code for brown color
ws.delete_cols(1)
ws.delete_cols(dinhDanh)
wb.save(file_luu)
Copy exactly all characters

Making my excel formatting python script more efficient

I'm trying to create a program that checks a column in Excel and moves rows with specific values to another sheet in the same workbook, but I was wondering whether there is a more efficient way to do this using pandas or something else.
# Question snippet: if the chosen column has an empty cell, copies the whole
# sheet to a new 'Linkedin Only' sheet, removes from the copy the rows whose
# chosen-column cell is filled, removes from the original the rows whose
# chosen-column cell is empty, saves, and prints the elapsed time.
# NOTE(review): indentation was lost in this paste; the nesting (in particular
# what `break` and `else` belong to) is presumed, not shown.
import time
start_time = time.perf_counter ()
import openpyxl
wb = openpyxl.load_workbook("Test.xlsx")
ws=wb.active
mr,mc=ws.max_row,ws.max_column
column_string=input("Enter Column Letter with Email (A or B or C or leave blank to skip editing):").upper()
if len(column_string)>0:
# [1:] skips the first cell of the column (presumably a header row).
for cell in ws[column_string][1:]:
if cell.value is None:
ws_1=wb.create_sheet('Linkedin Only')
# Cell-by-cell copy of the full used range into the new sheet.
for i in range (1, mr +1):
for j in range (1, mc + 1):
c = ws.cell(row = i, column = j)
ws_1.cell(row = i, column = j).value = c.value
break
# NOTE(review): `ws_1` is only assigned inside the `if` above, so it is unbound
# when no cell in the column is empty -- confirm intended handling.
# NOTE(review): rows are deleted while the column's cells are being iterated in
# both loops below -- check that no row is skipped after a deletion.
for cell in ws_1[column_string][1:]:
if cell.value is not None:
ws_1.delete_rows(cell.row)
for cell in ws[column_string][1:]:
if cell.value is None:
ws.delete_rows(cell.row)
wb.save("Test.xlsx")
else:
wb.save("Test.xlsx")
end_time = time.perf_counter ()
print(end_time - start_time, "seconds")

openpyxl - Find the maximum length of a cell for each column

This is my first time using openpyxl. I want to know the length of the longest cell in each column of an Excel sheet. I tried hard to write the code, but the output is per row instead of per column, and even that doesn't come out correctly. How can I fix it to get what I want? If you know, please reply. Thank you.
# Question snippet: tries to print the length of the longest cell value.
# NOTE(review): indentation was lost in this paste; nesting is presumed, not shown.
import openpyxl
filepath = "test.xlsx"
wb = openpyxl.load_workbook(filepath)
ws = wb.active
max_row = ws.max_row
max_column = ws.max_column
# The outer loop runs over rows and resets max_length for each one, so the
# maximum is taken across a row rather than down a column.
for i in range(1, max_row + 1):
max_length = 0
for j in range(1, max_column + 1):
try:
# The comparison measures str(value), but the assignment measures the raw
# value; for values without a length (e.g. numbers) that len() raises and the
# bare `except` below silently discards the error.
if len(str(ws.cell(row=i, column=j).value)) > max_length:
max_length = len(ws.cell(row=i, column=j).value)
except:
pass
print(max_length)
Well, you can use ws.iter_cols(), as @CharlieClark mentioned in the comments. Here's an example:
# Length of the longest cell in one column; stays 0 if the column has no values.
maxLen = 0
# iter_cols(min_col, max_col) includes both bounds, so (2, 2) visits only
# column 2; to iterate through column 7 use (7, 7).
columns = sht.iter_cols(2, 2)
for col in columns:
    for cellRow in col:  # cellRow is the specific cell, not its value
        if cellRow.value is None:
            continue  # empty cells have no length to measure
        # str() lets numeric cells be measured too; len() on a number raises TypeError.
        maxLen = max(maxLen, len(str(cellRow.value)))

python xlrd string match

I couldn't find anything in the API. Is there a way to return the row number or coordinates of a cell based on a string match? For instance: you give the script a string, it scans through the .xls file, and when it finds a cell with the matching string, it returns the coordinates or row number.
# Sketch of a function body (note the `return` statements): `sheet` and
# `search_value` are expected to be supplied by the enclosing function.
# NOTE(review): indentation was lost in this paste; presumably `return None`
# comes after both loops.
for i in range(sheet.nrows):
row = sheet.row_values(i)
for j in range(len(row)):
if row[j] == search_value:
# First match wins: its zero-based (row, column) indices are returned.
return i,j
return None
Something like that... just a basic search.
You could try the following function (thank you, Joran):
def look4_xlrd(search_value, sheet):
    """Find every cell of *sheet* whose value equals *search_value*.

    Args:
        search_value: Value compared with ``==`` against each cell value.
        sheet: Object exposing ``nrows`` and ``row_values(i)``; presumably an
            xlrd sheet, given the function name.

    Returns:
        A ``(lines, columns)`` pair of parallel lists: match number *k* sits
        at zero-based row ``lines[k]`` and column ``columns[k]``. Both lists
        are empty when nothing matches or the sheet has no rows.
    """
    lines = []
    columns = []
    for i in range(sheet.nrows):
        # No explicit `del` of the row is needed: the name is simply rebound
        # on the next iteration, and deleting it after the loop would fail
        # for a sheet with zero rows.
        for j, value in enumerate(sheet.row_values(i)):
            if value == search_value:
                lines.append(i)
                columns.append(j)
    return lines, columns

Categories