Looping through each cell of a column in Excel using Python - python

I have an Excel file with data in column A. I want to loop through each cell and stop once i reach the first cell that has a formula.
This is my code:
wb = openpyxl.load_workbook(r"path\filename.xlsx")
sheet = wb['Sheet1']
names=sheet['A']
for cellObj in names:
if cellObj.value.str[0] == '=':
print(cellObj.value)
But i get an error...
The response from #John Giorgio worked for that part.
Now what i'm trying to do next is to select the entire row and paste special.
Here's my code:
import openpyxl
wb=openpyxl.load_workbook(path1)
sheet = wb['Sheet1']
names=sheet['A']
for cellObj in names:
val = str(cellObj.value)
if val[0] == "=":
#print(val)
excel.Range("A10:TM10").Select() #PART I WANT TO SELECT IF TRUE
excel.Selection.PasteSpecial(Paste=-4163)
So once I come across the first cell with '=' i want to select the entire row or at least from column A to TM and hardcode it / pasteSpecial

wb = openpyxl.load_workbook(r"path\filename.xlsx")
sheet = wb['Sheet1']
names=sheet['A']
for cellObj in names:
val = str(cellObj.value)
if val[0] == "=":
print(val)

Related

Search specific text(text pattern) in excel and copy all resulting rows to another sheet in same workbook using openpyxl

I have an excel file with multiple sheets, 3rd column(contains around 500 rows) of sheet3 contains various names. I want to search column 3 for specific text and if it matches then copy the whole row along with the header row to new sheet within same excel.
Issue with "name column" is that most of the text refer to same item but naming convention is different, so:
अपर तहसीलदार,
नायाब तहसीलदार,
नायब तहसीलदार,
अतिरिक्त तहसीलदार
refers to same item but written differently, so for that I have to search for all variants.
I have no prior Python or openpyxl background so what I've got so far is:
import openpyxl
wb = openpyxl.load_workbook(r'C:/Users/Anas/Downloads/rcmspy.xlsx')
#active worksheet data
ws = wb.active
def wordfinder(searchString):
for i in range(1, ws.max_row + 1):
for j in range(1, ws.max_column + 1):
if searchString == ws.cell(i,j).value:
print("found")
print(ws.cell(i,j))
wordfinder("अपर तहसीलदार")
It is not showing any error but don't print anything either.
The excel sheet looks something like this:
I'm not certain, but I would suggest something along the lines of:
variants = {'alpha','alfa','elfa'}
data = []
rowCount = 0
for row in ws.values:
//each row is an array of cells
if rowCount == 0:
//header row
data.append(row)
elif row[2] in variants:
data.append(row)
rowCount += 1
wsNew = wb.create_sheet('Variations')
for line in data:
wsNew.append(line)
wb.save('newWorkbook.xlsx')

Replace part of a cell value with blank with openpyxl

I have a spread sheet where Column A has a list of computer names with domain\ preceding the computer name. I am trying to use openpyxl to remove the domain\ and leave just the computer names. Here is the code I have tried. There is no error however the script does not change anything on the spreadsheet.
import openpyxl
excelFile = openpyxl.load_workbook('C:\Users\user1\Documents\file.xlsx')
sheet1 = excelFile.get_sheet_by_name('Sheet1')
currentRow = 1
for eachRow in sheet1.iter_rows():
if sheet1.cell(row=currentRow, column=1).value == "'domain\'":
sheet1.cell(row=currentRow, column=1).value = ""
currentRow += 1
excelFile.save('C:\Users\user1\Documents\file.xlsx')
The easiest way is to replace the cell value with a trimmed string.
import openpyxl
filename = r'C:\Users\user1\Documents\file.xlsx'
excelFile = openpyxl.load_workbook(filename)
sheet1 = excelFile.active
for row in sheet1.iter_rows(min_col=1, max_col=1):
for cell in row:
if 'domain\\' in cell.value:
cell.value = cell.value[7:] #This will replace the cell value with a trimmed string
excelFile.save(filename)

How can I concatenate multiple rows of excel data into one?

I'm currently facing an issue where I need to bring all of the data shown in the images below into one line only.
So using Python and Openpyxl, I tried to write a parsing script that reads the line and only copies when values are non-null or non-identical, into a new workbook.
I get out of range errors, and the code does not keep just the data I want. I've spent multiple hours on it, so I thought I would ask here to see if I can get unstuck.
I've read some documentation on Openpyxl and about making lists in python, tried a couple of videos on youtube, but none of them did exactly what I was trying to achieve.
import openpyxl
from openpyxl import Workbook
path = "sample.xlsx"
wb = openpyxl.load_workbook(path)
ws = wb.active
path2 = "output.xlsx"
wb2 = Workbook()
ws2 = wb2.active
listab = []
rows = ws.max_row
columns = ws.max_column
for i in range (1, rows+1):
listab.append([])
cellValue = " "
prevCell = " "
for c in range (1, rows+1):
for r in range(1, columns+1):
cellValue = ws.cell(row=r, column=c).value
if cellValue == prevCell:
listab[r-1].append(prevCell)
elif cellValue == "NULL":
listab[r-1].append(prevCell)
elif cellValue != prevCell:
listab[r-1].append(cellValue)
prevCell = cellValue
for r in range(1, rows+1):
for c in range (1, columns+1):
j = ws2.cell(row = r, column=c)
j.value = listab[r-1][c-1]
print(listab)
wb2.save("output.xlsx")
There should be one line with the below information:
ods_service_id | service_name| service_plan_name| CPU | RAM | NIC | DRIVE |
Personally I would go with pandas.
import pandas as pd
#Loading into pandas
df_data = pd.read_excel('sample.xlsx')
df_data.fillna("NO DATA",inplace=True) ## Replaced nan values with "NO DATA"
unique_ids = df_data.ods_service_ids.unique()
#Storing pd into a list
records_list = df_data.to_dict('records')
keys_to_check = ['service_name', 'service_plan_name', 'CPU','RAM','NIC','DRIVE']
processed = {}
#Go through unique ids
for key in unique_ids:
processed[key] = {}
#Get related records
matching_records = [y for y in records_list if y['ods_service_ids'] == key]
#Loop through records
for record in matching_records:
#For each key to check, save in dict if non null
processed[key]['ods_service_ids'] = key
for detail_key in keys_to_check:
if record[detail_key] != "NO DATA" :
processed[key][detail_key] = record[detail_key]
##Note : doesn't handle duplicate values for different keys so far
#Records are put back in list
output_data = [processed[x] for x in processed.keys()]
# -> to Pandas
df = pd.DataFrame(output_data)[['ods_service_ids','service_name', 'service_plan_name', 'CPU','RAM','NIC','DRIVE']]
#Export to Excel
df.to_excel("output.xlsx",sheet_name='Sheet_name_1', index=False)
The above should work but I wasn't really sure on how you wanted to save duplicated records for the same id. Do you look to store them as DRIVE_0, DRIVE_1, DRIVE_2 ?
EDIT:
df could be exported in a different way. Replaced below #export to Excel with the following :
df.to_excel("output.xlsx",sheet_name='Sheet_name_1')
EDIT 2:
with no input data it was hard to see any flows. Corrected the code above with fake data
To be honest, I think you've managed to get confused by data structures and come up with something far more complicated than you need.
One approach that would suit would be to use Python dictionaries for each service, updating them row by row.
wb = load_workbook("sample.xlsx")
ws = wb.active
objs = {}
headers = next(ws.iter_rows(min_row=1, max_row=1, values_only=True))
for row in ws.iter_rows(min_row=2, values_only=True):
if row[0] not in objs:
obj = {key:value for key, value in zip(headers, row)}
objs[obj['ods_service_id']] = obj
else:# update dict with non-None values
extra = {key:value for key, value in zip(headers[3:], row[3:]) if value != "NULL"}
obj.update(extra)
# write to new workbook
wb2 = Workbook()
ws2 = wb2.active
ws2.append(headers)
for row in objs.values(): # do they need sorting?
ws2.append([obj[key] for key in headers])
Note how you can do everything without using counters.

copy value to certain column in different sheet using openpyxl

I am trying to get the column value from worksheet1 to worksheets2(in specific column), while skipping all the nul/None value in between. My code worked when I printed out all the values in worksheet1 column, exluding all the nul values. However when I saved it to worksheet2, it only showed the last value and duplicate that to the whole column(from row 2 to 20).
Don't know why only last value was written in the new column
from openpyxl import Workbook
from openpyxl import load_workbook
source_file = (r'XXX(Source file).xlsx')
dest_file = (r'XXX(dest file).xlsx')
wb1=load_workbook(source_file, data_only=True)
wb1.active=0
ws1=wb1.active
wb2=load_workbook(dest_file)
wb2.active=0
ws2=wb2.active
for a in range(9,43):
cell2 = ws1.cell(row = a, column = 10)
if cell2.value is None or cell2.value == 0:
continue
else:
print(cell2.value)
for b in range(2,20):
ws2.cell(row = b, column=4).value = cell2.value
wb2.save(dest_file)
Your second loop is nested so that it will always overwrite all values in the column of the second sheet with the same value from the first.
I'd do something like this:
idx = 2
for row in ws1.iter_rows(min_row=9, max_row=43, min_col=10, max_col=10):
cell = row[0]
if not cell.value:
ws2.cell(row=idx, column=4, value=cell.value)
idx += 1

Openpyxl: How to copy a row after checking if a cell contains specific value

I have a worksheet that is updated every week with thousands of rows and would need to transfer rows from this worksheet after filtering. I am using the current code to find the cells which has the value I need and then transfer the entire row to another sheet but after saving the file, I get the "IndexError: list index out of range" exception.
The code I use is as follows:
import openpyxl
wb1 = openpyxl.load_workbook('file1.xlsx')
wb2 = openpyxl.load_workbook('file2.xlsx')
ws1 = wb1.active
ws2 = wb2.active
for row in ws1.iter_rows():
for cell in row:
if cell.value == 'TrueValue':
n = 'A' + str(cell.row) + ':' + ('GH' + str(cell.row))
for row2 in ws1.iter_rows(n):
ws2.append(row2)
wb2.save("file2.xlsx")
The original code I used that used to work is below and has to be modified because of the large files which causes MS Excel not to open them (over 40mb).
n = 'A3' + ':' + ('GH'+ str(ws1.max_row))
for row in ws1.iter_rows(n):
ws2.append(row)
Thanks.
I'm not entirely sure what you're trying to do but I suspect the problem is that you have nested your copy loop.
Try the following:
row_nr = 1
for row in ws1:
for cell in row:
if cell.value == "TrueValue":
row_nr = cell.row
break
if row_nr > 1:
break
for row in ws1.iter_rows(min_row=row_nr, max_col=190):
ws2.append((cell.value for cell in row))
Question: I get the "IndexError: list index out of range" exception.
I get, from ws1.iter_rows(n)
UserWarning: Using a range string is deprecated. Use ws[range_string]
and from ws2.append(row2).
ValueError: Cells cannot be copied from other worksheets
The Reason are row2 does hold a list of Cell objects instead of a list of Values
Question: ... need to transfer rows from this worksheet after filtering
The following do what you want, for instance:
# If you want to Start at Row 2 to append Row Data
# Set Private self._current_row to 1
ws2.cell(row=1, column=1).value = ws2.cell(row=1, column=1).value
# Define min/max Column Range to copy
from openpyxl.utils import range_boundaries
min_col, min_row, max_col, max_row = range_boundaries('A:GH')
# Define Cell Index (0 Based) used to Check Value
check = 0 # == A
for row in ws1.iter_rows():
if row[check].value == 'TrueValue':
# Copy Row Values
# We deal with Tuple Index 0 Based, so min_col must have to be -1
ws2.append((cell.value for cell in row[min_col-1:max_col]))
Tested with Python: 3.4.2 - openpyxl: 2.4.1 - LibreOffice: 4.3.3.2
Use a list to hold the items in each column for the particular row.
Then append the list to your ws2.
...
def iter_rows(ws,n): #produce the list of items in the particular row
for row in ws.iter_rows(n):
yield [cell.value for cell in row]
for row in ws1.iter_rows():
for cell in row:
if cell.value == 'TrueValue':
n = 'A' + str(cell.row) + ':' + ('GH' + str(cell.row))
list_to_append = list(iter_rows(ws1,n))
for items in list_to_append:
ws2.append(items)
I was able to solve this with lists for my project.
import openpyxl
#load data file
wb1 = openpyxl.load_workbook('original.xlsx')
sheet1 = wb1.active
print("loaded 1st file")
#new template file
wb2 = openpyxl.load_workbook('blank.xlsx')
sheet2 = wb2.active
print("loaded 2nd file")
header = sheet1[1:1] #grab header row
listH =[]
for h in header:
listH.append(h.value)
sheet2.append(listH)
colOfInterest= 11 # this is my col that contains the value I'm checking against
for rowNum in range(2, sheet1.max_row +1): #iterate over each row, starting with 2 to skipping header from original file
if sheet1.cell(row=rowNum, column=colOfInterest).value is not None: #interested in non blank values in column 11
listA = [] # list which will hold my data
row = sheet1[rowNum:rowNum] #creates a tuple of row's data
#print (str(rowNum)) # for debugging to show what rows are copied
for cell in row: # for each cell in the row
listA.append(cell.value) # add each cell's data as an element in the list
if listA[10] == 1: # condition1 I'm checking for by looking up the index in the list
sheet2.append(listA) # appending the sheet2's next available row
elif listA[10] > 1: # condition2 I'm checking for by looking up the index in the list
# do something else and store it in bar
sheet2.append(bar) # appending the sheet2's next available row
print("saving file...")
wb2.save('result.xlsx') # save file
print("Done!")
Tested with: Python 3.7 openpyxl 2.5.4

Categories