Starting iterate from specific column - python

This is my .xslx file:
data.xlsx
from openpyxl import load_workbook
workbook = load_workbook('/path/', read_only=True)
first_sheet = workbook.sheetnames()[0]
worksheet = workbook[first_sheet]
for row in worksheet:
for cell in row:
print(cell.value)
How can I start iteration from A4? I need to print year and month value.

You can use iter_rows, either by specifying the offset or the range
for row in worksheet.iter_rows('A4:B9'):
for cell in row:
print(cell.value)
Btw: sheetnames()[0] should be sheetnames[0] or get_sheet_names()[0]

Related

I want to iterate this list in my excel file, can you show me a different way to do it?

My question is simple and I'm sorry to ask it here. But I tried several ways to iterate through my excel file and I'm having trouble finding the solution.
from openpyxl import workbook, load_workbook
wb = load_workbook("italian_team.xlsx")
ws = wb.active
rows = ws["A"]
equipe = ["Juventus", "Ac Milan", "Torino", "Pescara", "As Roma", "Genoa", "Napoli"]
for cell in rows:
x = equipe[cell]
wb.save("italian_team.xlsx")
Do you mean you just want to insert your list as a row in the workbook?
If so there are a few options, you could just append the list as is to the sheet in which case it will be enter after the last used row.
Or specify the row (and column) to add to.
Both options are shown in the code below
from openpyxl import workbook, load_workbook
wb = load_workbook("italian_team.xlsx")
ws = wb.active
# rows = ws["A"]
equipe = ["Juventus", "Ac Milan", "Torino", "Pescara", "As Roma", "Genoa", "Napoli"]
# for cell in rows:
# x = equipe[cell]
# This will append the list after the last used row
ws.append(equipe)
# This will enter the list at row 1 column 1 to the length of the list
# Use min_row = and max_col = as well if the list is to be on another row or start at another column
for row in ws.iter_rows(max_row=1, max_col=len(equipe)):
for enum, cell in enumerate(row):
cell.value = equipe[enum]
wb.save("italian_team.xlsx")

Replace part of a cell value with blank with openpyxl

I have a spread sheet where Column A has a list of computer names with domain\ preceding the computer name. I am trying to use openpyxl to remove the domain\ and leave just the computer names. Here is the code I have tried. There is no error however the script does not change anything on the spreadsheet.
import openpyxl
excelFile = openpyxl.load_workbook('C:\Users\user1\Documents\file.xlsx')
sheet1 = excelFile.get_sheet_by_name('Sheet1')
currentRow = 1
for eachRow in sheet1.iter_rows():
if sheet1.cell(row=currentRow, column=1).value == "'domain\'":
sheet1.cell(row=currentRow, column=1).value = ""
currentRow += 1
excelFile.save('C:\Users\user1\Documents\file.xlsx')
The easiest way is to replace the cell value with a trimmed string.
import openpyxl
filename = r'C:\Users\user1\Documents\file.xlsx'
excelFile = openpyxl.load_workbook(filename)
sheet1 = excelFile.active
for row in sheet1.iter_rows(min_col=1, max_col=1):
for cell in row:
if 'domain\\' in cell.value:
cell.value = cell.value[7:] #This will replace the cell value with a trimmed string
excelFile.save(filename)

Reading values from Excel workbook in every worksheet in particular column

I'd like to read the values from column B in every worksheet within my workbook.
After a fair amount of reading and playing around I can return the cell names of the cells I want the values from, but I can't figure out how to get the values.
from openpyxl import load_workbook
wb = load_workbook(r"C:/Users/username/Documents/test.xlsx")
for sheet in wb.worksheets:
for row in range(2,sheet.max_row+1):
for column in "B":
cell_name = "{}{}".format(column, row)
print (cell_name)
This is returning the cell names (i.e. B2, B3) that have values in column B in every worksheet.
According to the documentation https://openpyxl.readthedocs.io/en/stable/usage.html you can access cell values as:
sheet['B5'].value
Replace B5 with the cell(s) you need.
import xlrd
loc = ("foo.xlsx") # excel file name
wb = xlrd.open_workbook(loc)
# sheet = wb.sheet_by_index(0)
for sheet in wb.sheets():
for i in range(sheet.nrows):
print(sheet.cell_value(i, 1))
Edit: I edited my answer to read all sheets in excel file.
just play with the range
from openpyxl import load_workbook
wb = load_workbook('')
for sheet in wb:
for i in range(1,50):
if sheet['B'+str(i)].value:
print(sheet['B'+str(i)].value)
Better one,
from openpyxl import load_workbook
wb = load_workbook('')
for sheet in wb:
for row in sheet['B']:
print(row.value)

How to split merged Excel cells with Python?

I am trying to split only the merged cells in Excel file (with multiple sheets) that are like:
Please note that there are partially/fully empty rows. These rows are not merged.
Using openpyxl, I found the merged cell ranges in each sheet with this code:
wb2 = load_workbook('Example.xlsx')
sheets = wb2.sheetnames ##['Sheet1', 'Sheet2']
for i,sheet in enumerate(sheets):
ws = wb2[sheets[i]]
print(ws.merged_cell_ranges)
The print output:
['B3:B9', 'B13:B14', 'A3:A9', 'A13:A14', 'B20:B22', 'A20:A22']
['B5:B9', 'A12:A14', 'B12:B14', 'A17:A18', 'B17:B18', 'A27:A28', 'B27:B28', 'A20:A22', 'B20:B22', 'A3:A4', 'B3:B4', 'A5:A9']
Since I found the merged cell ranges, I need to split the ranges and fill in the corresponding rows like this:
How can I split like this using openpyxl? I am new to using this module. Any feedback is greatly appreciated!
You need to use the unmerge function. Example:
ws.unmerge_cells(start_row=2,start_column=1,end_row=2,end_column=4)
when you use unmerge_cells function, sheet.merged_cells.ranges will be modified, so don't use sheet.merged_cells.ranges in for loop.
from openpyxl.workbook import Workbook
from openpyxl import load_workbook
from openpyxl.utils.cell import range_boundaries
wb = load_workbook(filename = 'tmp.xlsx')
for st_name in wb.sheetnames:
st = wb[st_name]
mcr_coord_list = [mcr.coord for mcr in st.merged_cells.ranges]
for mcr in mcr_coord_list:
min_col, min_row, max_col, max_row = range_boundaries(mcr)
top_left_cell_value = st.cell(row=min_row, column=min_col).value
st.unmerge_cells(mcr)
for row in st.iter_rows(min_col=min_col, min_row=min_row, max_col=max_col, max_row=max_row):
for cell in row:
cell.value = top_left_cell_value
wb.save('merged_tmp.xlsx')

Copy paste column range using OpenPyxl

Hi so I am trying to copy and paste W7:W46 column into another worksheet. The code I have so far,
col_j = New_Burden['W']
for idx, cell in enumerate(col_j,1):
ws1.cell(row = idx, column = 10).value = cell.value
is able to copy over the entire column, but unfortunately transfers the various headers as well. One solution I have tried is:
for row in New_Burden['W7:W46']:
for cell in row:
ws1.cell(row = 2, column = 10).value = cell.value
But that only copies the first value of W7
Copy a Range(['W7:W46']) from one Worksheet to another Worksheet:
If the Ranges are not overlapping, it's also possible in the same Worksheet.
from openpyxl import Workbook
# Create a new Workbook
wb = Workbook()
ws = wb.worksheets[0]
from openpyxl.utils import range_boundaries
# Define start Range(['J2']) in the new Worksheet
min_col, min_row, max_col, max_row = range_boundaries('J2')
# Iterate Range you want to copy
for row, row_cells in enumerate(New_Burden['W7:W46'], min_row):
for column, cell in enumerate(row_cells, min_col):
# Copy Value from Copy.Cell to given Worksheet.Cell
ws.cell(row=row, column=column).value = cell.value
If you want to do the above with multiple different Columns,
use the above in a function:
def copy_range(source_range, target_start):
# Define start Range(target_start) in the new Worksheet
min_col, min_row, max_col, max_row = range_boundaries(target_start)
# Iterate Range you want to copy
for row, row_cells in enumerate(New_Burden[source_range], min_row):
for column, cell in enumerate(row_cells, min_col):
# Copy Value from Copy.Cell to given Worksheet.Cell
ws.cell(row=row, column=column).value = cell.value
for source_range, target_start in [('W7:W46','J2'), ('Y7:Y46','A2')]:
copy_range(source_range, target_start)
Tested with Python: 3.4.2 - openpyxl: 2.4.1 - LibreOffice: 4.3.3.2

Categories