I'm trying to read values from an xlsx file containing formulas using openpyxl; however, I noticed that for some cells, I'm getting a wrong value.
Here's the XLSX example:
Here's the result I get:
The code:
wb = openpyxl.load_workbook(excel_file, data_only=True)
# getting all sheets
sheets = wb.sheetnames
# getting a particular sheet
worksheet = wb["Feuil1"]
# getting active sheet
active_sheet = wb.active
# reading a cell
excel_data = list()
# iterating over the rows and
# getting value from each cell in row
for row in worksheet.iter_rows():
row_data = list()
for cell in row:
return render(request, 'myapp/index.html', {"excel_data":excel_data})
This is my answer for getting data from an Excel file with xlrd.
import xlrd
from xlrd import open_workbook
fp = tempfile.NamedTemporaryFile(delete= False, suffix=filetype)
fp.write(binascii.a2b_base64(selected file))
workbook = xlrd.open_workbook(file name)
sheet = workbook.sheet_by_name(sheet name)
row = [c or '' for c in sheet.row_values(header_row)]
first_row = []
for col in range(sheet.ncols):
first_row.append(sheet.cell_value(0,col) )
archive_lines = []
for row in range(1, sheet.nrows):
elm = {}
for col in range(sheet.ncols):
I have researched several similar discussion threads on this forum and tried several of the recommended approaches, but I am not able to get all properties of the source worksheet copied over. Here's my code; I see that column widths and a few other things are not copied over. It would have been great if openpyxl implemented a function to copy a worksheet with all its attributes.
def copy_worksheet(src_xl, dest_xl, src_ws, dest_ws):
    """Copy a worksheet's values, cell styles and column widths into another workbook.

    Args:
        src_xl: Path of the source workbook.
        dest_xl: Path of the destination workbook. It is loaded, modified
            and saved back to this same path.
        src_ws: Title of the sheet to copy from.
        dest_ws: Title of the sheet to copy into. The sheet is created when
            the destination workbook has no sheet with that title.

    Raises:
        ValueError: If ``src_ws`` is not a sheet of the source workbook.
    """
    import openpyxl as xl
    from copy import copy
    from openpyxl.utils import get_column_letter

    # opening the source excel file
    wb1 = xl.load_workbook(src_xl)
    ws1 = wb1.worksheets[wb1.sheetnames.index(src_ws)]

    # opening the destination excel file
    wb2 = xl.load_workbook(dest_xl)
    # Reuse the destination sheet when it already exists and create it only
    # when it is missing; creating it unconditionally would leave a stray
    # duplicate sheet behind in the saved workbook.
    if dest_ws in wb2.sheetnames:
        ws2 = wb2[dest_ws]
    else:
        ws2 = wb2.create_sheet(dest_ws)

    # calculate total number of rows and columns in source excel file
    mr = ws1.max_row
    mc = ws1.max_column

    # copying the cell values and styles from source to destination
    for i in range(1, mr + 1):
        for j in range(1, mc + 1):
            cell = ws1.cell(row=i, column=j)
            new_cell = ws2.cell(row=i, column=j)
            new_cell.value = cell.value
            # Style objects are copied so the two workbooks do not share them.
            new_cell.font = copy(cell.font)
            new_cell.border = copy(cell.border)
            new_cell.fill = copy(cell.fill)
            new_cell.number_format = cell.number_format
            new_cell.protection = copy(cell.protection)
            new_cell.alignment = copy(cell.alignment)

    # Column widths live on the sheet's column dimensions, not on the cells,
    # so they have to be copied separately.
    for j in range(1, mc + 1):
        letter = get_column_letter(j)
        ws2.column_dimensions[letter].width = ws1.column_dimensions[letter].width

    # saving the destination excel file
    wb2.save(dest_xl)
This seems to do the job - to set the column widths:
from openpyxl.utils import get_column_letter
# Mirror every column width of ws1 onto the same column of ws2.
for col_idx in range(1, ws1.max_column + 1):
    letter = get_column_letter(col_idx)
    ws2.column_dimensions[letter].width = ws1.column_dimensions[letter].width
As of now I can read all the sheets of the Excel file.
e.msgbox("select Excel File")
updated_deleted_xls = e.fileopenbox()
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
openfile = e.fileopenbox()
for sheet in book.sheets():
for row in range(sheet.nrows):
for col in range(sheet.ncols):
thecell = sheet.cell(row, 0)
xfx = sheet.cell_xf_index(row, 0)
xf = book.xf_list[xfx]
If you open your editor from the desktop or command line, you would have to specify the file path while trying to read the file:
import pandas as pd
df = pd.read_excel(r'File path', sheet_name='Sheet name')
Alternatively, if you open your editor in the file's directory, then you can read the file directly using the pandas library:
import pandas as pd
# Each call reads one named sheet of the same workbook; the bare file name is
# resolved relative to the current working directory.
df = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='Title Sheet')
df1 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx',sheet_name='Transactions')
df2 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='NewCustomerList')
df3 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='CustomerDemographic')
df4 = pd.read_excel('KPMG_VI_New_raw_data_update_final.xlsx', sheet_name='CustomerAddress')
Maybe pandas would be helpful (the go-to package for data):
import pandas as pd
df = pd.read_excel('filname.xls', sheet = 0)
Edit: Since a lot of time has passed and pandas has matured, the arguments have changed. So for pandas >1.0.0:
import pandas as pd
df = pd.read_excel('filname.xls', sheet_name = 0)
You can use book.sheet_by_name() to read specific sheets by their name from xls file.
for name, sheet_name in zip(filename, sheetnumber):
book = xlrd.open_workbook(name)
sheet = book.sheet_by_name(sheet_name)
for row in range(sheet.nrows):
for column in range(sheet.ncols):
thecell = sheet.cell(row, 0)
xfx = sheet.cell_xf_index(row, 0)
xf = book.xf_list[xfx]
filename is the path to your xls file. Specify the sheet number you need to read in sheetnumber.
Alternatively, you could use book.sheet_by_index() and pass argument to return a specific sheet.
From docs:
Parameters: sheetx – Sheet index in range(nsheets)
For example:
first_sheet = book.sheet_by_index(0) # returns the first sheet.
You can use either book.sheet_by_name() or book.get_sheet()
Example using get_sheet()
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
sheet = book.get_sheet(0) #Gets the first sheet.
Example using sheet_by_name()
# Open the workbook, list its sheet names, then fetch the first sheet by name.
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
sheet_names = book.sheet_names()
# The workbook object is bound to `book`; no `xl_workbook` name exists here.
xl_sheet = book.sheet_by_name(sheet_names[0])
More info on getting a sheet with sheet_by_name
I'm trying to write multiple excels' column A into a new excel's column A (assuming all the excels have one worksheet each.) I've written some code, which can write one excel's column A into the new excel's column A; but if there are multiple excels, the new excel's column A will be overwritten multiple times. So how could I just add all the column As to the new excel sheet one after another without overwriting each other?
Below is my code:
import os, openpyxl
path = os.getcwd()
def func(file):
for file in os.listdir(path):
if file.endswith('.xlsx'):
wb = openpyxl.load_workbook(file)
sheet = wb.active
colA = sheet['A']
wb = openpyxl.Workbook()
r = 1
for i in colA:
sheet = wb.active
sheet.cell(row=r, column=1).value = i.value
r += 1
Thank you so much!!
You could proceed, for example, as follows:
import os, openpyxl
path = os.getcwd()
def func(outputFile):
    """Collect column A of every .xlsx file in ``path`` side by side.

    Column A of the active sheet of the first input file goes to output
    column 1, that of the second file to output column 2, and so on. The
    result is saved to ``outputFile``.

    Args:
        outputFile: Path the combined workbook is written to.
    """
    # create output workbook
    wbOut = openpyxl.Workbook()
    sheetOut = wbOut.active

    target = os.path.abspath(outputFile)
    c = 0
    # Sorted so the column order does not depend on directory listing order.
    for fName in sorted(os.listdir(path)):
        if not fName.endswith('.xlsx'):
            continue
        # Join with ``path`` so the file is found regardless of the current
        # working directory at call time.
        source = os.path.join(path, fName)
        if os.path.abspath(source) == target:
            # Do not read back the output of a previous run as an input.
            continue
        c += 1  # move to the next column in output
        sheet = openpyxl.load_workbook(source).active  # input sheet
        for r, cell in enumerate(sheet['A'], start=1):
            sheetOut.cell(row=r, column=c).value = cell.value

    # Without this the collected data never reaches disk.
    wbOut.save(outputFile)
#"concatenate" all columns A into one single column
def funcAppend(outputFile):
    """Concatenate column A of every .xlsx file in ``path`` into one column.

    The cells of column A of each input file's active sheet are written one
    after another into column 1 of a new workbook, which is then saved to
    ``outputFile``.

    Args:
        outputFile: Path the combined workbook is written to.
    """
    wbOut = openpyxl.Workbook()
    sheetOut = wbOut.active

    target = os.path.abspath(outputFile)
    r = 1  # next free row in the output column
    # Sorted so the concatenation order does not depend on listing order.
    for fName in sorted(os.listdir(path)):
        if not fName.endswith('.xlsx'):
            continue
        # Join with ``path`` so the file is found regardless of the current
        # working directory at call time.
        source = os.path.join(path, fName)
        if os.path.abspath(source) == target:
            # Do not read back the output of a previous run as an input.
            continue
        sheet = openpyxl.load_workbook(source).active
        for cell in sheet['A']:
            sheetOut.cell(row=r, column=1).value = cell.value
            r += 1

    # Without this the collected data never reaches disk.
    wbOut.save(outputFile)
I want to read the data given in 2nd and 3rd column from XLSX file.
import xlrd
workbook = xlrd.open_workbook("C:/Users/File.xlsx","rb")
sheet = workbook.sheet_by_index(0)
for row in range(sheet.nrows):
cols = (sheet.row_values(row,1)) and (sheet.row_values(row,2))
But it gives the error below when I execute the above script:
biff_version = bk.getbof(XL_WORKBOOK_GLOBALS) File
C:\Python27\.....\xlrd_init_.py", line 1323, in getbof raise
XLRDError('Expected BOF record; found 0x%04x' % opcode)
xlrd.biffh.XLRDError: Expected BOF record; found 0x4b50
Try this
import xlrd
# The second positional parameter of open_workbook() is `logfile`, not a file
# mode, so no "rb" argument is passed.
# NOTE(review): the BOF error quoted in the question suggests this xlrd
# version cannot parse the .xlsx container at all - confirm the installed
# xlrd supports .xlsx.
workbook = xlrd.open_workbook("C:/Users/File.xlsx")
sheets = workbook.sheet_names()
required_data = []
for sheet_name in sheets:
    sh = workbook.sheet_by_name(sheet_name)
    for rownum in range(sh.nrows):
        # Column indexes are zero-based: the slice 1..3 selects the 2nd and
        # 3rd columns, which is what the question asks for.
        required_data.append(tuple(sh.row_values(rownum, 1, 3)))
# Single-argument call form prints the same under Python 2 and Python 3.
print(required_data)
This example reads all the content of the Excel sheet and puts it in a matrix (a list of lists); you can then use the columns you need:
import xlrd
# The second positional parameter of open_workbook() is `logfile`, not a file
# mode, so no "rb" argument is passed.
workbook = xlrd.open_workbook("C:/Users/File.xlsx")
sheet = workbook.sheet_by_index(0)
# One inner list per spreadsheet row, holding that row's cell values. The
# original built each row list but never added it to `rows`, so the printed
# matrix was always empty.
rows = [sheet.row_values(i) for i in range(sheet.nrows)]
# Single-argument call form prints the same under Python 2 and Python 3.
print(rows)
I have a list of excel files and their corresponding sheet number. I need python to go to those sheets and find out the cell location for a particular content. Thanks to "alecxe", I used the following code and it worked well.
import xlrd
# Text to look for, plus the workbooks and the sheet to search in each one.
value = 'Avg.'
fn = ('C:/ab1.xls', 'C:/ab2.xls', 'C:/ab3.xls', 'C:/ab4.xls', 'C:/ab5.xls')
sn = ('505840', '505608', '430645', '505464', '505084')

# fn and sn are parallel: the n-th sheet name belongs to the n-th file.
for name, sheet_name in zip(fn, sn):
    book = xlrd.open_workbook(name)
    sheet = book.sheet_by_name(sheet_name)
    for row in range(sheet.nrows):
        for column, cell_value in enumerate(sheet.row_values(row)):
            if cell_value == value:
                # Prints "<row> <column>" for every matching cell.
                print("%s %s" % (row, column))
Later I wanted to make changes and instead of writing down the filename and sheetnumber, I wanted python to grab them from an excel sheet. But the program is not printing anything. Can anyone show me where I made the mistake? Highly appreciate your comment!
import xlrd
import glob
import os
value = 'Avg.'
sheetnumber = []
filename = []
xlfile = "C:\\Users\\tsengineer\\Desktop\\Joydip Trial\\Simple.xls"
workbook = xlrd.open_workbook(xlfile)
sheet = workbook.sheet_by_index(0)
for row in range(sheet.nrows):
value = str(sheet.cell_value(row, 17))
for row in range(sheet.nrows):
value = str(sheet.cell_value(row, 15))
fn = tuple(filename)
sn = tuple(sheetnumber)
for name, sheet_name in zip(fn, sn):
book = xlrd.open_workbook(name)
sheet = book.sheet_by_name(sheet_name)
for row in range(sheet.nrows):
for column in range(sheet.ncols):
if sheet.cell(row,column).value == value:
print row, column
For some reason the loop is definitely not working, as I am getting two empty lists as output. Any thoughts?
import xlrd
value = 'Avg.'
sheetnumber = []
filename = []
rowlist = []
columnlist = []
xlfile = "C:/Users/Joyd/Desktop/Experiment/Simple_1.xls"
workbook = xlrd.open_workbook(xlfile)
sheet = workbook.sheet_by_index(0)
for row in range(sheet.nrows):
value = str(sheet.cell_value(row, 17))
for row in range(sheet.nrows):
value = str(sheet.cell_value(row, 15))
fn = tuple(filename)
sn = tuple(sheetnumber)
for fname, sname in zip(fn, sn):
book = xlrd.open_workbook(fname)
sheet = book.sheet_by_name(sname)
for row in range(sheet.nrows):
for column in range(sheet.ncols):
if sheet.cell(row,column).value == value:
print rowlist
print columnlist