I'm trying to read values from an xlsx file containing formulas using openpyxl; however, I noticed that for some cells, I'm getting a wrong value.
Here's the XLSX example:
Here's the result I get:
The code:
# Fragment of a view body: loads the uploaded workbook, prints some
# diagnostics and renders every cell of sheet "Feuil1" as text.
# With data_only=True openpyxl returns the value cached in the file for
# formula cells instead of the formula text.
# NOTE(review): the cached value is whatever the application that last saved
# the file stored - confirm the file was recalculated and saved by Excel.
wb = openpyxl.load_workbook(excel_file, data_only=True)
# getting all sheets
sheets = wb.sheetnames
print(sheets)
# getting a particular sheet
worksheet = wb["Feuil1"]
print(worksheet)
# getting active sheet
active_sheet = wb.active
print(active_sheet)
# reading a cell
print(worksheet["A1"].value)
# excel_data collects one list of strings per worksheet row
excel_data = list()
# iterating over the rows and
# getting value from each cell in row
for row in worksheet.iter_rows():
row_data = list()
for cell in row:
#cell.number_format='0.0########'
print(cell.number_format)
# The raw stored value is stringified; cell.number_format is only printed,
# never applied.
# NOTE(review): presumably this is why the output differs from what Excel
# displays (display rounding/formatting) - confirm against the sample file.
row_data.append(str(cell.value))
print(cell.value)
excel_data.append(row_data)
# Hand the collected rows to the template under the key "excel_data".
return render(request, 'myapp/index.html', {"excel_data":excel_data})
Hey, what exactly do you want to read from the Excel file, and in which format do you need the data?
Here is my answer for getting data from an Excel file with xlrd.
import binascii
import tempfile

import xlrd
from xlrd import open_workbook

# Turn a base64-encoded workbook into a list of dicts, one per data row,
# keyed by the values found in the first (header) row.
#
# Names to define before running this snippet:
#   selected_file - base64-encoded content of the workbook
#   filetype      - suffix for the temporary file, e.g. ".xls"
#   sheet_name    - name of the worksheet to read

# Decode the upload into a temporary file so xlrd can open it by path.
# Leaving the ``with`` block closes (and therefore flushes) the file before
# xlrd reads it; delete=False keeps it on disk afterwards, so remove it
# yourself once it is no longer needed.
with tempfile.NamedTemporaryFile(delete=False, suffix=filetype) as fp:
    fp.write(binascii.a2b_base64(selected_file))

# NOTE(review): the original opened a placeholder "file name"; the temporary
# file written above is assumed to be the intended target.
workbook = xlrd.open_workbook(fp.name)
sheet = workbook.sheet_by_name(sheet_name)

# Row 0 supplies the dictionary keys.
first_row = sheet.row_values(0)

# Every following row becomes {header: cell value}; when two header cells
# are equal, the right-most column wins.
archive_lines = [
    dict(zip(first_row, sheet.row_values(row)))
    for row in range(1, sheet.nrows)
]
Related
I have researched several similar threads on this forum and tried several of the recommended approaches, but I am not able to get all properties of the source worksheet copied over. Here's my code; I see that column widths and a few other things are not copied over. It would have been great if openpyxl implemented a function to copy a worksheet with all its attributes.
def copy_worksheet(src_xl, dest_xl, src_ws, dest_ws):
    """Copy a worksheet from one workbook file into another workbook file.

    Cell values and per-cell styling (font, border, fill, number format,
    protection, alignment) are copied, followed by column widths, row
    heights and merged-cell ranges. The destination workbook is saved in
    place.

    Args:
        src_xl: Path of the source workbook.
        dest_xl: Path of the destination workbook (must already exist).
        src_ws: Name of the worksheet to copy from.
        dest_ws: Name of the worksheet to copy into; created if missing.

    Raises:
        ValueError: If ``src_ws`` is not a sheet of the source workbook.
    """
    import openpyxl as xl
    from copy import copy

    # Source sheet: list.index raises ValueError for an unknown sheet name.
    wb1 = xl.load_workbook(src_xl)
    ws1 = wb1.worksheets[wb1.sheetnames.index(src_ws)]

    # Destination sheet: reuse it when present, otherwise create it. An
    # explicit membership test replaces the former bare ``except:``, which
    # would also have hidden unrelated errors.
    wb2 = xl.load_workbook(dest_xl)
    if dest_ws in wb2.sheetnames:
        ws2 = wb2[dest_ws]
    else:
        ws2 = wb2.create_sheet(dest_ws)

    # Copy the used range cell by cell.
    for i in range(1, ws1.max_row + 1):
        for j in range(1, ws1.max_column + 1):
            src_cell = ws1.cell(row=i, column=j)
            new_cell = ws2.cell(row=i, column=j)
            # Assign even when the source is empty so stale destination
            # content is cleared.
            new_cell.value = src_cell.value
            # Style objects are copied so the two workbooks never share
            # instances.
            new_cell.font = copy(src_cell.font)
            new_cell.border = copy(src_cell.border)
            new_cell.fill = copy(src_cell.fill)
            # number_format is a plain string; no copy needed.
            new_cell.number_format = src_cell.number_format
            new_cell.protection = copy(src_cell.protection)
            new_cell.alignment = copy(src_cell.alignment)

    # Sheet-level layout that lives outside the individual cells.
    for col_letter, dim in ws1.column_dimensions.items():
        ws2.column_dimensions[col_letter].width = dim.width
    for row_idx, dim in ws1.row_dimensions.items():
        ws2.row_dimensions[row_idx].height = dim.height
    for merged_range in ws1.merged_cells.ranges:
        ws2.merge_cells(str(merged_range))

    wb2.save(str(dest_xl))
This seems to do the job - to set the column widths:
from openpyxl.utils import get_column_letter
# Mirror the width of every used source column onto the destination sheet.
for col_idx in range(1, ws1.max_column + 1):
    letter = get_column_letter(col_idx)
    ws2.column_dimensions[letter].width = ws1.column_dimensions[letter].width
As of now, I can read all sheets of the Excel file.
# Let the user pick a workbook through the dialog module bound to `e`, then
# walk every cell position of every sheet and look up formatting records.
e.msgbox("select Excel File")
updated_deleted_xls = e.fileopenbox()
# formatting_info=True is passed so the XF lookups below can be made.
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
# NOTE(review): a second file dialog is opened here, but `openfile` is not
# used in the visible code.
openfile = e.fileopenbox()
for sheet in book.sheets():
for row in range(sheet.nrows):
for col in range(sheet.ncols):
# NOTE(review): `col` is never used - column 0 is read on every pass of the
# inner loop; confirm whether (row, col) was intended.
thecell = sheet.cell(row, 0)
xfx = sheet.cell_xf_index(row, 0)
# xf is the entry of the workbook's xf_list selected by that index.
xf = book.xf_list[xfx]
If you open your editor from the desktop or command line, you would have to specify the file path while trying to read the file:
import pandas as pd
# Load one worksheet into a DataFrame; 'File path' and 'Sheet name' are
# placeholders for the workbook path and the worksheet name.
df = pd.read_excel(r'File path', sheet_name='Sheet name')
Alternatively, if you open your editor in the file's directory, you can read the file directly using the pandas library:
import pandas as pd
# All five sheets come from the same workbook, which is looked up relative
# to the current working directory.
workbook_path = 'KPMG_VI_New_raw_data_update_final.xlsx'

df = pd.read_excel(workbook_path, sheet_name='Title Sheet')
df1 = pd.read_excel(workbook_path, sheet_name='Transactions')
df2 = pd.read_excel(workbook_path, sheet_name='NewCustomerList')
df3 = pd.read_excel(workbook_path, sheet_name='CustomerDemographic')
df4 = pd.read_excel(workbook_path, sheet_name='CustomerAddress')
Maybe pandas would be helpful (the go-to package for data):
import pandas as pd
# Original form of the call, selecting the sheet with a `sheet` keyword.
# NOTE(review): check this keyword against the pandas version in use.
df = pd.read_excel('filname.xls', sheet = 0)
Edit: Since a lot of time has passed and pandas has matured, the arguments have changed. So for pandas > 1.0.0:
import pandas as pd
# sheet_name=0 selects the first worksheet by position.
df = pd.read_excel('filname.xls', sheet_name = 0)
You can use book.sheet_by_name() to read specific sheets by their name from xls file.
# `filename` and `sheetnumber` are parallel sequences: the i-th workbook path
# is paired with the i-th sheet name.
for name, sheet_name in zip(filename, sheetnumber):
    # formatting_info=True is required for cell_xf_index() and xf_list;
    # without it xlrd does not load the formatting records.
    book = xlrd.open_workbook(name, formatting_info=True)
    sheet = book.sheet_by_name(sheet_name)
    for row in range(sheet.nrows):
        for column in range(sheet.ncols):
            # Use the loop's column index; the original re-read column 0 on
            # every pass of the inner loop.
            thecell = sheet.cell(row, column)
            xfx = sheet.cell_xf_index(row, column)
            xf = book.xf_list[xfx]
filename is the path to your xls file. Specify the sheet number you need to read in sheetnumber.
Alternatively, you could use book.sheet_by_index() and pass argument to return a specific sheet.
From docs:
sheet_by_index(sheetx)
Parameters: sheetx – Sheet index in range(nsheets)
For example:
first_sheet = book.sheet_by_index(0) # returns the first sheet.
You can use either book.sheet_by_name() or book.get_sheet()
Example using get_sheet()
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
sheet = book.get_sheet(0) #Gets the first sheet.
Example using sheet_by_name()
# Open the workbook, list its sheet names and fetch the first sheet by name.
book = xlrd.open_workbook(updated_deleted_xls, formatting_info=True)
sheet_names = book.sheet_names()
# Look the sheet up on the workbook opened above (the original referred to
# an undefined `xl_workbook`).
xl_sheet = book.sheet_by_name(sheet_names[0])
More info on getting a sheet with sheet_by_name
I'm trying to write multiple excels' column A into a new excel's column A (assuming all the excels have one worksheet each.) I've written some code, which can write one excel's column A into the new excel's column A; but if there are multiple excels, the new excel's column A will be overwritten multiple times. So how could I just add all the column As to the new excel sheet one after another without overwriting each other?
Below is my code:
import os, openpyxl
path = os.getcwd()
# Question code: meant to gather column A of every .xlsx file found in
# `path` into new.xlsx.
# NOTE(review): the `file` parameter is rebound by the loop on the next
# line, so the argument passed by the caller is never used.
def func(file):
for file in os.listdir(path):
if file.endswith('.xlsx'):
wb = openpyxl.load_workbook(file)
sheet = wb.active
colA = sheet['A']
# NOTE(review): `wb` is rebound to a brand-new empty workbook and `r` is
# reset to 1 here. If this runs once per input file (indentation was lost),
# every file's values start again at row 1 of a fresh workbook, which would
# match the overwriting described in the question.
wb = openpyxl.Workbook()
r = 1
for i in colA:
sheet = wb.active
sheet.cell(row=r, column=1).value = i.value
r += 1
wb.save('new.xlsx')
# NOTE(review): `file` is not assigned at module level in the visible code;
# confirm what is meant to be passed here.
func(file)
Thank you so much!!
you could proceed for example as:
import os, openpyxl
path = os.getcwd()
def func(outputFile):
    """Copy column A of every .xlsx file in ``path`` side by side.

    The first input file fills column 1 of the output sheet, the second
    fills column 2, and so on. Files are processed in sorted name order so
    the column order is reproducible.

    Args:
        outputFile: Path of the workbook to write.
    """
    out_abs = os.path.abspath(outputFile)

    # create output workbook
    wbOut = openpyxl.Workbook()
    sheetOut = wbOut.active

    c = 0
    for fName in sorted(os.listdir(path)):
        # Skip non-workbooks and the "~$..." lock files Excel creates while
        # a workbook is open.
        if not fName.endswith('.xlsx') or fName.startswith('~$'):
            continue
        # listdir returns bare names; join with the directory so this works
        # even when `path` is not the current working directory.
        src = os.path.join(path, fName)
        # Never read a previous run's output back in as an input.
        if os.path.abspath(src) == out_abs:
            continue

        c += 1  # move to the next column in output
        sheet = openpyxl.load_workbook(src).active  # input sheet
        for r, cell in enumerate(sheet['A'], start=1):
            sheetOut.cell(row=r, column=c).value = cell.value

    wbOut.save(outputFile)
def funcAppend(outputFile):
    """Concatenate column A of every .xlsx file in ``path`` into one column.

    Values are written to column 1 of the output sheet, one input file
    after another, without overwriting earlier rows. Files are processed in
    sorted name order so the result is reproducible.

    Args:
        outputFile: Path of the workbook to write.
    """
    out_abs = os.path.abspath(outputFile)

    wbOut = openpyxl.Workbook()
    sheetOut = wbOut.active

    r = 1  # next free row in the output column; never reset between files
    for fName in sorted(os.listdir(path)):
        # Skip non-workbooks and the "~$..." lock files Excel creates while
        # a workbook is open.
        if not fName.endswith('.xlsx') or fName.startswith('~$'):
            continue
        # listdir returns bare names; join with the directory so this works
        # even when `path` is not the current working directory.
        src = os.path.join(path, fName)
        # Never read a previous run's output back in as an input.
        if os.path.abspath(src) == out_abs:
            continue

        sheet = openpyxl.load_workbook(src).active
        for cell in sheet['A']:
            sheetOut.cell(row=r, column=1).value = cell.value
            r += 1

    wbOut.save(outputFile)
func('test.xlsx')
I want to read the data given in 2nd and 3rd column from XLSX file.
import xlrd
# Question code: tries to print the 2nd and 3rd column of the first sheet.
# NOTE(review): "rb" is passed as the second positional argument - confirm
# against xlrd's open_workbook signature that a file mode is accepted there.
workbook = xlrd.open_workbook("C:/Users/File.xlsx","rb")
sheet = workbook.sheet_by_index(0)
for row in range(sheet.nrows):
# NOTE(review): `and` evaluates to one of its two operands, not to a
# combination of both, so `cols` cannot hold both row_values results.
cols = (sheet.row_values(row,1)) and (sheet.row_values(row,2))
print(cols)
But it gives the error below when I execute the above script:
biff_version = bk.getbof(XL_WORKBOOK_GLOBALS) File
C:\Python27\.....\xlrd_init_.py", line 1323, in getbof raise
XLRDError('Expected BOF record; found 0x%04x' % opcode)
xlrd.biffh.XLRDError: Expected BOF record; found 0x4b50
Try this
import xlrd
# Collect the 2nd and 3rd column of every row, across all sheets.
# NOTE: xlrd 2.0 and later only read .xls files; for .xlsx either use an
# older xlrd or switch to openpyxl.
# open_workbook takes a path; no file-mode argument is passed.
workbook = xlrd.open_workbook("C:/Users/File.xlsx")
sheets = workbook.sheet_names()
required_data = []
for sheet_name in sheets:
    sh = workbook.sheet_by_name(sheet_name)
    for rownum in range(sh.nrows):
        # Zero-based columns 1 and 2 are the 2nd and 3rd columns; slicing
        # through row_values also avoids an IndexError on narrow sheets.
        required_data.append(tuple(sh.row_values(rownum, 1, 3)))
print(required_data)
This example reads all the content of the Excel sheet and puts it in a matrix (a list of lists); you can then use the columns you need:
import xlrd
# Read the whole first sheet into a list of rows, each row being the list of
# its cell values.
# open_workbook takes a path; no file-mode argument is passed.
workbook = xlrd.open_workbook("C:/Users/File.xlsx")
sheet = workbook.sheet_by_index(0)
# row_values(i) returns the values of every cell in row i.
rows = [sheet.row_values(i) for i in range(sheet.nrows)]
print(rows)
I have a list of excel files and their corresponding sheet number. I need python to go to those sheets and find out the cell location for a particular content. Thanks to "alecxe", I used the following code and it worked well.
import xlrd
# Print the zero-based (row, column) position of every cell whose value
# equals `value`, for each (workbook, sheet name) pair below.
value = 'Avg.'
# fn and sn are parallel: the i-th file is searched in the i-th sheet.
fn = ('C:/ab1.xls', 'C:/ab2.xls','C:/ab3.xls','C:/ab4.xls','C:/ab5.xls',)
sn = ('505840', '505608', '430645', '505464', '505084')
for name, sheet_name in zip(fn, sn):
    book = xlrd.open_workbook(name)
    sheet = book.sheet_by_name(sheet_name)
    for row in range(sheet.nrows):
        for column in range(sheet.ncols):
            if sheet.cell(row, column).value == value:
                # print() call: the Python 2 print statement is a
                # SyntaxError on Python 3.
                print(row, column)
Later I wanted to make changes and instead of writing down the filename and sheetnumber, I wanted python to grab them from an excel sheet. But the program is not printing anything. Can anyone show me where I made the mistake? Highly appreciate your comment!
import xlrd
import glob
import os
# Question code: reads workbook paths and sheet names from an index
# workbook, then searches each listed sheet for `value`.
# Text to search for.
value = 'Avg.'
sheetnumber = []
filename = []
xlfile = "C:\\Users\\tsengineer\\Desktop\\Joydip Trial\\Simple.xls"
workbook = xlrd.open_workbook(xlfile)
sheet = workbook.sheet_by_index(0)
# Column index 17 of every row is collected as a workbook path.
# NOTE(review): this loop assigns to `value`, replacing the 'Avg.' search
# text set above.
for row in range(sheet.nrows):
value = str(sheet.cell_value(row, 17))
filename.append(value)
# Column index 15 of every row is collected as a sheet name.
# NOTE(review): `value` is overwritten again; after this loop it holds the
# last row's column-15 text, and that is what the comparison below tests.
# NOTE(review): str() of a numeric cell may not equal the sheet name (for
# example a float rendering) - confirm the cell types in the index sheet.
for row in range(sheet.nrows):
value = str(sheet.cell_value(row, 15))
sheetnumber.append(value)
fn = tuple(filename)
sn = tuple(sheetnumber)
# `sheet` is rebound here from the index sheet to each searched sheet.
for name, sheet_name in zip(fn, sn):
book = xlrd.open_workbook(name)
sheet = book.sheet_by_name(sheet_name)
for row in range(sheet.nrows):
for column in range(sheet.ncols):
if sheet.cell(row,column).value == value:
print row, column
Definitely for some reasons, the loop is not working as I am getting two empty lists as output. Any thoughts?
import xlrd
# Question code: same search as before, but matching positions are gathered
# in rowlist/columnlist and printed at the end.
# Text to search for.
value = 'Avg.'
sheetnumber = []
filename = []
rowlist = []
columnlist = []
xlfile = "C:/Users/Joyd/Desktop/Experiment/Simple_1.xls"
workbook = xlrd.open_workbook(xlfile)
sheet = workbook.sheet_by_index(0)
# Column index 17 of every row is collected as a workbook path.
# NOTE(review): this loop assigns to `value`, replacing the 'Avg.' search
# text set above.
for row in range(sheet.nrows):
value = str(sheet.cell_value(row, 17))
filename.append(value)
# Column index 15 of every row is collected as a sheet name.
# NOTE(review): `value` is overwritten again; after this loop it holds the
# last row's column-15 text, so the comparison below no longer looks for
# 'Avg.' - a likely reason both result lists stay empty.
for row in range(sheet.nrows):
value = str(sheet.cell_value(row, 15))
sheetnumber.append(value)
fn = tuple(filename)
sn = tuple(sheetnumber)
# `sheet` is rebound here from the index sheet to each searched sheet.
for fname, sname in zip(fn, sn):
book = xlrd.open_workbook(fname)
sheet = book.sheet_by_name(sname)
for row in range(sheet.nrows):
for column in range(sheet.ncols):
if sheet.cell(row,column).value == value:
rowlist.append(row)
columnlist.append(column)
print rowlist
print columnlist