Modify an existing Excel file using Openpyxl in Python - python

I am basically trying to copy some specific columns from a CSV file and paste those
in an existing excel file[*.xlsx] using python. Say for example, you have a CSV file like this :
col_1 col_2 col_3 col_4
1 2 3 4
5 6 7 8
9 10 11 12
So, i wanted to copy the both col_3 and col_4 and paste those in col_8 and col_9 in an existing excel file [which is a .XLSX format].
I have tried this in various way to solve, but could not find out the exact way.
i tried something like this :
with open( read_x_csv, 'rb') as f:
reader = csv.reader(f)
for row in reader:
list1 = row[13]
queue1.append(list1)
list2 = row[14]
queue2.append(list2)
list3 = row[15]
queue3.append(list3)
list4 = row[16]
queue4.append(list4)
and then
rb = open_workbook("Exact file path.....")
wb = copy(rb)
ws = wb.get_sheet(0)
row_no = 0
for item in queue1:
if(item != ""):
ii = int(item)
ws.write(row_no,12,ii)
row_no = row_no + 1
#ws.write(item)
print item
else:
ws.write(row_no,12,item)
row_no = row_no + 1
wb.save("Output.xls")
but problem with this solution is it does not allow me to save as *.XLSX format which is
strictly required for me.
I have tried to use Openpyxl as it can handle *.XLSX format, but could not find out a way to modify the existing excel file. can anyone please help on this?
Doubt :
1) Can we really read a whole column from a CSV file and store into an array/list
using python?
2) Can we modify the existing excel file which is in .XLSX format using
openpyxl or any other package?

You can try the following implementation
from openpyxl import load_workbook
import csv
def update_xlsx(src, dest):
#Open an xlsx for reading
wb = load_workbook(filename = dest)
#Get the current Active Sheet
ws = wb.get_active_sheet()
#You can also select a particular sheet
#based on sheet name
#ws = wb.get_sheet_by_name("Sheet1")
#Open the csv file
with open(src) as fin:
#read the csv
reader = csv.reader(fin)
#enumerate the rows, so that you can
#get the row index for the xlsx
for index,row in enumerate(reader):
#Assuming space separated,
#Split the row to cells (column)
row = row[0].split()
#Access the particular cell and assign
#the value from the csv row
ws.cell(row=index,column=7).value = row[2]
ws.cell(row=index,column=8).value = row[3]
#save the csb file
wb.save(dest)
Can we really read a whole column from a CSV file and store into an array/list using python? No, because files are read sequentially, csv reader cannot read a column of data to a row. Instead you may read the whole content and use izip and islice to get a particular column. You can also use numpy.array
Can we modify the existing excel file which is in .XLSX format using openpyxl or any other package? Yes, see the example above

As it is 2021, get_sheet_by_name is deprecated and raises an DeprecationWarning with the following message:
Call to deprecated function get_sheet_by_name (Use wb[sheetname]).
The following snippet can be used in order to not raise the warning.
from openpyxl import load_workbook
file_path = 'test.xlsx'
wb = load_workbook(file_path)
ws = wb['SHEET_NAME'] # or wb.active
ws['G6'] = 123
wb.save(file_path)

from openpyxl import load_workbook
# Class to manage excel data with openpyxl.
class Copy_excel:
def __init__(self,src):
self.wb = load_workbook(src)
#self.ws = self.wb.get_sheet_by_name("Sheet1") # Deprecated
self.ws = self.wb["Sheet1"]
self.dest="destination.xlsx"
# Write the value in the cell defined by row_dest+column_dest
def write_workbook(self,row_dest,column_dest,value):
c = self.ws.cell(row = row_dest, column = column_dest)
c.value = value
# Save excel file
def save_excel(self) :
self.wb.save(self.dest)

Open an existing excel file (Using load_workbook(...))
As simple as that!
from openpyxl import load_workbook
wb = load_workbook('test.xlsx')
See docs: https://openpyxl.readthedocs.io/en/stable/tutorial.html#loading-from-a-file
Append data at the end (keeping the old data)
work_sheet = wb.active # Get active sheet
work_sheet.append(['John', 'Customer', 'He likes football'])
Save modified workbook in test.xlsx
wb.save('test.xlsx')

Related

add specific rows and column into another excel file using openpyxl in python

I'm trying to merge multiple file into one excel file using openpyxl in python
I know there is a way using panda, but my files have a problem there have been always 2 empty rows in the beginning of the excel file
So to avoid that I'm using openpyxl with the old way
Just open all files and copy the specific rows and columns to a new one
The first step I find out how to do it by just copy the specific row's and column of the new xlsx file
but I didn't find a way to add the next file (only the value not the header) under the first one
this my code
So far it just copy the first file (the header and the value)
But I didn't find out how to add the next file (only the value) under the first one
import openpyxl as xl
from openpyxl import Workbook
import os
def find_xlsx_files():
# the current path
dir_path = os.path.dirname(os.path.abspath(__file__))
# list to store files
res = []
# Iterate directory
for file in os.listdir(dir_path):
# check only xlsx files
if file.endswith('.xlsx'):
res.append(file)
return (res)
wb1 = xl.load_workbook (find_xlsx_files()[0])
ws1 = wb1.worksheets [0]
# open target Excel file
wb2 = Workbook()
ws = wb2.active
ws.title = "Changed Sheet"
wb2.save(filename = 'sample_book.xlsx')
ws2 = wb2.active
# calculate the total rows and
# columns in the Excel source file
mr = ws1.max_row
mc = ws1.max_column
# copy cell values ​​from source
# Excel file to target Excel file
for i in range ( 3 , mr + 1 ):
for j in range ( 2 , mc + 1 ):
# read cell value from Excel source file
c = ws1.cell (row = i, column = j)
# writing the read value to the target Excel file
ws2.cell (row = i, column = j) .value = c.value
# save target Excel file
wb2.save ( str ('sample_book.xlsx'))
What you are doing is creating a list of the excel files in the default directory then just opening the first file '[0]' in the list with the line;
wb1 = xl.load_workbook (find_xlsx_files()[0])
This will never attempt to access any other excel file in the list. Having the list generation in the load book command isn't good, you don't want to be generating the list of available excel files each time you process a file. Calling of the function find_xlsx_files() should be done once.
The easiest fix to your code is to get your list of excel files and then iterate that list for processing.
excel_files = find_xlsx_files()
for xl_file in excel_files:
wb1 = xl.load_workbook(xl_file)
...
Also it should not be necessary to save the book until you have finished writing all data.
The function can be simplified using glob instead if you prefer.
import glob
import os
from openpyxl import Workbook, load_workbook
dir_path = os.path.dirname(os.path.abspath(__file__))
excel_files = glob.glob(dir_path + "/[!~]*.xlsx")
for xl_file in excel_files:
wb1 = load_workbook(xl_file)
ws1 = wb1.worksheets[0]
# open target Excel file
wb2 = Workbook()
ws = wb2.active
ws.title = "Changed Sheet"
# wb2.save(filename='sample_book.xlsx')
ws2 = wb2.active
# calculate the total rows and
# columns in the Excel source file
mr = ws1.max_row
mc = ws1.max_column
# copy cell values ​​from source
# Excel file to target Excel file
for i in range(3, mr+1):
for j in range(2, mc+1):
# read cell value from Excel source file
c = ws1.cell(row=i, column=j)
# writing the read value to the target Excel file
ws2.cell(row=i, column=j).value = c.value
# save target Excel file
wb2.save(str('sample_book.xlsx'))
This is also assuming there is only one sheet in each excel file you want to process since you're only opening the first sheet.
ws1 = wb1.worksheets[0]

How to import data from .txt file to a specifc excel sheet with Python?

I am trying to automate a process that basically reads in values from text files into certain excel cells. I have a template in excel that will read data from various sheets under certain names. For example, the template will read in data from "Video scores". Video scores is a .txt file that I copy and paste into excel. There are 5 different text files used in each project so it gets tedious after a while and when there are a lot of projects to complete.
How can I import or copy and paste these .txt files into excel to a specified sheet? I have been using openpyxl for the other parts of this project, but I am open to using another library if it can't be done with openpxl.
I've also tried opening and reading a file, but I couldn't figure out how to do what I want with that either. I have found a list of all the files I need, its just a matter of getting them into excel.
Thanks in advance for anyone who helps.
First, import the TXT file into a list in python, i'm asumming the TXT file is like this
1
2
3
4
....
with open(path_txt, "r") as e:
list1 = [i for i in e]
then, we paste the values of the list on the worksheet you need
from openpyxl import load_workbook
wb = load_workbook(path_xlsx)
ws = wb[sheet_name]
ws["A1"] = "values" #just a header
row = 2 #represent the 2 row of the sheet
column = 1 #represent the column "A" of the sheet
for i in list1:
ws.cell(row=row, column=column).value = i #getting the current cell, and writing the value of the list
row += 1 #just setting the current to the next
wb.save(path_xlsx)
Hope this works for you.
Pandas would do the trick!
Approach:
Have a sheet containing path to your files, separator, the corresponding target sheet names
Now read this excel sheet using pandas and iterate over each row for each file details, read the data, write it to new excel sheet of same workbook.
import pandas as pd
file_details_path = r"/Users/path for xl sheet/file details/File2XlDetails.xlsx"
target_sheet_path = r"/Users/path to target xl sheet/File samples/FiletoXl.xlsx"
# create a writer to save the file content in excel
writer = pd.ExcelWriter(target_sheet_path, engine='xlsxwriter')
file_details = pd.read_excel(file_details_path,
dtype = str,
index_col = False
)
def write_to_excel(file, trg_sheet_name):
# writes it to excel
file.to_excel(writer,
sheet_name = trg_sheet_name,
index = False,
)
# loop through each file record
for index, file_dtl in file_details.iterrows():
# you can print and check the row content for reference
print(file_dtl['File_path'])
print(file_dtl['Separator'])
print(file_dtl['Target_sheet_name'])
# reads file
file = pd.read_csv(file_dtl['File_path'],
sep = file_dtl['Separator'],
dtype = str,
index_col = False,
)
write_to_excel(file, file_dtl['Target_sheet_name'])
writer.save()
Hope this helps! Let me know if you run into any issues...

Take specific rows from a csv file and put in an excel file using python

Good day,
I want to take some specific column(i.e column 3 and column 7) from a csv file and these two columns need to put into an output excel file.
I think that my issue is to put data in excel file.
The code that I tried to use is below.
import csv
from openpyxl import *
from openpyxl.cell import get_column_letter
file1=open('WebShop_Export_2016_Feb_19.csv','r')
readfile=csv.reader(file1,delimiter=';')
data=[]
wb=Workbook()
dest_filename = 'Name of OUPUT excel.xlsx'
ws = wb.active
ws.title="Output XLSX"
for row in readfile:
data=row[2],row[6] # take column 3 and 7 from source file
print(data)# this line is just to see the info that will be put in the excel output
for row_index, item in enumerate(data):
for column_index, cell in enumerate(item):
column_letter = get_column_letter((column_index + 1))
ws.cell('%s%s'%(column_letter, (row_index + 1))).value = cell
file1.close()
wb.save('Name of OUPUT excel.xlsx')
Thank you in advance!

How to write/update data into cells of existing XLSX workbook using xlsxwriter in python

I am able to write into new xlsx workbook using
import xlsxwriter
def write_column(csvlist):
workbook = xlsxwriter.Workbook("filename.xlsx",{'strings_to_numbers': True})
worksheet = workbook.add_worksheet()
row = 0
col = 0
for i in csvlist:
worksheet.write(col,row, i)
col += 1
workbook.close()
but couldn't find the way to write in an existing workbook.
Please help me to write/update cells in existing workbook using xlswriter or any alternative.
Quote from xlsxwriter module documentation:
This module cannot be used to modify or write to an existing Excel
XLSX file.
If you want to modify existing xlsx workbook, consider using openpyxl module.
See also:
Modify an existing Excel file using Openpyxl in Python
Use openpyxl to edit a Excel2007 file (.xlsx) without changing its own styles?
you can use this code to open (test.xlsx) file and modify A1 cell and then save it with a new name
import openpyxl
xfile = openpyxl.load_workbook('test.xlsx')
sheet = xfile.get_sheet_by_name('Sheet1')
sheet['A1'] = 'hello world'
xfile.save('text2.xlsx')
Note that openpyxl does not have a large toolbox for manipulating and editing images. Xlsxwriter has methods for images, but on the other hand cannot import existing worksheets...
I have found that this works for rows...
I'm sure there's a way to do it for columns...
import openpyxl
oxl = openpyxl.load_workbook('File Loction Here')
xl = oxl.['SheetName']
x=0
col = "A"
row = x
while (row <= 100):
y = str(row)
cell = col + row
xl[cell] = x
row = row + 1
x = x + 1
You can do by xlwings as well
import xlwings as xw
for book in xlwings.books:
print(book)
If you have issue with writing into an existing xls file because it is already created you need to put checking part like below:
PATH='filename.xlsx'
if os.path.isfile(PATH):
print "File exists and will be overwrite NOW"
else:
print "The file is missing, new one is created"
...
and here part with the data you want to add

How can I open an Excel file in Python?

How do I open a file that is an Excel file for reading in Python?
I've opened text files, for example, sometextfile.txt with the reading command. How do I do that for an Excel file?
Edit:
In the newer version of pandas, you can pass the sheet name as a parameter.
file_name = # path to file + file name
sheet = # sheet name or sheet number or list of sheet numbers and names
import pandas as pd
df = pd.read_excel(io=file_name, sheet_name=sheet)
print(df.head(5)) # print first 5 rows of the dataframe
Check the docs for examples on how to pass sheet_name: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html
Old version:
you can use pandas package as well....
When you are working with an excel file with multiple sheets, you can use:
import pandas as pd
xl = pd.ExcelFile(path + filename)
xl.sheet_names
>>> [u'Sheet1', u'Sheet2', u'Sheet3']
df = xl.parse("Sheet1")
df.head()
df.head() will print first 5 rows of your Excel file
If you're working with an Excel file with a single sheet, you can simply use:
import pandas as pd
df = pd.read_excel(path + filename)
print df.head()
Try the xlrd library.
[Edit] - from what I can see from your comment, something like the snippet below might do the trick. I'm assuming here that you're just searching one column for the word 'john', but you could add more or make this into a more generic function.
from xlrd import open_workbook
book = open_workbook('simple.xls',on_demand=True)
for name in book.sheet_names():
if name.endswith('2'):
sheet = book.sheet_by_name(name)
# Attempt to find a matching row (search the first column for 'john')
rowIndex = -1
for cell in sheet.col(0): #
if 'john' in cell.value:
break
# If we found the row, print it
if row != -1:
cells = sheet.row(row)
for cell in cells:
print cell.value
book.unload_sheet(name)
This isn't as straightforward as opening a plain text file and will require some sort of external module since nothing is built-in to do this. Here are some options:
http://www.python-excel.org/
If possible, you may want to consider exporting the excel spreadsheet as a CSV file and then using the built-in python csv module to read it:
http://docs.python.org/library/csv.html
There's the openpxyl package:
>>> from openpyxl import load_workbook
>>> wb2 = load_workbook('test.xlsx')
>>> print wb2.get_sheet_names()
['Sheet2', 'New Title', 'Sheet1']
>>> worksheet1 = wb2['Sheet1'] # one way to load a worksheet
>>> worksheet2 = wb2.get_sheet_by_name('Sheet2') # another way to load a worksheet
>>> print(worksheet1['D18'].value)
3
>>> for row in worksheet1.iter_rows():
>>> print row[0].value()
You can use xlpython package that requires xlrd only.
Find it here https://pypi.python.org/pypi/xlpython
and its documentation here https://github.com/morfat/xlpython
This may help:
This creates a node that takes a 2D List (list of list items) and pushes them into the excel spreadsheet. make sure the IN[]s are present or will throw and exception.
this is a re-write of the Revit excel dynamo node for excel 2013 as the default prepackaged node kept breaking. I also have a similar read node. The excel syntax in Python is touchy.
thnx #CodingNinja - updated : )
###Export Excel - intended to replace malfunctioning excel node
import clr
clr.AddReferenceByName('Microsoft.Office.Interop.Excel, Version=15.0.0.0, Culture=neutral, PublicKeyToken=71e9bce111e9429c')
##AddReferenceGUID("{00020813-0000-0000-C000-000000000046}") ''Excel C:\Program Files\Microsoft Office\Office15\EXCEL.EXE
##Need to Verify interop for version 2015 is 15 and node attachemnt for it.
from Microsoft.Office.Interop import * ##Excel
################################Initialize FP and Sheet ID
##Same functionality as the excel node
strFileName = IN[0] ##Filename
sheetName = IN[1] ##Sheet
RowOffset= IN[2] ##RowOffset
ColOffset= IN[3] ##COL OFfset
Data=IN[4] ##Data
Overwrite=IN[5] ##Check for auto-overwtite
XLVisible = False #IN[6] ##XL Visible for operation or not?
RowOffset=0
if IN[2]>0:
RowOffset=IN[2] ##RowOffset
ColOffset=0
if IN[3]>0:
ColOffset=IN[3] ##COL OFfset
if IN[6]<>False:
XLVisible = True #IN[6] ##XL Visible for operation or not?
################################Initialize FP and Sheet ID
xlCellTypeLastCell = 11 #####define special sells value constant
################################
xls = Excel.ApplicationClass() ####Connect with application
xls.Visible = XLVisible ##VISIBLE YES/NO
xls.DisplayAlerts = False ### ALerts
import os.path
if os.path.isfile(strFileName):
wb = xls.Workbooks.Open(strFileName, False) ####Open the file
else:
wb = xls.Workbooks.add# ####Open the file
wb.SaveAs(strFileName)
wb.application.visible = XLVisible ####Show Excel
try:
ws = wb.Worksheets(sheetName) ####Get the sheet in the WB base
except:
ws = wb.sheets.add() ####If it doesn't exist- add it. use () for object method
ws.Name = sheetName
#################################
#lastRow for iterating rows
lastRow=ws.UsedRange.SpecialCells(xlCellTypeLastCell).Row
#lastCol for iterating columns
lastCol=ws.UsedRange.SpecialCells(xlCellTypeLastCell).Column
#######################################################################
out=[] ###MESSAGE GATHERING
c=0
r=0
val=""
if Overwrite == False : ####Look ahead for non-empty cells to throw error
for r, row in enumerate(Data): ####BASE 0## EACH ROW OF DATA ENUMERATED in the 2D array #range( RowOffset, lastRow + RowOffset):
for c, col in enumerate (row): ####BASE 0## Each colmn in each row is a cell with data ### in range(ColOffset, lastCol + ColOffset):
if col.Value2 >"" :
OUT= "ERROR- Cannot overwrite"
raise ValueError("ERROR- Cannot overwrite")
##out.append(Data[0]) ##append mesage for error
############################################################################
for r, row in enumerate(Data): ####BASE 0## EACH ROW OF DATA ENUMERATED in the 2D array #range( RowOffset, lastRow + RowOffset):
for c, col in enumerate (row): ####BASE 0## Each colmn in each row is a cell with data ### in range(ColOffset, lastCol + ColOffset):
ws.Cells[r+1+RowOffset,c+1+ColOffset].Value2 = col.__str__()
##run macro disbled for debugging excel macro
##xls.Application.Run("Align_data_and_Highlight_Issues")
import pandas as pd
import os
files = os.listdir('path/to/files/directory/')
desiredFile = files[i]
filePath = 'path/to/files/directory/%s'
Ofile = filePath % desiredFile
xls_import = pd.read_csv(Ofile)
Now you can use the power of pandas DataFrames!
This code worked for me with Python 3.5.2. It opens and saves and excel. I am currently working on how to save data into the file but this is the code:
import csv
excel = csv.writer(open("file1.csv", "wb"))

Categories