I am using Python to create a separate file from each sheet in an Excel ('xlsx') file. The first part works, but when I try to iterate through the files after they have been created in order to delete the first 8 rows, I am having trouble using openpyxl. After creating the files, how do I iterate through them and delete the first 8 rows?
import os
import xlrd
from xlutils.copy import copy
import xlwt
import openpyxl
import pandas as pd
# Stage 1: write every sheet of every workbook under `path` to its own file.
# Stage 2: remove the first 8 rows from each .xlsx workbook in the output folder.
path = r'C:\excelfiles'  # raw string so the backslash is always literal
targetdir = os.path.join(path, "New_Files")  # where you want your new files
os.makedirs(targetdir, exist_ok=True)  # makes your new directory

_output_dir = os.path.normcase(os.path.abspath(targetdir))
for root, _dirs, files in os.walk(path, topdown=False):  # all the files you want to split
    # The output folder lives inside `path`; skip it so a re-run does not
    # try to split the files this script produced earlier.
    if os.path.normcase(os.path.abspath(root)) == _output_dir:
        continue
    for f in files:  # can add selection condition here
        wb = xlrd.open_workbook(os.path.join(root, f), on_demand=True)
        # splitext removes only the extension; str.strip(".xls") removes any
        # leading/trailing '.', 'x', 'l', 's' characters ("sales.xls" -> "ale").
        stem = os.path.splitext(f)[0]
        for sheet in wb.sheets():  # cycles through each sheet in each workbook
            newwb = copy(wb)  # makes a temp copy of that book
            # brute force, but strips away all other sheets apart from the sheet being looked at
            newwb._Workbook__worksheets = [
                worksheet
                for worksheet in newwb._Workbook__worksheets
                if worksheet.name == sheet.name
            ]
            # saves each sheet as the original file name plus the sheet name;
            # xlutils/xlwt write the legacy .xls format, so use that extension
            namer = (stem + sheet.name).replace(',', '') + ".xls"
            newwb.save(os.path.join(targetdir, namer))

path2 = 'C:/excelfiles/New_Files/'
for root, _dirs, files in os.walk(path2, topdown=False):
    for f2 in files:
        # openpyxl reads only .xlsx-style workbooks; the .xls files written
        # above must be converted before this loop can process them.
        if not f2.lower().endswith(".xlsx"):
            continue
        workbook_path = os.path.join(root, f2)
        book = openpyxl.load_workbook(workbook_path)
        sheet = book.worksheets[0]
        sheet.delete_rows(1, 8)  # delete_rows(idx, amount): rows 1 through 8
        book.save(workbook_path)
Found the answer. First I needed to convert the files to .xlsx, and then I could open them using openpyxl.
# Three passes over the data:
#   1. split every sheet of every workbook under `path` into its own .xls file,
#   2. convert each generated .xls file to .xlsx and delete the .xls,
#   3. trim/format each .xlsx workbook with openpyxl.
from openpyxl.styles import NamedStyle  # style class used in pass 3

path = r'C:\excelfiles'  # raw string so the backslash is always literal
targetdir = os.path.join(path, "New_Files")  # where you want your new files
os.makedirs(targetdir, exist_ok=True)  # makes your new directory

_output_dir = os.path.normcase(os.path.abspath(targetdir))
for root, _dirs, files in os.walk(path, topdown=False):  # all the files you want to split
    # Skip the output folder (it sits inside `path`) so re-runs are safe.
    if os.path.normcase(os.path.abspath(root)) == _output_dir:
        continue
    for f in files:  # can add selection condition here
        wb = xlrd.open_workbook(os.path.join(root, f), on_demand=True)
        # splitext removes only the extension; str.strip(".xls") would also
        # eat '.', 'x', 'l', 's' characters from both ends of the name.
        stem = os.path.splitext(f)[0]
        for sheet in wb.sheets():  # cycles through each sheet in each workbook
            newwb = copy(wb)  # makes a temp copy of that book
            # brute force, but strips away all other sheets apart from the sheet being looked at
            newwb._Workbook__worksheets = [
                worksheet
                for worksheet in newwb._Workbook__worksheets
                if worksheet.name == sheet.name
            ]
            # saves each sheet as the original file name plus the sheet name
            namer = (stem + sheet.name).replace(',', '') + ".xls"
            newwb.save(os.path.join(targetdir, namer))

path2 = 'C:/excelfiles/New_Files/'
for root, _dirs, files in os.walk(path2, topdown=False):
    for p3 in files:
        stem, ext = os.path.splitext(p3)
        if ext.lower() != ".xls":
            continue  # already converted, or not one of our workbooks
        pathandfilename = os.path.join(root, p3)
        pathandfilenamexls = os.path.join(root, stem + ".xlsx")
        # NOTE(review): `p` is not defined in this snippet; save_book_as looks
        # like the pyexcel API (`import pyexcel as p`) - confirm.
        p.save_book_as(file_name=pathandfilename, dest_file_name=pathandfilenamexls)
        os.remove(pathandfilename)

for root, _dirs, files in os.walk(path2, topdown=False):
    for p4 in files:
        if not p4.lower().endswith(".xlsx"):
            continue
        filepathcomplete = os.path.join(root, p4)
        book = openpyxl.load_workbook(filepathcomplete)
        sheet = book[book.sheetnames[0]]  # first sheet of the workbook
        sheet.delete_rows(1, 8)  # drop rows 1 through 8
        # Delete the higher column index first so column 5 is still the
        # original column 5 when it is removed.
        sheet.delete_cols(11)
        sheet.delete_cols(5)
        date_style = NamedStyle(name='datetime', number_format='MM/DD/YYYY')
        for row in range(2, sheet.max_row + 1):  # row 1 is left untouched
            sheet.cell(row=row, column=1).style = date_style
            sheet.cell(row=row, column=10).number_format = '0.00'
        book.save(filepathcomplete)
        book.close()
Related
For example, I have a PDF file whose name is just John Smith. In the Excel file I have John Smith in one column and John Smith's Tax ID number in another column. How do I get John Smith 5555555? I would like to rename all the files by matching each file name against the name in the Excel file.
def rename_file(file_to_rename, source_file):
    """Rename a file to ``<name>_<value>`` using a lookup in an Excel sheet.

    The stem of ``file_to_rename`` is compared with column index 4 of each
    row on the first sheet of ``source_file``. On the first match the file is
    renamed, in its own directory and keeping its suffix, to the matched cell
    value, an underscore, and the value from the preceding column. If no row
    matches, nothing happens.

    Args:
        file_to_rename: Path of the file to rename.
        source_file: Path of the Excel workbook holding the lookup table.
    """
    p = Path(file_to_rename)
    filename = p.stem
    wb = xlrd.open_workbook(source_file)
    sheet = wb.sheet_by_index(0)
    col = 4  # 'john smith' col number
    for row_num in range(sheet.nrows):
        row_value = sheet.row_values(row_num)
        if row_value[col] != filename:
            continue
        id_value = row_value[col - 1]
        # xlrd returns numeric cells as floats; without this an ID such as
        # 5555555 would end up in the file name as "5555555.0".
        if isinstance(id_value, float) and id_value.is_integer():
            id_value = int(id_value)
        new_filename = f'{row_value[col]}_{id_value}'  # format as you want
        p.rename(Path(p.parent, new_filename + p.suffix))  # rename
        break
def get_paths_in_directory(directory):
    """Return an iterator over the ``*.pdf`` entries directly in *directory*."""
    folder = Path(directory)
    return folder.glob('*.pdf')
if __name__ == "__main__":
    source_file = "Fund 2 LP - Document Uploader ID 2021.xlsx"  # excel file to get new filename
    # Raw string: in a normal literal "\U" starts a Unicode escape, which
    # makes this Windows path a syntax error.
    source_directory = r"C:\Users\Chris\Box\Capital\Fund, 2 L.P\Tax\2021 Final"  # directory where your files to rename are.
    # iterate all pdf files in the given directory; take a snapshot first
    # because the loop renames files inside the directory being listed
    paths = list(get_paths_in_directory(source_directory))
    for file_to_rename in paths:
        rename_file(str(file_to_rename), source_file)
Something like this will work. (You need to pip install the packages)
import xlrd
from pathlib import Path
def rename_file(file_to_rename, source_file):
    """Rename a file to ``<name>_<value>`` using a lookup in an Excel sheet.

    The stem of ``file_to_rename`` is compared with the first column of each
    row on the first sheet of ``source_file``. On the first match the file is
    renamed, in its own directory and keeping its suffix, to the matched cell
    value, an underscore, and the value from the next column. If no row
    matches, nothing happens.

    Args:
        file_to_rename: Path of the file to rename.
        source_file: Path of the Excel workbook holding the lookup table.
    """
    p = Path(file_to_rename)
    filename = p.stem
    wb = xlrd.open_workbook(source_file)
    sheet = wb.sheet_by_index(0)
    col = 0  # 'john smith' col number
    for row_num in range(sheet.nrows):
        row_value = sheet.row_values(row_num)
        if row_value[col] != filename:
            continue
        extra = row_value[col + 1]
        # xlrd returns numeric cells as floats; without this an ID such as
        # 5555555 would end up in the file name as "5555555.0".
        if isinstance(extra, float) and extra.is_integer():
            extra = int(extra)
        new_filename = f'{row_value[col]}_{extra}'  # format as you want
        p.rename(Path(p.parent, new_filename + p.suffix))  # rename
        break
if __name__ == "__main__":
    # Single-file example; add iteration to rename multiple files.
    source_file = "new_names.xlsx"
    file_to_rename = "john.pdf"
    rename_file(file_to_rename, source_file)
You can add iteration if you want to rename multiple files.
Hope this helps!
--
Edit
Since you asked for more details, I'll add a simple iteration example.
import xlrd
from pathlib import Path
def rename_file(file_to_rename, source_file):
    """Rename a file to ``<name>_<value>`` using a lookup in an Excel sheet.

    Looks for the stem of ``file_to_rename`` in the first column of the first
    sheet of ``source_file``. The first matching row supplies the new name:
    the matched value, an underscore, and the value of the next column, with
    the original suffix kept. The file stays in its directory. If no row
    matches, the file is left alone.

    Args:
        file_to_rename: Path of the file to rename.
        source_file: Path of the Excel workbook holding the lookup table.
    """
    p = Path(file_to_rename)
    filename = p.stem
    wb = xlrd.open_workbook(source_file)
    sheet = wb.sheet_by_index(0)
    col = 0  # 'john smith' col number
    for row_num in range(sheet.nrows):
        row_value = sheet.row_values(row_num)
        if row_value[col] != filename:
            continue
        extra = row_value[col + 1]
        # Numeric cells come back from xlrd as floats; convert whole numbers
        # to int so the name reads "..._5555555" rather than "..._5555555.0".
        if isinstance(extra, float) and extra.is_integer():
            extra = int(extra)
        new_filename = f'{row_value[col]}_{extra}'  # format as you want
        p.rename(Path(p.parent, new_filename + p.suffix))  # rename
        break
def get_paths_in_directory(directory):
    """Lazily list the PDF files found at the top level of *directory*."""
    pdf_pattern = '*.pdf'
    return Path(directory).glob(pdf_pattern)
if __name__ == "__main__":
    source_directory = "files_to_rename"  # folder holding the PDFs to rename
    source_file = "new_names.xlsx"  # workbook that supplies the new names
    # Walk every PDF in the folder and rename it from the workbook.
    for file_to_rename in get_paths_in_directory(source_directory):
        rename_file(str(file_to_rename), source_file)
And, of course, create your script file (e.g. 'rename_files.py'), copy and paste this code into it, and run it with python3 rename_files.py.
Let me know if you have more questions.
This is the code I have been working on, but I am not sure how to merge the results into one new Excel file and get the header and file name.
import os
import xlrd
Folder_path = input ("Enter the file path :")


def listDir(dir):
    """Print the header row of one workbook next to the names in a folder.

    Lists the entries of ``dir``, prompts for the path of a workbook, and for
    each cell in the first row of its first sheet prints the cell value
    followed by one line per directory entry made of the entry name and that
    cell value.

    Args:
        dir: Directory whose entry names are printed.
    """
    fileNames = os.listdir(dir)
    loc = input ("Enter the path of file + filename :")
    wb = xlrd.open_workbook(loc)
    sheet = wb.sheet_by_index(0)
    for i in range(sheet.ncols):
        header = sheet.cell_value(0, i)  # row 0 holds the headers
        print(header)
        for filename in fileNames:
            # f-string so numeric header cells do not raise TypeError
            print(f"{filename}{header}")


if __name__ == '__main__':
    listDir(Folder_path)
From this code I can get the header value and the file name, but I want the output as shown in the picture: a new Excel file that lists each file name together with the header of that particular file.
The output should be as follows:
https://i.stack.imgur.com/7bXoE.png
After I get the file names, I want to get the header of each file and put them in a new Excel file that shows each file name and its header.
This is one way of doing it:
import os
from pathlib import Path
import xlrd
import pandas as pd
def listDir(inputdir):
    """Collect the first-row cell values of every file in *inputdir*.

    Each regular file in ``inputdir`` is opened with xlrd and the first row
    of its first sheet is read.

    Args:
        inputdir: Directory containing the workbooks.

    Returns:
        A list with one entry per file, each of the form
        ``[filename, cell_0, cell_1, ...]``.
    """
    allheaders = []
    for filename in os.listdir(inputdir):
        # Build the path from the argument, not from a module-level global.
        loc = os.path.join(inputdir, filename)
        if not os.path.isfile(loc):
            continue  # sub-directories cannot be opened as workbooks
        wb = xlrd.open_workbook(loc)
        sheet = wb.sheet_by_index(0)
        headers = [filename]
        headers.extend(sheet.cell_value(0, i) for i in range(sheet.ncols))
        allheaders.append(headers)
    return allheaders
# Prompt for the folder, gather one row per workbook, and write the table to
# ListOfHeaders.xlsx without a header row or index column.
Enter_path = input("Enter the file path :")
Folder_path = Path(Enter_path)
allheaders = listDir(Folder_path)
df = pd.DataFrame(allheaders)
df.to_excel("ListOfHeaders.xlsx", index=False, header=False)
I have a Python program that creates a new excel file based on some worksheets from a few other files. The following code I have copies the worksheets perfectly, but is unable to copy the image that is present in the worksheet. How do I copy images in an Excel worksheet to another Excel workbook using Python?
# Copy the cell values of one worksheet from an uploaded workbook into a new
# sheet of `wb2`, then save `wb2` to the compiled-output path.
# `key`, `current_acc_group`, `current_gl_account`, `content_of_key_sheet_file`,
# `xl` and `wb2` are defined outside this snippet.
path1 = "/mnt/e/RecEasy-MVP-Python/FlaskApp/Uploaded_files/" + key
print(path1)
path2 = "/mnt/e/RecEasy-MVP-Python/FlaskApp/Compiled/" + current_acc_group + "_" + current_gl_account + ".xlsx"
print(path2)
path_to_key_sheet = "/mnt/e/RecEasy-MVP-Python/FlaskApp/Uploaded_files/" + key + "_key_sheet.txt"
print("Path to key sheet file:")
print(path_to_key_sheet)

wb1 = xl.load_workbook(filename=path1, read_only=True, data_only=True)
ws1 = wb1.worksheets[2]  # used when no sheet title matches the key below
for sheet in wb1:
    # If several titles match, the last matching sheet wins.
    if str(sheet.title) == str(content_of_key_sheet_file):
        ws1 = sheet
        print("Sheet selected")
        print(sheet.title)

ws2 = wb2.create_sheet(ws1.title)
print("Copying from the Excel file: " + path1)
# Only cell values are copied here; images and formatting are not.
for row in ws1:
    for cell in row:
        if cell.value is not None:
            ws2[cell.coordinate].value = cell.value
wb2.save(path2)
Install Pillow (just pip install Pillow; you do not need to import it in your file),
then:
from openpyxl.drawing.image import Image

# ... your existing workbook / worksheet code goes here ...

img = Image('yourImg.png')
# The second argument is the anchor: the cell for the image's top-left corner.
yourSheet.add_image(img, 'A2')
where A2 is your cell
I'd been struggling with this for a bit as most of the libraries I typically use to manipulate xlsx files seemed to not want to support this.
Fortunately, .xlsx is ooxml format. Thus, all you need to do is unzip the .xlsx and locate the pictures in xl/media/ of the directory you extracted your workbook to.
from zipfile import ZipFile

# Extract the workbook into the current directory; the pictures end up
# under xl/media/. The context manager closes the archive, and the name
# `archive` avoids shadowing the builtin `zip`.
with ZipFile('yourWorkbook.xlsx') as archive:
    archive.extractall()
Now you can insert them back into your new spreadsheet:
import openpyxl
# Create a workbook, place test.jpg on its first sheet, and save it.
wb = openpyxl.Workbook()
ws = wb.worksheets[0]
img = openpyxl.drawing.image.Image('test.jpg')
# Current openpyxl takes the anchor cell as the second argument of
# add_image; the older img.anchor(ws.cell('A1')) call no longer works.
ws.add_image(img, 'A1')
wb.save('out.xlsx')
I have an Excel worksheet, some buttons, and some macros. I use xlwings to make it work. Is there a way to save the workbook through xlwings? I want to extract a specific sheet after doing an operation, but the saved sheet is the sheet as it was before the operation, without the generated data.
My code for extracting the sheet I need is the following:
' Copy one worksheet of a workbook into standalone .xlsx and .csv files.
' Arguments: 0 = source workbook path
'            1 = name of the sheet to extract
'            2 = output directory, used as a plain prefix (include the trailing separator)
'            3 = output file name without extension
Set objFSO = CreateObject("Scripting.FileSystemObject")
src_file = objFSO.GetAbsolutePathName(Wscript.Arguments.Item(0))
sheet_name = Wscript.Arguments.Item(1)
dir_name = Wscript.Arguments.Item(2)
file_name = Wscript.Arguments.Item(3)

Dim objExcel
Set objExcel = CreateObject("Excel.Application")
objExcel.Visible = False

Dim objWorkbook
' This Excel instance was just created and has no workbooks loaded, so the
' file has to be opened; Workbooks(src_file) only looks up an open workbook.
Set objWorkbook = objExcel.Workbooks.Open(src_file)

' Copy with no arguments creates a new workbook containing only this sheet
' and makes it the active workbook.
objWorkbook.Sheets(sheet_name).Copy

objExcel.DisplayAlerts = False
' & is the string concatenation operator; + can attempt numeric addition.
objExcel.ActiveWorkbook.SaveAs dir_name & file_name & ".xlsx", 51 ' 51 = xlOpenXMLWorkbook
objExcel.ActiveWorkbook.SaveAs dir_name & file_name & ".csv", 6   ' 6 = xlCSV

' Close the copied workbook as well as the source before quitting.
objExcel.ActiveWorkbook.Close False
objWorkbook.Close False
objExcel.Quit
Book.save() has now been implemented: see the docs.
I am attempting to compile multiple .xlsx workbooks from a folder into a single .csv.
The loop I've created is only capturing the first workbook from the folder.
How can I alter this to capture all workbooks in the folder? The data is only on Sheet1 in each workbook.
import os
import xlrd
import csv
# Append the first sheet of every workbook under `rootdir` to a single CSV.
rootdir = r'C:\Users\username\Desktop\Mults'
filenames = []  # full path of every file visited

# Open the CSV once and write each workbook's rows while it is being visited.
# Writing after the loop would only ever see the last workbook.
# newline='' is what the csv module expects for files it writes to.
with open('acit_multsTEST.csv', 'w', newline='') as f:
    c = csv.writer(f)
    for subdir, _dirs, files in os.walk(rootdir):
        for file in files:
            workbook_path = os.path.join(subdir, file)
            filenames.append(workbook_path)
            wb = xlrd.open_workbook(workbook_path)
            sh = wb.sheet_by_index(0)
            c.writerows(sh.row_values(r) for r in range(sh.nrows))
I appreciate any help!
Thank you!
So you have to do the following.
Get a list of all the workbooks
Open a main csv to append all your data to
Iterate through your list of workbooks
Append each sheet to your csv
import glob
import os
import xlrd
import csv
# Merge the first sheet of every .xlsx workbook in ROOTDIR into out.csv.
ROOTDIR = r'C:\Users\username\Desktop\Mults'
wb_pattern = os.path.join(ROOTDIR, '*.xlsx')
workbooks = glob.glob(wb_pattern)  # already full paths, no need to re-join

# Text mode with newline='' is what csv.writer expects on Python 3.
with open('out.csv', 'w', newline='') as outcsv:
    writer = csv.writer(outcsv)
    for book_path in workbooks:
        # NOTE(review): xlrd 2.0+ reads only .xls; .xlsx input needs
        # xlrd < 2.0 or a different reader such as openpyxl.
        book = xlrd.open_workbook(book_path)
        sheet = book.sheet_by_index(0)
        writer.writerows(
            sheet.row_values(row_num) for row_num in range(sheet.nrows)
        )