Multiple excel tables in Outlook with Python - python

I need to send Outlook email with 3 excel tables.
I have one excel file - master_file.csv (this file is filled with automated data from pandas data frame)
In this file I have one sheet (Sheet1) with 3 tables
These tables have always the same number of columns:
table_1 from A to R
table_2 from S to AJ
table_3 from AK to BD
The number of rows changes every time, so the row range has to be determined from the filled cells (probably with End(xlDown), i.e. the XlDirection "down" constant).
These tables have their own formatting in Excel file - this formatting needs to be copied into the email
The email should look something like this:
"Text"
"Table 1"
"Text"
"Table 2"
"Text"
"Table 3"
"Text"
I already tried the code below, but I can't figure out how to put all of this together; I have come across a hundred options and none of them works.
Can you help me with adding Excel tables to an Outlook email when the extent of each table has to be determined from the filled cells in its rows?
import sys
from pathlib import Path
import win32com.client as win32
from PIL import ImageGrab
# Render a fixed worksheet range as a picture, save it as a PNG and show it in
# a new Outlook message.  Excel is driven over COM, so the workbook and the
# hidden Excel instance are released in ``finally`` blocks even if a step in
# between fails.
excel_path = str(Path.cwd() / 'master_file.xlsm')
image_path = str(Path.cwd() / 'test.png')

excel = win32.gencache.EnsureDispatch('Excel.Application')
excel.Visible = False
excel.DisplayAlerts = False
try:
    wb = excel.Workbooks.Open(excel_path)
    try:
        ws = wb.Worksheets(1)
        win32c = win32.constants
        # CopyPicture places a bitmap of the range on the clipboard;
        # ImageGrab then reads it back as a PIL image.
        ws.Range("A1:R11").CopyPicture(Appearance=1, Format=win32c.xlBitmap)
        img = ImageGrab.grabclipboard()
        if img is None:
            # grabclipboard() returns None when the clipboard holds no image.
            raise RuntimeError('No picture found on the clipboard after CopyPicture')
        img.save(image_path)
    finally:
        wb.Close()
finally:
    # The instance was hidden and had its alerts disabled above, so do not
    # leave it running in the background.
    excel.Quit()

outlook = win32.gencache.EnsureDispatch('Outlook.Application')
new_mail = outlook.CreateItem(0)  # 0 is olMailItem
new_mail.To = 'person#email.com'
new_mail.Attachments.Add(Source=image_path)
body = "<h1>Email text...</h1><br><br> <img src=test.png>"
new_mail.HTMLBody = (body)
new_mail.Display()

I'm answering my own question as I found a solution and maybe it will be helpful for somebody.
There are actually 2 solutions but the second one is more suitable for nice email in my opinion.
First solution: We have one csv/excel file.
import sys
from pathlib import Path
import win32com.client as win32
from PIL import ImageGrab
import xlwings as xw
def _copy_range_as_png(ws, address, png_path):
    """Save a picture of the worksheet range *address* to *png_path*.

    The range is copied to the clipboard as a bitmap and read back with
    ``ImageGrab``.  Raises ``RuntimeError`` when the clipboard holds no image.
    """
    ws.Range(address).CopyPicture(Appearance=1, Format=win32.constants.xlBitmap)
    img = ImageGrab.grabclipboard()
    if img is None:
        raise RuntimeError(f'No picture found on the clipboard for range {address}')
    img.save(png_path)


# open raw data file only to find out how many rows are filled
filename_read = 'master_file.csv'
source_book = xw.Book(filename_read)
try:
    sht = source_book.sheets[0]
    # last row reached when moving down from A4 through the filled cells
    num_row = sht.range('A4').end('down').row
finally:
    # the handle used to be overwritten below, which left the CSV open in Excel
    source_book.close()

excel_path = str(Path.cwd() / 'master_file.xlsm')
image_path1 = str(Path.cwd() / 'test.png')
image_path2 = str(Path.cwd() / 'test2.png')

excel = win32.gencache.EnsureDispatch('Excel.Application')
excel.Visible = False
excel.DisplayAlerts = False
try:
    wb = excel.Workbooks.Open(excel_path)
    try:
        ws = wb.Worksheets(1)
        # table 1 lives in columns A:R, table 2 in columns S:AJ
        _copy_range_as_png(ws, f'A1:R{num_row}', image_path1)
        _copy_range_as_png(ws, f'S1:AJ{num_row}', image_path2)
    finally:
        wb.Close()
finally:
    # the instance was hidden and had its alerts disabled, so shut it down
    excel.Quit()

outlook = win32.gencache.EnsureDispatch('Outlook.Application')
new_mail = outlook.CreateItem(0)  # 0 is olMailItem
new_mail.To = 'person#email.com'
new_mail.Attachments.Add(Source=image_path1)
new_mail.Attachments.Add(Source=image_path2)
body = "<h1>Hello team,</h1> <br><br> <h2> Here are data for yesterday.</h2> <br><br> <h2>Call Metrics:</h2> <br><br> <img src=test.png width=1700 height=600> <br><br> <h2>Back office metrics:</h2> <img src=test2.png width=1700 height=600> "
new_mail.HTMLBody = (body)
new_mail.Display()
This code finds the end of the table, takes a picture of that range and puts it into the email. It is a good solution when the number of rows is always roughly the same. But if it varies a lot (e.g. 30 rows one time and 100 rows the next), the images will be too small or too big if you set one fixed width and height like I did.
Second solution: Create one excel for one table (i.e. "table_1.xlsm", "table_2.xlsm"). Put there a simple VBA code
' Refreshes this workbook from master_source.csv: clears the old rows, copies
' the filled block from the CSV into Sheet1 starting at A4, applies borders and
' centring, saves the workbook and quits Excel.
' NOTE(review): presumably runs automatically when the workbook is opened
' (it is named auto_open) - confirm.
Sub auto_open()
' Suppress screen redraws and confirmation prompts while the macro runs.
Application.ScreenUpdating = False
Application.AlertBeforeOverwriting = False
Application.DisplayAlerts = False
' Remove contents and formatting of the previous data rows on the active sheet.
Range("A4:R200").Clear
Workbooks.Open "C:\Users\xxxxxxx\master_source.csv"
Windows("master_source.csv").Activate
' Select from A2:R2 to the cell reached by moving down and then right from
' B2:R2, i.e. the block of filled rows.
Range("A2:R2", Range("B2:R2").End(xlDown).End(xlToRight)).Select
Range("A2").Activate
Selection.Copy
' Switch back to this workbook and paste the copied block at A4 of Sheet1.
Windows("table_1.xlsm").Activate
Sheet1.Select
Range("A4").Select
Sheet1.Paste
' Clear the copy marquee and close the CSV.
Windows("master_source.csv").Application.CutCopyMode = False
Windows("master_source.csv").Close
' Format the block found the same way (from A2:R2 downwards) on the sheet that
' is now active: continuous borders, centred horizontally and vertically.
Range("A2:R2", Range("B2:R2").End(xlDown).End(xlToRight)).Borders.LineStyle = XlLineStyle.xlContinuous
Range("A2:R2", Range("B2:R2").End(xlDown).End(xlToRight)).HorizontalAlignment = xlCenter
Range("A2:R2", Range("B2:R2").End(xlDown).End(xlToRight)).VerticalAlignment = xlCenter
' Save the active workbook and close Excel.
ActiveWorkbook.Save
Application.Quit
End Sub
Save this SHEET as a Web Page - (!!) this is important - save JUST the sheet. If you save the whole workbook it will be saved with FRAMES, which Outlook doesn't support.
When saving, tick the AutoRepublish checkbox (after every save it will update our HTML file).
Then this Python code
import os, time, sys
from datetime import datetime, timedelta, date
from pathlib import Path
import win32com.client as win32
def _read_text(html_path):
    """Return the content of *html_path*, closing the file afterwards."""
    with open(html_path) as handle:
        return handle.read()


# setting up yesterday's date and the date format used in the subject
d = date.today() - timedelta(days=1)
dt = d.strftime("%d/%m/%y")
# saving a copy of the data for future usage
# (master_df is the data frame built earlier in the full script)
filepath = Path(f'C:/Users/xxxxxxxx/Agent report {d}.csv')
filepath.parent.mkdir(parents=True, exist_ok=True)
master_df.to_csv(filepath)
# updating 3 tables - opening each table file lets its "auto_open" VBA macro
# do the refresh automatically
for x in range(1, 4):
    os.system(f'start "excel" "C:\\xxxxxxxxxxx\\table_{x}.xlsm"')
    # NOTE(review): the original indentation was lost; the pause is assumed to
    # belong to the loop so every workbook gets time to finish - confirm.
    time.sleep(10)
outlook = win32.gencache.EnsureDispatch('Outlook.Application')
mail = outlook.CreateItem(0)  # 0 is olMailItem
mail.To = 'person#gmail.com'
mail.Subject = f'Agent report for {dt}'
# the republished HTML of each table is read through a helper so that the
# file handles are closed again
table1 = _read_text(r'C:\xxxxxxxxxxxxxxxxxxxxxx\table_1.htm')
table2 = _read_text(r'C:\xxxxxxxxxxxxxxxxx\table_2.htm')
table3 = _read_text(r'C:\xxxxxxxxxxxxxxxxxxxxx\table_3.htm')
mail.HTMLBody = f"""\
<html>
<head></head>
<body>
Hello team,<br><br>
Below are metrics for your agents for previous day<br><br>
<b>First Metrics:</b><br><br>
{table1}<br><br>
<b>Second metrics:</b><br><br>
{table2}<br><br>
<b>Third statistics:</b><br><br>
{table3}<br>
Reference:<br>
Kind regards,<br>
</body>
</html>
"""
mail.Send()

Related

Copy Filtered Rows in MS Excel with Python

A small introduction:
I need a script that takes an Excel workbook and, based on a JSON file that I give it, sends a mail to a list of people. Each mail has attached the same workbook filtered (every person has their own filter option) and printed as PDF, AND a copy of the workbook in which only the filtered cells are accessible.
from numpy import datetime64
import xlwings as xw
import pandas as pd
import os.path as path
import win32com.client as win32
import glob
import numpy as np
import json
import codecs
xw.interactive = False

# The first macro-enabled workbook in the working directory is the source file;
# generated files are written next to it.
file = glob.glob("*.xlsm")[0]
percorso = path.dirname(path.abspath(file))

# Mailing list, looked up below by project to get the recipient(s).
with open(path.abspath("MailingList.json")) as f:
    mailList = json.load(f)

# Project names to iterate over, and the project / closing-date columns that
# checkIfEmpty() filters on.
commessa = pd.read_excel(
    file,
    dtype="string",
    sheet_name="Matrice",
    usecols=["PROJECT"],
)
pp = pd.read_excel(
    file,
    sheet_name="Foglio1",
    skiprows=4,
    usecols=["PROJECT", 'CLOSING DATE'],
)

# Live handles: the workbook opened through xlwings and Outlook through COM.
pPlan = xw.Book(file)
outlook = win32.Dispatch("outlook.application")
def checkIfEmpty(pj):
    """Return True when project *pj* still has rows worth reporting.

    A row counts when its PROJECT equals *pj* and its CLOSING DATE is either
    missing or not older than eight days before today.  Despite the name, the
    result is True when the filtered selection is NOT empty.
    """
    cutoff = datetime64('today') - np.timedelta64(8, 'D')
    closing = pp['CLOSING DATE']
    is_project = pp["PROJECT"] == pj
    is_current = closing.isnull() | (closing >= cutoff)
    return not pp[is_project & is_current].empty
# Read the mail template once and close it again; it used to be re-opened on
# every iteration and never closed.
with codecs.open("base.htm", 'r') as template:
    body_html = template.read()

# For every project that has a mailing-list entry: filter the sheet, export it
# as PDF, save a workbook copy and mail both files.
# NOTE(review): the nesting was reconstructed from a listing whose indentation
# was lost - confirm against the original script.
for proj in commessa["PROJECT"]:
    if pd.isna(proj) or proj not in mailList:
        continue
    # Proper keyword arguments instead of the VBA-style "Name:=value", which
    # Python parsed as positional assignment expressions.
    pPlan.sheets[0].api.Range("A1:P15452").AutoFilter(Field=3, Criteria1=proj)
    if not checkIfEmpty(proj):
        continue

    # "<folder>\<workbook name without extension> - <project>"
    base_name = percorso + "\\" + "".join(file.split(".")[:-1]) + " - " + proj
    perk = base_name + ".pdf"
    pesciolino = base_name + ".xlsx"

    # 0 is xlTypePDF
    pPlan.sheets[0].api.ExportAsFixedFormat(0, Filename=perk, IgnorePrintAreas=False, OpenAfterPublish=False)

    temp = xw.Book()
    temp.sheets.add()
    pPlan.sheets['Foglio1'].range('A1:AF7000').copy(temp.sheets['Foglio1'].range('A1:AF7000'))
    temp.save(pesciolino)
    temp.close()

    mail = outlook.CreateItem(0)  # 0 is olMailItem
    mail.To = mailList[proj]
    mail.CC = "hidden"
    mail.Subject = "".join(file.split(".")[:-1]) + " - " + proj
    mail.HTMLBody = body_html
    mail.Attachments.Add(perk)
    mail.Attachments.Add(pesciolino)
    mail.Send()
The only part where I actually have a problem is the Excel copy part: it copies the entire sheet and leaves the option to remove the filter. The rest of the code works without problems.
Any solution to my problem would be appreciated, thanks.
EDIT: Simpler than I thought.
Just added:
# Replacement for the copy step inside the loop (temp, pPlan, percorso, file
# and proj come from the surrounding script).
temp.sheets.add()
# Copy only the used range of the filtered sheet into the new workbook,
# starting at A1.
pPlan.sheets[0].used_range.api.Copy(temp.sheets[0].api.Range("A1"))
# Save as "<workbook name> - <project>.xlsx" in the source folder, then close.
temp.save(percorso+"\\"+"".join(file.split(".")[:-1])+" - "+proj+".xlsx")
temp.close()
and removed the "old" copy part.
Writing used_range instead of all range gets exactly what I wanted.

openpyxl is overwriting any work done by win32 macro in Excel

I am currently writing a script that requires me to update data in an Excel sheet and run a macro. After running the macro, I am trying to save the file using win32, in order to be able to run through a similar process again. However, I have linked everything back to a weird issue with win32 and openpyxl. It seems that if I run the macro using win32, save the file and open it, everything looks exactly how it should. But if I then use openpyxl to add something to that file after running the macro, the entire results from the macro are then deleted. For example, the macro fills in cells A4:H23 with the correct data, but then after saving with win32 and reopening and editing with openpyxl, if I try to add any value to cell A3 afterwards and open up the file, the only thing that shows is the value in A3 and nothing that the macro should have done. I can open the sheet manually and add the number and resave it perfectly fine, but when I use openpyxl to do so, it clears all the macro results. I have included my code below for reference. Any help would be appreciated.
from openpyxl import load_workbook
import os
import win32com.client
import re
import time
Macro1 = r"C:\Users\...\Documents\Macro1Demo.xlsm"
Macro2 = r"C:\Users\...\Documents\Macro2Demo.xlsm"
MiddleManTest = r"C:\Users\...\Documents\MiddleManTest.xlsm"
MiddleMan2 = r"C:\Users\...\Documents\MiddleMan2.xlsm"
DistrictList = r"C:\Users\...\Documents\DistrictList.xlsx"

Macro1WB = load_workbook(filename=Macro1, keep_vba=True)
Macro2WB = load_workbook(filename=Macro2, keep_vba=True)
# MiddleMan2 is deliberately NOT loaded here.  openpyxl reads the file when
# load_workbook() is called, so a workbook loaded before the macro runs holds
# the pre-macro content, and saving it afterwards overwrites what Excel wrote.
ListWB = load_workbook(filename=DistrictList, keep_vba=True)
ListSheet = ListWB['Sheet1']
M1Sheet = Macro1WB['Entry']
M2Sheet = Macro2WB['Raw']

# NOTE(review): the nesting was reconstructed from a listing whose indentation
# was lost - confirm against the original script.
for i in range(2, 3):
    # Copy organisation / district of list row i into the macro workbook.
    M1Sheet['A2'].value = ListSheet['A' + str(i)].value
    M1Sheet['B2'].value = ListSheet['B' + str(i)].value
    OrgName = M1Sheet['A2'].value
    DistrictName = M1Sheet['B2'].value
    Macro1WB.save(Macro1)

    # Strip characters that are not allowed in file names (raw string: same
    # pattern as before, without the invalid "\:" string escape).
    SimpleOrgName = re.sub(r'[/\:*?<>|"]', '', OrgName)
    SimpleDistrictName = re.sub(r'[/\:*?<>|"]', '', DistrictName)
    NewFileName = SimpleOrgName + "(" + SimpleDistrictName + ")"
    print(M1Sheet['A2'].value, M1Sheet['B2'].value)
    print("Org/District Copied.")

    if os.path.exists(Macro1):
        xl = win32com.client.Dispatch("Excel.application")
        xl.visible = True
        workbook = xl.Workbooks.Open(os.path.abspath(Macro1))
        xl.Application.Run("Macro1Demo.xlsm!Module1.AdvancedFilter")
        print("Data filtered.")
        workbook.SaveAs(MiddleMan2)
        # Close the workbook before quitting so Excel has released the file
        # before openpyxl opens it below.
        workbook.Close(False)
        xl.Application.Quit()

    # Load the file only now, after Excel has saved the macro results, so the
    # edit below is applied on top of them instead of on a stale copy.
    MM2WB = load_workbook(filename=MiddleMan2, keep_vba=True)
    MM2WB['Entry']['A3'] = i
    MM2WB.save(MiddleMan2)

Password protected xls data transfer to master sheet

I am quite new to Python and am currently writing code to replace a VBA process which takes 5 to 6 hours to complete. The code needs to open a password-protected Excel file and extract certain sheet and cell data to a master sheet; if the number in column A already exists there, the existing row is overwritten so that there are no duplicates:
Process:
Step 1: Open password protected xls
step 2: check for the duplicated number in column A and if the same value exists then override, copy required cells from each sheet to master wb and data sheet as shown below
step 3: go back to step one until all xls are done.
This is part of the VBA to show the process to a degree:
' Fill columns A-E of row Store_Row_no on the "Data" sheet of the master
' workbook (wbThis): the number and date first, then one cell from each of
' Sheet1, Sheet2 and Sheet3 of the workbook just opened (wbNew).
wbThis.Worksheets("Data").Range("A" & Store_Row_no) = NewNumber
wbThis.Worksheets("Data").Range("B" & Store_Row_no) = DateNew
wbThis.Worksheets("Data").Range("C" & Store_Row_no) = wbNew.Worksheets("Sheet1").Range("F2").Value
wbThis.Worksheets("Data").Range("D" & Store_Row_no) = wbNew.Worksheets("Sheet2").Range("H152").Value
wbThis.Worksheets("Data").Range("E" & Store_Row_no) = wbNew.Worksheets("Sheet3").Range("D3").Value
This is my current code, but I can't work out how to open a password-protected Excel file, copy the values to the master sheet and then overwrite the row when the value in column A is a duplicate.
Python code so far:
import win32com.client
import sys
import os
# Placeholder locations of the protected workbook and the master workbook.
foldername = 'C:\\Users\\'
password = 'ORANGE'  # the closing quote was missing, which was a syntax error
pmaster = r'C:\Users'

xlApp = win32com.client.Dispatch("Excel.Application")
xlApp.Visible = False
master = xlApp.Workbooks.Open(Filename=pmaster)
# Positional arguments: UpdateLinks=False, ReadOnly=True, Format=None, Password
wb = xlApp.Workbooks.Open(foldername, False, True, None, password)
sh1 = wb.Sheets('sheet1')  # sheet name1
sh2 = wb.Sheets('sheet2')  # sheet name2
sh3 = wb.Sheets('sheet3')  # sheet name3

# Cells to be transferred to the master sheet.
out1 = sh1.Range("B2").value
out2 = sh1.Range("D2").value
out3 = sh1.Range("F2").value
out4 = sh2.Range("H152").value
out5 = sh3.Range("D3").value
print(out1, out2, out3, out4, out5)
Just need to loop through help and copy to new master wb
Thank you so much in advance

Python/Pandas: Iterate over Excel files and extract information

I found threads on extracting info from various sheets of the same file, and solutions to problems similar to, but not exactly like, mine.
I have several Excel workbooks, each containing several sheets. I would like to iterate over each workbook and extract information from a sheet named "3. Prices". This sheet is available in all files. Two pieces of information are to be extracted from this sheet in every file: the first is always found in the cell range E13:H13 and the second in cells F19, I19 and K19.
I would like place the two pieces of extracted information next to one another (for a given file) and then stack the extract from every file on top in one master file. Also, the first column of the combined file should be the file name.
So something like this:
What I've tried so far, with no luck
from openpyxl import load_workbook
import os
import pandas as pd
# Attempt from the question, kept as posted: collect seven cells of sheet
# "3. Prices" from every workbook into one data frame.
# NOTE(review): os.listdir() returns bare file names, so the isfile() and
# load_workbook() calls below resolve them against the current working
# directory rather than against 'C:\\User\\files'.
directory = os.listdir('C:\\User\\files')
for file in directory:
if os.path.isfile(file):
# The first three characters of the file name label the row.
file_name = file[0:3]
workbook = load_workbook(filename = file)
sheet = workbook['3. Prices']
e13 = sheet['E13'].value
f13 = sheet['F13'].value
g13 = sheet['G13'].value
h13 = sheet['H13'].value
f19 = sheet['F19'].value
i19 = sheet['I19'].value
k19 = sheet['K19'].value
# NOTE(review): `df` is not assigned anywhere in this snippet before it is
# used here.
df = df.append(pd.DataFrame({
"File_name":file_name,
"E13":e13, "F13":f13, "G13":g13,"H13":h13,
# NOTE(review): the "K19" column is filled from i19; k19 is never used.
"F19":f19,"I19":i19,"K19":i19,
}, index=[0]))
I figured it out. I was missing two elements: 1) changing the current working directory to match the one in the variable 'directory' and 2) define a dataframe at the start
from openpyxl import load_workbook
import os
import pandas as pd
# Work from inside the data folder: os.listdir() yields bare file names, so
# the isfile()/load_workbook() calls below resolve them against the cwd.
os.chdir('C:\\User\\files')
directory = os.listdir('C:\\User\\files')

# Cells read from sheet "3. Prices"; each becomes a column of the same name.
cell_refs = ("E13", "F13", "G13", "H13", "F19", "I19", "K19")

rows = []
for file in directory:
    if not os.path.isfile(file):
        continue
    # data_only=True returns the cached results of formulas, not the formulas.
    workbook = load_workbook(filename=file, data_only=True)
    sheet = workbook['3. Prices']
    # The first three characters of the file name label the row.
    row = {"File_name": file[0:3]}
    # Every column is read from its own cell ("K19" used to be filled from I19).
    row.update({ref: sheet[ref].value for ref in cell_refs})
    rows.append(row)

# Build the frame once; DataFrame.append was removed in pandas 2.0.
df = pd.DataFrame(rows, columns=["File_name", *cell_refs])

From password-protected Excel file to pandas DataFrame

I can open a password-protected Excel file with this:
import sys
import win32com.client
# Open a password-protected workbook through Excel's COM interface.
# Usage: script.py <filename> <password>
xlApp = win32com.client.Dispatch("Excel.Application")
print("Excel library version:", xlApp.Version)
filename, password = sys.argv[1:3]
xlwb = xlApp.Workbooks.Open(filename, Password=password)
# xlwb = xlApp.Workbooks.Open(filename)
xlws = xlwb.Sheets(1)  # counts from 1, not from 0
print(xlws.Name)
print(xlws.Cells(1, 1))  # that's A1
I'm not sure though how to transfer the information to a pandas dataframe. Do I need to read cells one by one and all, or is there a convenient method for this to happen?
Simple solution
import io
import pandas as pd
import msoffcrypto
# Decrypt the workbook into an in-memory buffer and let pandas read that
# buffer; `i` is the path of the encrypted file.
passwd = 'xyz'
decrypted_workbook = io.BytesIO()
with open(i, 'rb') as encrypted_stream:
    protected = msoffcrypto.OfficeFile(encrypted_stream)
    protected.load_key(password=passwd)
    # must run while the source file is still open
    protected.decrypt(decrypted_workbook)
df = pd.read_excel(
    decrypted_workbook,
    sheet_name='abc',
)
pip install --user msoffcrypto-tool
Exporting all sheets of each Excel file from directories and sub-directories to separate CSV files:
from glob import glob
PATH = "Active Cons data"
# Scanning all the excel files from directories and sub-directories
excel_files = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.xlsx'))]
for i in excel_files:
    print(str(i))
    # Decrypt into memory; `passwd` is the password defined above.
    decrypted_workbook = io.BytesIO()
    with open(i, 'rb') as file:
        office_file = msoffcrypto.OfficeFile(file)
        office_file.load_key(password=passwd)
        office_file.decrypt(decrypted_workbook)
    # sheet_name=None returns every sheet at once as {sheet name: DataFrame},
    # so the sheets do not have to be read a second time one by one.
    sheets = pd.read_excel(decrypted_workbook, sheet_name=None)
    sheet_l = list(sheets)  # list of sheet names
    print(sheet_l)
    # Prefix the CSV with the workbook name: with the sheet name alone,
    # workbooks sharing a sheet name (e.g. "Sheet1") overwrote each other.
    workbook_stem = os.path.splitext(os.path.basename(i))[0]
    for sheet, frame in sheets.items():
        new_file = f"D:\\all_csv\\{workbook_stem}_{sheet}.csv"
        frame.to_csv(new_file, index=False)
Assuming the starting cell is given as (StartRow, StartCol) and the ending cell is given as (EndRow, EndCol), I found the following worked for me:
# Get the content in the rectangular selection region
# (xlws is the worksheet; Cells takes the row first and the column second)
# content is a tuple of tuples
content = xlws.Range(xlws.Cells(StartRow, StartCol), xlws.Cells(EndRow, EndCol)).Value
# Transfer content to pandas dataframe
# (list(...) turns the tuple of tuples into a list of row tuples)
dataframe = pandas.DataFrame(list(content))
Note: Excel Cell B5 is given as row 5, col 2 in win32com. Also, we need list(...) to convert from tuple of tuples to list of tuples, since there is no pandas.DataFrame constructor for a tuple of tuples.
from David Hamann's site (all credits go to him)
https://davidhamann.de/2018/02/21/read-password-protected-excel-files-into-pandas-dataframe/
Use xlwings, opening the file will first launch the Excel application so you can enter the password.
import pandas as pd
import xlwings as xw
# Open the workbook through Excel and read a fixed block of the "sample"
# sheet into a data frame whose header comes from the first row.
PATH = '/Users/me/Desktop/xlwings_sample.xlsx'
wb = xw.Book(PATH)
sheet = wb.sheets['sample']
cell_block = sheet['A1:C4']
df = cell_block.options(
    pd.DataFrame,
    index=False,
    header=True,
).value
df
Assuming that you can save the encrypted file back to disk using the win32com API (which I realize might defeat the purpose) you could then immediately call the top-level pandas function read_excel. You'll need to install some combination of xlrd (for Excel 2003), xlwt (also for 2003), and openpyxl (for Excel 2007) first though. Here is the documentation for reading in Excel files. Currently pandas does not provide support for using the win32com API to read Excel files. You're welcome to open up a GitHub issue if you'd like.
Based on the suggestion provided by @ikeoddy, this should put the pieces together:
How to open a password protected excel file using python?
# Import modules
import pandas as pd
import win32com.client
import os
import getpass
def _count_filled(read_cell):
    """Return how many consecutive cells, counted from index 1, are not empty.

    *read_cell* maps a 1-based index to a cell value; the scan stops at the
    first value that is None.
    """
    index = 1
    while read_cell(index) is not None:
        index += 1
    return index - 1


# Name file variables
file_path = r'your_file_path'
file_name = r'your_file_name.extension'
full_name = os.path.join(file_path, file_name)
# print(full_name)

# Reference: "Getting command-line password input in Python"
# You are prompted to provide the password to open the file
xl_app = win32com.client.Dispatch('Excel.Application')
pwd = getpass.getpass('Enter file password: ')

# Reference: "Workbooks.Open Method (Excel)"
# Positional arguments: UpdateLinks=False, ReadOnly=True, Format=None, Password
xl_wb = xl_app.Workbooks.Open(full_name, False, True, None, pwd)
try:
    xl_app.Visible = False
    xl_sh = xl_wb.Worksheets('your_sheet_name')

    # Last row / column: length of the filled run down column 1 and along row 1
    last_row = _count_filled(lambda row: xl_sh.Cells(row, 1).Value)
    last_col = _count_filled(lambda col: xl_sh.Cells(1, col).Value)
    if last_row < 1 or last_col < 1:
        raise ValueError('Cell A1 is empty - no header row to read')

    # Reference: ikeoddy's answer
    content = xl_sh.Range(xl_sh.Cells(1, 1), xl_sh.Cells(last_row, last_col)).Value
    # The first row supplies the column names, the remaining rows the data.
    df = pd.DataFrame(list(content[1:]), columns=content[0])
finally:
    # Reference: "python win32 COM closing excel workbook"
    # Close without saving, even when reading failed.
    xl_wb.Close(False)

df.head()
Adding to @Maurice's answer, to get all the cells in the sheet without having to specify the range:
# Open the protected workbook and load everything in the used range of the
# first sheet, taking the header from its first row.
wb = xw.Book(PATH, password='somestring')
sheet = wb.sheets[0]  # get first sheet
# sheet.used_range.address returns string of used range
used_address = sheet.used_range.address
df = sheet[used_address].options(
    pd.DataFrame,
    index=False,
    header=True,
).value

Categories