Looping through a spreadsheet and plotting all columns with xlsxwriter, Pandas - python

I bet I'm really close here. I'm trying to read spreadsheets with potentially hundreds of columns and create a plot in a new spreadsheet for each column on the fly. I've got a few of these working where I simply make multiple calls to chart1, chart2, chart3, ...
What I have below loops fine, inserts the data in the first sheet, creates the second sheet and inserts only the first chart. How do I write the loop to create "n" charts?
I bet it's a silly trivial thing.
Thanks in advance.
Input Data:
https://drive.google.com/open?id=1sts5axnT7aQ04zHv8nPwhnrQPDb7oZlV
import pandas as pd
import codecs
import csv
import os
import xlsxwriter
import datetime
# Script: copy TEST.csv into one worksheet of TEST.xlsx, then add line charts
# (one per loop iteration) to a second worksheet.
# Read the CSV with its first column as the index and its first row as the header.
df3 = pd.read_csv('TEST.csv', index_col=0, header=[0], low_memory=False, na_filter=True, encoding='utf-8')
writer2 = pd.ExcelWriter('TEST.xlsx', engine='xlsxwriter')
# Name of the data sheet: "Stage_<stage>".
stage = 10
sheetname1 = "Stage_" +str(stage)
print(sheetname1)
# Write the full frame to the data sheet with startrow=4 / startcol=0.
df3.to_excel(writer2, sheet_name = sheetname1 , startrow=4, startcol=0, encoding='utf8')
# Both values are "count minus one": the last positional column / row index of df3.
maxcol = df3.shape[1]-1
maxlen = df3.shape[0]-1
print(maxcol, maxlen)
# Single-column frame (column 'TWO') that is written to the chart sheet below.
c = df3[['TWO']]
workbook = writer2.book
# Name of the chart sheet: "Stage_<stage>_P".
sheetname2 = "Stage_" +str(stage) +str("_P")
c.to_excel(writer2, sheet_name = sheetname2 , startrow=4, startcol=0, encoding='utf8')
print(sheetname2)
# Worksheet object that receives the charts.
worksheet = writer2.sheets[ sheetname2 ]
# NOTE(review): the lines from TITLE down to insert_chart presumably form the
# loop body; their indentation is missing in this copy.
# NOTE(review): range(2, maxcol) starts at positional column 2 and stops before
# maxcol, which is already shape[1]-1, so columns 0, 1 and the last column are
# never charted. With few columns the loop runs once or not at all.
for i in range(2, maxcol):
TITLE = df3.columns[i]
# Anchor row: 1 for i=2, then 34 rows further down for each following chart.
LOC_NUM = (i - 1) * 34 - 33
LOCATION = "C" +str(LOC_NUM)
print("TITLE:", TITLE, "GRAPH_LOCATION:", LOC_NUM,LOCATION, "LENGTH:", maxlen, "INDICE:", i)
chart = workbook.add_chart({'type': 'line'})
chart.set_size({'width': 1200, 'height': 640})
# Series range given in list form on the data sheet: rows 7..maxlen of column i.
# NOTE(review): the frame was written with startrow=4 and, presumably, with the
# index occupying sheet column 0 - confirm that sheet column i really holds
# df3.columns[i] and that rows 7..maxlen cover the intended data.
chart.add_series({"values" : [ sheetname1 , 7, i ,maxlen, i ],"name" : TITLE })
chart.set_x_axis({'name': 'Time (s)', 'position_axis': 'on_tick'})
chart.set_y_axis({'name': 'Test', 'major_gridlines': {'visible': False}})
# Turn off chart legend. It is on by default in Excel.
chart.set_legend({'position': 'none'})
worksheet.insert_chart( LOCATION , chart )
# Write the workbook to disk.
writer2.save()

Related

Coloring Columns with Pandas

I am trying to color columns in group_1, but I am getting this issue, any solution?
group_1 = [award, 'mean', 'max']
def change_color(workbook_param, color_hex_code):
    """Return a header cell format filled with ``color_hex_code``.

    Args:
        workbook_param: Object exposing ``add_format(properties)``; callers
            pass the workbook obtained from the Excel writer.
        color_hex_code: Colour value forwarded unchanged as ``fg_color``.

    Returns:
        The value returned by ``workbook_param.add_format`` for a bold,
        text-wrapped, top-aligned, bordered format.
    """
    header_properties = {
        'bold': True,
        'text_wrap': True,
        'valign': 'top',
        'fg_color': color_hex_code,
        'border': 1,
    }
    return workbook_param.add_format(header_properties)
group_1_data = describe_df[columns= group_1].copy()
group_1_ind = [describe_df.columns.to_list().index(patient) for patient in group_1]
group_1_ind # [0, 3, 4]
group_1_data = describe_df[columns= group_1].copy()
^
SyntaxError: invalid syntax
the group_1_data gets inputted into here below
# Combining dataFrames in excel
# Write describe_df and df to one 'Validation' sheet, then recolour the header
# cells of the group_1 columns.
excelName = input("Label your excel file: ")
xlsxAUTO = '.xlsx'
excelAutoNamed = excelName + xlsxAUTO
# Create a Pandas Excel writer.
writer = pd.ExcelWriter(excelAutoNamed, engine='xlsxwriter')
# Convert the dataframes to XlsxWriter Excel objects on the same sheet.
describe_df.to_excel(writer, sheet_name='Validation', startrow=0, startcol=0)
# NOTE(review): the second frame starts at row len(df.columns); confirm this
# offset clears the rows occupied by describe_df.
df.to_excel(writer, sheet_name='Validation', startrow=len(df.columns), startcol=0)
# Get the xlsxwriter workbook and worksheet objects. You can change sheet name as well.
workbook = writer.book
worksheet = writer.sheets['Validation']
# One format is enough for every highlighted header cell, so build it once.
highlight_format = change_color(workbook, '#e3fc03')
# Rewrite the header cell of each selected column; +1 skips the index column.
for col_num, value in zip(group_1_ind, group_1_data.columns.values):
    worksheet.write(0, col_num + 1, value, highlight_format)
# close() saves the file; ExcelWriter.save() was removed in pandas 2.0.
writer.close()
I have also tried group_1_data = describe_df[group_1].copy()

Why is if_sheet_exists='replace' creating a new Excel sheet when exporting a DataFrame to an existing Excel sheet?

I'm trying to export the DataFrame values to an Excel file without affecting the other sheets, but the code below creates a new sheet instead of replacing the existing one. Can anyone please help with this?
here is my code
import pandas as pd
import openpyxl
# Read the 'test' sheet of the daily-level workbook into a DataFrame.
df1 = pd.read_excel(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily_level_data.xlsx',sheet_name = 'test')
# Open the master workbook in append mode through the openpyxl engine.
# NOTE(review): pandas documents this keyword as `if_sheet_exists`;
# `on_sheet_exists` is not a documented ExcelWriter argument.
with pd.ExcelWriter(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily Sales Master_test.xlsx', mode="a",
engine="openpyxl", on_sheet_exists ="replace") as writer:
# Write the frame to the sheet named "Day_level".
df1.to_excel(writer, sheet_name="Day_level")
print('data imported in daily sales master - sucessfully')
Looking at the documentation, the keyword should be if_sheet_exists, not on_sheet_exists. However, it still doesn't work correctly.
You could instead pass the existing sheets to the writer through writer.sheets. It is somewhat annoying, but it works:
import pandas as pd
import openpyxl
# Load the target workbook up front so its existing sheets can be handed to the writer.
book = openpyxl.load_workbook(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily Sales Master_test1.xlsx')
df1 = pd.read_excel(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily_level_data.xlsx',sheet_name = 'test')
with pd.ExcelWriter(r'D:\FY 22 - 23\Apr - 22\Daily Sales\Daily Sales Master_test1.xlsx', mode="a",
engine="openpyxl", ) as writer:
# Point the writer at the pre-loaded workbook and register every existing
# worksheet under its title.
# NOTE(review): assigning writer.book / writer.sheets relies on these being
# settable; confirm against the pandas version in use.
writer.book = book
writer.sheets = {ws.title:ws for ws in book.worksheets}
df1.to_excel(writer, sheet_name="Day_level")
print('data imported in daily sales master - sucessfully')

Is it is possible to reference existing data to create a chart using openpyxl?

I have some data that is already in an xlsx sheet. Is it possible to build a chart using openpyxl with this data that already exists in these cells? The data gets updated monthly.
data
category 3/1/2021 3/8/2021
computer 2646 3000
network 117 200
other 316 20
total 3079 3220
Desired:
A chart month by month chart next to the data
Doing:
from datetime import date
from openpyxl import Workbook
from openpyxl.chart import (
LineChart,
Reference,
)
from openpyxl.chart.axis import DateAxis
# Build a line chart with a date x-axis and anchor it at cell E1.
# NOTE(review): Workbook() starts an empty workbook; to chart cells that already
# exist, `wb` has to be the workbook that contains them - confirm.
wb = Workbook()
ws = wb.active

c2 = LineChart()
c2.title = "Date Axis"
c2.style = 12
c2.y_axis.title = "Size"
c2.y_axis.crossAx = 500
c2.x_axis = DateAxis(crossAx=100)
c2.x_axis.number_format = 'd-mmm'
c2.x_axis.majorTimeUnit = "days"
c2.x_axis.title = "Date"

# `data` was previously used without being defined (NameError). Reference the
# value columns B-C; row 1 is included so titles_from_data can take the series
# titles from it.
data = Reference(ws, min_col=2, min_row=1, max_col=3, max_row=5)
c2.add_data(data, titles_from_data=True)
# Category labels come from column A, rows 2-5.
dates = Reference(ws, min_col=1, min_row=2, max_row=5)
c2.set_categories(dates)

ws.add_chart(c2, "E1")
Any suggestion is appreciated
#pull in your data
# Load an existing workbook, chart columns B-C of its active sheet as a bar
# chart anchored at E1, and save the result as samples.xlsx.
wb_obj = load_workbook(path)
sheet_obj = wb_obj.active

c1 = BarChart()
c1.title = "Test"
c1.y_axis.title = "Test1"
c1.x_axis.title = "Test2"

# With titles_from_data=True the first row of the reference supplies the series
# titles, so the range starts at the header row (row 1) rather than at the
# first data row.
data = Reference(sheet_obj, min_col=2, min_row=1, max_col=3, max_row=5)
c1.add_data(data, titles_from_data=True)
# Label the bars with the category names in column A, rows 2-5.
categories = Reference(sheet_obj, min_col=1, min_row=2, max_row=5)
c1.set_categories(categories)

# Attach the chart and write the workbook out.
sheet_obj.add_chart(c1, "E1")
wb_obj.save("samples.xlsx")

Python openpyxl conditional format containsText

Hi all.
I have this code.
I need to add conditional formatting to the cells. The code works with numbers, but not with text.
With text, the rule either has no effect or produces an error when the file is opened.
import pandas as pd
from openpyxl import formatting, styles
from openpyxl.formatting.rule import ColorScaleRule, CellIsRule, FormulaRule, Rule
from openpyxl.styles.differential import DifferentialStyle
# Write a small frame to the 'TEST' sheet and attach conditional formats:
# a text rule on the Status column and two numeric rules on the Value column.
df = pd.DataFrame({"Name": ['A', 'B', 'C', 'D', 'E'],
                   "Status": ['SUCCESS', 'FAIL', 'SUCCESS', 'FAIL', 'FAIL'],
                   "Value": [10, 15, 20, 25, 30]})
writer = pd.ExcelWriter('C:\\Users\\emarmis\\OneDrive - Ericsson AB\\WORK\\DEV\\PYTHON\\Ferie\\test.xlsx', engine='openpyxl')
df.to_excel(writer, sheet_name='TEST', startrow=1, startcol=1)
wb = writer.book
ws = writer.sheets['TEST']
red_color = 'ff0000'
yellow_color = 'ffff00'
red_fill = styles.PatternFill(start_color=red_color, end_color=red_color, fill_type='solid')
yellow_fill = styles.PatternFill(start_color=yellow_color, end_color=yellow_color, fill_type='solid')
# Text rule. Two things are required for it to take effect:
#   * the style goes on the `dxf` attribute (the earlier `dfx` spelling only
#     created an unused attribute, so the rule carried no formatting);
#   * a containsText rule needs an explicit SEARCH formula, written relative to
#     the top-left cell of the target range (D3).
rule = Rule(type='containsText', operator='containsText', text='FAIL',
            dxf=DifferentialStyle(fill=red_fill))
rule.formula = ['NOT(ISERROR(SEARCH("FAIL",D3)))']
ws.conditional_formatting.add('D3:D7', rule)
# CellIsRule only covers value comparisons, so it is used for the numeric column only.
ws.conditional_formatting.add('E3:E7', CellIsRule(operator='lessThan', formula=['20'], fill=red_fill))
ws.conditional_formatting.add('E3:E7', CellIsRule(operator='greaterThan', formula=['25'], fill=yellow_fill))
# close() saves the workbook; ExcelWriter.save() was removed in pandas 2.0.
writer.close()
This is the result of the code above
Any suggestions?
Regards,
Marco
I don't know if this helps, but it worked for me, though I can't get additional conditions to work.
The string 'c-tur' gets red text on a different red fill:
# Differential style for matching cells: dark-red font on a light-red background.
red_text = Font(color="9C0006")
red_fill = PatternFill(bgColor="FFC7CE")
dxf_c = DifferentialStyle(font=red_text, fill=red_fill)
# containsText rule for "c-tur"; the SEARCH formula is anchored at B2, the
# first cell of the range the rule is attached to.
rule_c = Rule(
    type="containsText",
    operator="containsText",
    text="c-tur",
    dxf=dxf_c,
    formula=['NOT(ISERROR(SEARCH("c-tur",B2)))'],
)
# Range runs from B2 to <column_name><antal_kolumn>.
target_range = 'B2:' + column_name + str(antal_kolumn)
ws.conditional_formatting.add(target_range, rule_c)

how to make a list from result of a function?

When I run my code it runs without errors and creates an Excel file named exel.xlsx, but there is no data in exel.xlsx.
I think I made a mistake when building the list, but I can't find a solution.
def randStr(length=7):
    """Return a random string and create 'Exel.xlsx' as a side effect.

    The alphabet is shuffled in place and its first ``length`` characters are
    joined into the result. A workbook with a 'randomise' sheet and a line
    chart object is opened and closed, but nothing is written to the sheet and
    the chart is never inserted.
    """
    alphabet = list('bcdghijkmnpqrtuvwxyz23456789')
    shuffle(alphabet)
    exel = ''.join(alphabet[:length])

    # The same string ends up in the list twice.
    listb = [exel, exel]

    workbook = xlsxwriter.Workbook('Exel.xlsx')
    worksheet = workbook.add_worksheet('randomise')
    chart = workbook.add_chart({'type': 'line'})
    expenses = listb
    row = col = 0
    workbook.close()

    return exel
import xlsxwriter
# Minimal line-chart example: write six values to column A and chart them.
# The workbook is used as a context manager, so it is closed on leaving the block.
with xlsxwriter.Workbook('chart_line.xlsx') as workbook:
    worksheet = workbook.add_worksheet()

    # Data to plot, written downwards starting at A1.
    data = [10, 40, 50, 20, 10, 50]
    worksheet.write_column('A1', data)

    # Line chart with a single series pointing at the cells just written.
    chart = workbook.add_chart({'type': 'line'})
    chart.add_series({'values': '=Sheet1!$A$1:$A$6'})

    # Place the chart on the sheet with its top-left corner at C1.
    worksheet.insert_chart('C1', chart)
This is the basic way to use add_chart: create the chart, add the values to it, and then insert it into the worksheet.
But you are not adding any values to your chart. Moreover, you are not writing any values to a column or row in the sheet.
I ran your code: a sheet named "randomise" is created, but there is no data in it, because you haven't added anything.
solved!
def randStr(length=7):
    """Return up to ``length`` distinct random characters as a string.

    Characters are drawn without repetition from a fixed 28-character alphabet
    of lowercase letters and digits.

    Args:
        length: Number of characters wanted. Values above 28 yield all 28
            characters; values below zero yield an empty string.

    Returns:
        The randomly ordered characters joined into one string.
    """
    characters = list('bcdghijkmnpqrtuvwxyz23456789')
    shuffle(characters)
    # Clamp at zero: a negative slice bound would otherwise return
    # 28 - |length| characters instead of none.
    return ''.join(characters[:max(length, 0)])
# Collects the generated strings; filled below and written to column A.
listb=[]
# Entry-point guard: the statements below run only when executed as a script.
# NOTE(review): indentation is missing in this copy, so it is not visible which
# of the following statements belong to the for loop; the later lines that use
# `i` and `value1` suggest they run once per iteration - confirm.
if __name__ == '__main__':
for i in range(20):
# Generate one random string and remember it.
value1=(randStr())
listb.append(value1)
# Create Exel.xlsx with a 'randomise' sheet and write listb down from cell (0, 0).
workbook = xlsxwriter.Workbook('Exel.xlsx')
worksheet = workbook.add_worksheet('randomise')
worksheet.write_column(0,0,listb)
# A chart object is created, but no series is added and it is not inserted.
chart = workbook.add_chart({'type': 'line'})
# expenses, row and col are assigned but not used afterwards in this snippet.
expenses =(listb)
row = 0
col = 0
workbook.close()
# makeImage is defined outside this snippet; its result is written as bytes to <i>.png.
img = makeImage(value1, width=512)
with open('%d.png' % i, 'wb') as f:
f.write(img)
print (i)

Categories