Style the title - python

def multiple_dfs(file_name, sheet, *args):
"""
Put multiple dataframes into one xlsx sheet
"""
row=2
writer = pd.ExcelWriter(file_name, engine='openpyxl')
df = pd.DataFrame({'user': ['Bob', 'Jane', 'Alice'],
'income': [40000, 50000, 42000]})
df2 = pd.DataFrame(['Title'])
df.to_excel(writer, sheet, startrow=row, index=False)
df2.to_excel(writer, sheet, startrow=0, startcol=0, header=None, \
index=False)
I would like to resize and bold Title from this code with pandas (or with openpyxl if it is not possible with pandas). Is there a way to do it accurately and "pythonically"?
I know we could use http://openpyxl.readthedocs.io/en/default/styles.html, but I don't know how to use it with my code.
UPDATE
According to #AndyHayden, normally the following code should bold the title, but that seems to not working.
import pandas as pd
import io, os, openpyxl
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import (PatternFill, Border, Side, Alignment, Protection, Font)
def multiple_dfs(sheet):
row=2
writer = pd.ExcelWriter("testing.xlsx", engine='openpyxl')
df = pd.DataFrame(['Title'])
df2 = pd.DataFrame({'user': ['Bob', 'Jane', 'Alice'],
'income': [40000, 50000, 42000]})
df.to_excel(writer, sheet, startrow=0, startcol=0, header=None, \
index=False)
df2.to_excel(writer, sheet, startrow=row, index=False)
wb = Workbook()
ws = wb.active
for r in dataframe_to_rows(df, index=False, header=False):
ws.append(r)
title = ws["A1"]; title.font = Font(bold=True)
wb.save("testing.xlsx")
writer.save()
writer.close()
multiple_dfs('aaa')
The title is never showed up in bold. How could I fix that?
Please let me know if the question is unclear.

The openpyxl docs offer a neat way to do this:
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = Workbook()
ws = wb.active
for r in dataframe_to_rows(df, index=True, header=True):
ws.append(r)
for cell in ws['A'] + ws[1]:
cell.style = 'Pandas'
wb.save("pandas_openpyxl.xlsx")
Note: That if you are doing some other inserting beforehand (like you suggest in your previous question, you'll have to use a different row number.

Related

workbook has no attribute 'add_chart'

i want to plot data, that i gathered in Python, in Excel.
And if the file already exists, it should just add another Sheet.
But when i try to add a file it gives me the Error "workbook" object has no attribute "add_chart"
Here is the code that i got:
import pandas as pd
import openpyxl
import xlsxwriter
from pathlib import Path
def Convert(self):
path_to_file = '{0}.xlsx'.format(self.Entry_ExcelName.get())
path = Path(path_to_file)
excel_file = '{0}.xlsx'.format(self.Entry_ExcelName.get())
if path.is_file():
writer = pd.ExcelWriter(excel_file, engine='openpyxl', mode='a')
else:
writer = pd.ExcelWriter(excel_file, engine='xlsxwriter')
list_data_conv = self.list_data
df=pd.DataFrame(list_data_conv)
sheet_name = '{0}'.format(self.Entry_SheetName.get())
df.to_excel(writer, sheet_name=sheet_name)
workbook=writer.book
worksheet=writer.sheets[sheet_name]
chart=workbook.add_chart({'type': 'line'})
i=len(list_data_conv)
chart.add_series({
'categories': ['{}'.format(sheet_name) ,1,1,i,1],
'values': ['{}'.format(sheet_name) ,1,2,i,2]
})
chart.set_x_axis({'name': 'Index', 'position_axis': 'on_tick'})
chart.set_y_axis({'name': 'Value', 'major_gridlines': {'visible': False}})
chart.set_legend({'position': 'none'})
worksheet.insert_chart('D2', chart)
writer.save()
I know there is already a similiar question like mine, however that has not fixed my problem.
I feel like i can't see the forest for the trees.

Updating excel sheet without overwriting using openpyxl :Pandas

I am trying to update sheet without overwriting the complete data but my code is creating a new sheet instead.
import csv
import openpyxl
import pandas as pd
from openpyxl import load_workbook
df1 = pd.read_csv(r'C:\Users\name\Desktop\Data_Sj.csv')
ddf = df1[
(df1['Sports'] == 'Football')
]
print(ddf)
writer = pd.ExcelWriter(r'C:\Users\name\Desktop\check\Checklist1.xlsx', engine= 'openpyxl')
book = load_workbook(r'C:\Users\name\Desktop\check\Checklist1.xlsx')
writer.book = book
ddf.to_excel(r'C:\Users\name\Desktop\check\Checklist1.xlsx')
writer.save()

How to add sheet to existing excel file with pandas?

import pandas as pd
from openpyxl import load_workbook
book = load_workbook('test.xlsx')
writer = pd.ExcelWriter('test.xlsx')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df = pd.DataFrame({'a':[1,3,5,7,4,5,6,4,7,8,9],
'b':[3,5,6,2,4,6,7,8,7,8,9]})
df.to_excel(writer, sheet_name='tab_name', index = False)
writer.save()
I get an error
AttributeError: 'Workbook' object has no attribute 'add_worksheet'
import pandas as pd
from openpyxl import load_workbook
fn = 'test.xlsx'
df = pd.read_excel(fn, header=None)
df2 = pd.DataFrame({'a':[1,3,5,7,4,5,6,4,7,8,9],
'b':[3,5,6,2,4,6,7,8,7,8,9]})
writer = pd.ExcelWriter(fn, engine='openpyxl')
book = load_workbook(fn)
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, sheet_name='tab_name', header=None, index=False)
df2.to_excel(writer, sheet_name='tab_name', header=None, index=False,
startcol=7,startrow=6)
writer.save()

xlsxwriter intersect row and column formats

I'm saving pandas DataFrame to Excel with xlsxwriter. First I'm adding some format to columns (change font, for instance). Then I want to change background color for some rows. But when I add set_row function, all my column's format is gone. Is there a way to combine it?
import pandas as pd
data = pd.DataFrame({'test_data': [1,2,3,4,5]})
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
pd.core.format.header_style = None
data.to_excel(writer, sheet_name='test', index=False)
workbook = writer.book
worksheet = writer.sheets['test']
font_fmt = workbook.add_format({'font_name': 'Arial', 'font_size': 10})
worksheet.set_column('A:A', None, font_fmt)
zebra = workbook.add_format({'bg_color': 'green'})
for index in range(5):
if index % 2 == 0:
worksheet.set_row(index+1, None, zebra)
writer.save()
This shall help:
import pandas as pd
data = pd.DataFrame({'test_data': [1,2,3,4,5]})
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
pd.core.format.header_style = None
data.to_excel(writer, sheet_name='test', index=False)
workbook = writer.book
worksheet = writer.sheets['test']
formatdict = {'font_name': 'Calibri', 'font_size': 10, 'font_color':'red'}
font_fmt = workbook.add_format(formatdict)
worksheet.set_column('A:A', None, font_fmt)
zebra = workbook.add_format(formatdict)
zebra.set_bg_color('green')
for index in range(1,6,2):
worksheet.set_row(index, None, zebra)
writer.save()
Shall produce the following output:

Writing resutls into 2 different sheets in the same Excel file

can you teach me whether Python can write into a same Excel file, but 2 different spreadsheets (tabs)?
Just for example, I want to pick and write the titles of below 4 websites, and write them into the same file title.xls but respectively in its Sheet1 and Sheet 2.
www.dailynews.com
www.dailynews.co.zw
www.gulf-daily-news.com
www.dailynews.gov.bw
I do them in 2 scripts, each for 2 websites:
from bs4 import BeautifulSoup
import urllib2
import xlwt
line_in_list = ['www.dailynews.com','www.dailynews.co.zw']
# line_in_list = [www.gulf-daily-news.com','www.dailynews.gov.bw']
book = xlwt.Workbook(encoding='utf-8', style_compression = 0)
sheet = book.add_sheet('Sheet1', cell_overwrite_ok = True)
# sheet = book.add_sheet('Sheet2', cell_overwrite_ok = True)
for cor,websites in enumerate(line_in_list):
url = "http://" + websites
page = urllib2.urlopen(url)
soup = BeautifulSoup(page.read())
site_title = soup.find_all("title")
print site_title
sheet.write (cor, 0, site_title[0].text)
book.save("title.xls")
however, the script is overwriting the sheets. I can only have either Sheet1 or Sheet2 but never both.
any helps? thanks.
You can also do it using pandas.
import pandas as pd
# Add your data in list, which may contain a dictionary with the name of the
# columns as the key
df1 = pd.DataFrame({'website': ['www.dailynews.com', 'www.dailynews.co.zw']})
df2 = pd.DataFrame({'website': ['www.gulf-daily-news.com', 'www.dailynews.gov.bw']})
# Create a new excel workbook
writer = pd.ExcelWriter('title.xlsx', engine='xlsxwriter')
# Write each dataframe to a different worksheet.
df1.to_excel(writer, sheet_name='Sheet1')
df2.to_excel(writer, sheet_name='Sheet2')
# Save workbook
writer.close()
If I correctly understood what you need. Sorry, can't comment to make it more clear.
sheet1 = book.add_sheet('Sheet1', cell_overwrite_ok = True)
sheet2 = book.add_sheet('Sheet2', cell_overwrite_ok = True)
sheet1.write (cor, 0, site_title[0].text)
sheet2.write (cor, 0, site_title[0].text)
import numpy as np
import pandas as pd
# Create a Dataframe
df1 = pd.DataFrame(np.random.rand(100).reshape(50,2),columns=['a','b'])
df2 = pd.DataFrame(np.random.rand(100).reshape(50,2),columns=['a','b'])
# Excel path
excelpath = 'path_to_your_excel.xlsx'
# Write your dataframes to difference sheets
with pd.ExcelWriter(excelpath) as writer:
df1.to_excel(writer,sheet_name='Sheet1')
df2.to_excel(writer,sheet_name = 'Sheet2')
""" I noticed that the above script overwrite all existing columns of in
the excel. In case you want to keep some columns and sheet untouched,
you might consider doing it the following way"""
import pandas as pd
import numpy as np
from openpyxl import load_workbook
book = load_workbook(excelpath)
writer = pandas.ExcelWriter(excelpath, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df1.to_excel(writer, "Sheet1", columns=['a', 'b']) # only columns 'a' and 'b' will be populated
df2.to_excel(writer,"Sheet2",columns=['a','b']) # only columns 'a' and 'b' will be populated
writer.save()
--Append Excel Data Sheet to Spreadsheet
import pandas as pd
#import os
#from pandasql import sqldf
#pysqldf = lambda q: sqldf(q, globals())
df1 = pd.read_csv('MyData1.csv')
df2 = pd.read_csv('MyData2.csv')
print(df1)
print(df2)
Differences_df = df1.merge(df2, indicator=True, how='outer')
#Differences_df[merged['_merge'] == 'right_only']
print(Differences_df)
with pd.ExcelWriter('MyInputData.xlsx', mode='a') as writer:
Differences_df.to_excel(writer, sheet_name='Diff')
print("Spreadsheets Processed")

Categories