i want to plot data, that i gathered in Python, in Excel.
And if the file already exists, it should just add another Sheet.
But when i try to add a file it gives me the Error "workbook" object has no attribute "add_chart"
Here is the code that i got:
import pandas as pd
import openpyxl
import xlsxwriter
from pathlib import Path
def Convert(self):
path_to_file = '{0}.xlsx'.format(self.Entry_ExcelName.get())
path = Path(path_to_file)
excel_file = '{0}.xlsx'.format(self.Entry_ExcelName.get())
if path.is_file():
writer = pd.ExcelWriter(excel_file, engine='openpyxl', mode='a')
else:
writer = pd.ExcelWriter(excel_file, engine='xlsxwriter')
list_data_conv = self.list_data
df=pd.DataFrame(list_data_conv)
sheet_name = '{0}'.format(self.Entry_SheetName.get())
df.to_excel(writer, sheet_name=sheet_name)
workbook=writer.book
worksheet=writer.sheets[sheet_name]
chart=workbook.add_chart({'type': 'line'})
i=len(list_data_conv)
chart.add_series({
'categories': ['{}'.format(sheet_name) ,1,1,i,1],
'values': ['{}'.format(sheet_name) ,1,2,i,2]
})
chart.set_x_axis({'name': 'Index', 'position_axis': 'on_tick'})
chart.set_y_axis({'name': 'Value', 'major_gridlines': {'visible': False}})
chart.set_legend({'position': 'none'})
worksheet.insert_chart('D2', chart)
writer.save()
I know there is already a similiar question like mine, however that has not fixed my problem.
I feel like i can't see the forest for the trees.
Related
I am trying to create an excel file of 3 columns: System Date, Time, Value on a webpage at that time.
Intention is to create a dataframe of the 3 values, every time the code runs, and append the dataframe to existing excel workbook (with one existing sheet).
I am able to create dataframe every time code runs, but when I try to append it to an excel file, it throws error:
ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'
Can you please suggest, where am I going wrong.
# Importing Libraries
from datetime import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup
import openpyxl
#getting today's date amd formatting it
now = datetime.now()
Date = now.strftime ("%d/%m/%Y")
Time = now.strftime ("%H:%M")
# GET request to scrape. 'Page' variable to assign contents
page = requests.get("https://www.traderscockpit.com/?pageView=live-nse-advance-decline-ratio-chart")
# Create BeautifulSoup object to parse content
soup = BeautifulSoup(page.content, 'html.parser')
adv = soup.select_one('a:-soup-contains("Advanced:")').next_sibling.strip()
dec = soup.select_one('a:-soup-contains("Declined:")').next_sibling.strip()
ADratio = round(int(adv)/int(dec), 2)
df = pd.DataFrame({tuple([Date, Time, ADratio])})
#Load workbook and read last used row
path = r'C:\Users\kashk\OneDrive\Documents\ADratios.xlsx'
writer = pd.ExcelWriter (path, engine='openpyxl', mode = 'a')
wb = openpyxl.load_workbook(path)
startrow = writer.sheets['Sheet1'].max_row
#Append data frame to existing table in existing sheet
df.to_excel (writer, sheet_name = 'Sheet1', index = False, header = False, startrow = startrow)
writer.save()
writer.close()
A fast and easy solution would be upgrading your pandas > 1.4.0 since it provides a if_sheet_exists = 'overlay' Source
pd.ExcelWriter(path, engine='openpyxl', mode='a', if_sheet_exists='overlay')
If you don't want to upgrade your pandas, there is a way to work around by removing and re-write the sheet into the excel file. (Not recommended if you have a lot of records since it will be slow).
path, sheet_name = 'ADratios.xlsx' , 'Sheet 1'
df.columns = ['Date','Time','ADratio']
with pd.ExcelWriter(path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
book = openpyxl.load_workbook(path, 'r')
df_bak = pd.read_excel(path)
writer.book = openpyxl.load_workbook(path)
writer.book.remove(writer.book.worksheets[writer.book.sheetnames.index(sheet_name)])
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
pd.concat([df_bak, df], axis=0).to_excel(writer, sheet_name=sheet_name, index = False)
I'm doing some simple conditional formatting using xlsxwriter but I am getting this error when I run the code below.
AttributeError: 'Workbook' object has no attribute 'add_format'
I have updated xlsxwriter and looked at a lot of questions on SO and documentation but nothing has worked yet.
This is my code:
workbook = load_workbook(input_excel_filename)
writer = pd.ExcelWriter(input_excel_filename, engine="xlsxwriter")
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
trends_sheet = writer.sheets["Trends"]
slight_increase = writer.book.add_format({"bg_color":"#d3e6d5"})
trends_sheet.conditional_format("E:E", {"type":"cell", "criteria":"==", "value":"Slight Increase", "format":slight_increase})
Check if xlsxwriter package is installed or not....even I faced the same issue..resolved it after installing the package...same answer goes for any attribute error issue related to workbook/writer if your code is correct
Cause and solution
Makesure variable is usable
such as mine
first: workbook = writer.book
then: header_format = workbook.add_format(
Makesure already set pandas's engine (here using xlsxwriter)
when init ExcelWriter, set your engine
writer = pd.ExcelWriter(outputFile, engine='xlsxwriter’, options={'strings_to_urls': False} )
Makesure already installed related lib (xlsxwriter)
pip install xlsxwriter
or mine: pipenv install xlsxwriter
Full code for refer
import pandas as pd
writer = pd.ExcelWriter(
output_final_total_file,
engine='xlsxwriter',
options={'strings_to_urls': False}
)
...
df = pd.read_csv(outputExcelFile, sep=pandas_sep)
...
df.to_excel(outputExcelFile.replace('.csv', '.xlsx'), index=False)
...
df.to_excel(writer, sheet_name=SheetNamePay, startrow=1, header=False, index=False)
...
workbook = writer.book
header_format = workbook.add_format( # !!! here workable, no error
{
'bold': True,
'text_wrap': True,
# 'valign': 'top',
'valign': 'center',
# 'fg_color': '#D7E4BC',
'bg_color': '#edbd93',
'border': 1
}
)
Part of the problem was I needed to set writer.book explicitly. So add the line writer.book = workbook after defining writer. Also adding engine="openpyxl" to the ExcelWriter got rid of a subsequent error. Altogether this seems to work:
workbook = load_workbook(input_excel_filename)
writer = pd.ExcelWriter(input_excel_filename, engine="openpyxl")
writer.book = workbook
writer.sheets = dict((ws.title, ws) for ws in wb.worksheets)
data.to_excel(writer, sheet_name="Data", index=False)
writer.save()
writer.close()
I couldn't get it to work with conditional formatting but setting formatting in the Excel spreadsheet directly actually seems to work, because even if the data is rewritten the formatting stays intact.
I am trying to update sheet without overwriting the complete data but my code is creating a new sheet instead.
import csv
import openpyxl
import pandas as pd
from openpyxl import load_workbook
df1 = pd.read_csv(r'C:\Users\name\Desktop\Data_Sj.csv')
ddf = df1[
(df1['Sports'] == 'Football')
]
print(ddf)
writer = pd.ExcelWriter(r'C:\Users\name\Desktop\check\Checklist1.xlsx', engine= 'openpyxl')
book = load_workbook(r'C:\Users\name\Desktop\check\Checklist1.xlsx')
writer.book = book
ddf.to_excel(r'C:\Users\name\Desktop\check\Checklist1.xlsx')
writer.save()
def multiple_dfs(file_name, sheet, *args):
"""
Put multiple dataframes into one xlsx sheet
"""
row=2
writer = pd.ExcelWriter(file_name, engine='openpyxl')
df = pd.DataFrame({'user': ['Bob', 'Jane', 'Alice'],
'income': [40000, 50000, 42000]})
df2 = pd.DataFrame(['Title'])
df.to_excel(writer, sheet, startrow=row, index=False)
df2.to_excel(writer, sheet, startrow=0, startcol=0, header=None, \
index=False)
I would like to resize and bold Title from this code with pandas (or with openpyxl if it is not possible with pandas). Is there a way to do it accurately and "pythonically"?
I know we could use http://openpyxl.readthedocs.io/en/default/styles.html, but I don't know how to use it with my code.
UPDATE
According to #AndyHayden, normally the following code should bold the title, but that seems to not working.
import pandas as pd
import io, os, openpyxl
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import (PatternFill, Border, Side, Alignment, Protection, Font)
def multiple_dfs(sheet):
row=2
writer = pd.ExcelWriter("testing.xlsx", engine='openpyxl')
df = pd.DataFrame(['Title'])
df2 = pd.DataFrame({'user': ['Bob', 'Jane', 'Alice'],
'income': [40000, 50000, 42000]})
df.to_excel(writer, sheet, startrow=0, startcol=0, header=None, \
index=False)
df2.to_excel(writer, sheet, startrow=row, index=False)
wb = Workbook()
ws = wb.active
for r in dataframe_to_rows(df, index=False, header=False):
ws.append(r)
title = ws["A1"]; title.font = Font(bold=True)
wb.save("testing.xlsx")
writer.save()
writer.close()
multiple_dfs('aaa')
The title is never showed up in bold. How could I fix that?
Please let me know if the question is unclear.
The openpyxl docs offer a neat way to do this:
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = Workbook()
ws = wb.active
for r in dataframe_to_rows(df, index=True, header=True):
ws.append(r)
for cell in ws['A'] + ws[1]:
cell.style = 'Pandas'
wb.save("pandas_openpyxl.xlsx")
Note: That if you are doing some other inserting beforehand (like you suggest in your previous question, you'll have to use a different row number.
can you teach me whether Python can write into a same Excel file, but 2 different spreadsheets (tabs)?
Just for example, I want to pick and write the titles of below 4 websites, and write them into the same file title.xls but respectively in its Sheet1 and Sheet 2.
www.dailynews.com
www.dailynews.co.zw
www.gulf-daily-news.com
www.dailynews.gov.bw
I do them in 2 scripts, each for 2 websites:
from bs4 import BeautifulSoup
import urllib2
import xlwt
line_in_list = ['www.dailynews.com','www.dailynews.co.zw']
# line_in_list = [www.gulf-daily-news.com','www.dailynews.gov.bw']
book = xlwt.Workbook(encoding='utf-8', style_compression = 0)
sheet = book.add_sheet('Sheet1', cell_overwrite_ok = True)
# sheet = book.add_sheet('Sheet2', cell_overwrite_ok = True)
for cor,websites in enumerate(line_in_list):
url = "http://" + websites
page = urllib2.urlopen(url)
soup = BeautifulSoup(page.read())
site_title = soup.find_all("title")
print site_title
sheet.write (cor, 0, site_title[0].text)
book.save("title.xls")
however, the script is overwriting the sheets. I can only have either Sheet1 or Sheet2 but never both.
any helps? thanks.
You can also do it using pandas.
import pandas as pd
# Add your data in list, which may contain a dictionary with the name of the
# columns as the key
df1 = pd.DataFrame({'website': ['www.dailynews.com', 'www.dailynews.co.zw']})
df2 = pd.DataFrame({'website': ['www.gulf-daily-news.com', 'www.dailynews.gov.bw']})
# Create a new excel workbook
writer = pd.ExcelWriter('title.xlsx', engine='xlsxwriter')
# Write each dataframe to a different worksheet.
df1.to_excel(writer, sheet_name='Sheet1')
df2.to_excel(writer, sheet_name='Sheet2')
# Save workbook
writer.close()
If I correctly understood what you need. Sorry, can't comment to make it more clear.
sheet1 = book.add_sheet('Sheet1', cell_overwrite_ok = True)
sheet2 = book.add_sheet('Sheet2', cell_overwrite_ok = True)
sheet1.write (cor, 0, site_title[0].text)
sheet2.write (cor, 0, site_title[0].text)
import numpy as np
import pandas as pd
# Create a Dataframe
df1 = pd.DataFrame(np.random.rand(100).reshape(50,2),columns=['a','b'])
df2 = pd.DataFrame(np.random.rand(100).reshape(50,2),columns=['a','b'])
# Excel path
excelpath = 'path_to_your_excel.xlsx'
# Write your dataframes to difference sheets
with pd.ExcelWriter(excelpath) as writer:
df1.to_excel(writer,sheet_name='Sheet1')
df2.to_excel(writer,sheet_name = 'Sheet2')
""" I noticed that the above script overwrite all existing columns of in
the excel. In case you want to keep some columns and sheet untouched,
you might consider doing it the following way"""
import pandas as pd
import numpy as np
from openpyxl import load_workbook
book = load_workbook(excelpath)
writer = pandas.ExcelWriter(excelpath, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df1.to_excel(writer, "Sheet1", columns=['a', 'b']) # only columns 'a' and 'b' will be populated
df2.to_excel(writer,"Sheet2",columns=['a','b']) # only columns 'a' and 'b' will be populated
writer.save()
--Append Excel Data Sheet to Spreadsheet
import pandas as pd
#import os
#from pandasql import sqldf
#pysqldf = lambda q: sqldf(q, globals())
df1 = pd.read_csv('MyData1.csv')
df2 = pd.read_csv('MyData2.csv')
print(df1)
print(df2)
Differences_df = df1.merge(df2, indicator=True, how='outer')
#Differences_df[merged['_merge'] == 'right_only']
print(Differences_df)
with pd.ExcelWriter('MyInputData.xlsx', mode='a') as writer:
Differences_df.to_excel(writer, sheet_name='Diff')
print("Spreadsheets Processed")