I am trying to update a sheet without overwriting the existing data, but my code replaces the whole workbook with a new sheet instead.
import csv
import openpyxl
import pandas as pd
from openpyxl import load_workbook
# Load the source data and keep only the rows whose 'Sports' column is 'Football'.
df1 = pd.read_csv(r'C:\Users\name\Desktop\Data_Sj.csv')
ddf = df1[df1['Sports'] == 'Football']
print(ddf)

# Open the existing workbook in append mode so its other sheets survive.
# The DataFrame must be written through the writer: passing the file path to
# to_excel() (as the original did) creates a brand-new workbook and discards
# everything that was already in the file.
# if_sheet_exists='overlay' (pandas >= 1.4) writes into the existing sheet
# instead of raising; the context manager saves and closes the file.
checklist_path = r'C:\Users\name\Desktop\check\Checklist1.xlsx'
with pd.ExcelWriter(
    checklist_path,
    engine='openpyxl',
    mode='a',
    if_sheet_exists='overlay',
) as writer:
    # 'Sheet1' is the sheet name to_excel() used by default in the original call.
    ddf.to_excel(writer, sheet_name='Sheet1')
Related
I am trying to create an excel file of 3 columns: System Date, Time, Value on a webpage at that time.
Intention is to create a dataframe of the 3 values, every time the code runs, and append the dataframe to existing excel workbook (with one existing sheet).
I am able to create dataframe every time code runs, but when I try to append it to an excel file, it throws error:
ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'
Can you please suggest where I am going wrong?
# Importing Libraries
from datetime import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup
import openpyxl
# Get the current date and time and format them as strings.
now = datetime.now()
Date = now.strftime("%d/%m/%Y")
Time = now.strftime("%H:%M")

# GET request to scrape; fail early on an HTTP error instead of parsing an
# error page.
page = requests.get(
    "https://www.traderscockpit.com/?pageView=live-nse-advance-decline-ratio-chart",
    timeout=30,
)
page.raise_for_status()

# Parse the page and read the text that follows the "Advanced:" and
# "Declined:" links.
soup = BeautifulSoup(page.content, 'html.parser')
adv = soup.select_one('a:-soup-contains("Advanced:")').next_sibling.strip()
dec = soup.select_one('a:-soup-contains("Declined:")').next_sibling.strip()
ADratio = round(int(adv) / int(dec), 2)

# One row, three unnamed columns: date, time, ratio.
df = pd.DataFrame([(Date, Time, ADratio)])

# Append the row below the last used row of the existing 'Sheet1'.
# if_sheet_exists='overlay' (pandas >= 1.4) is what allows writing into a
# sheet that already exists; the default ('error') raises
# "ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'".
path = r'C:\Users\kashk\OneDrive\Documents\ADratios.xlsx'
with pd.ExcelWriter(
    path,
    engine='openpyxl',
    mode='a',
    if_sheet_exists='overlay',
) as writer:
    # max_row is 1-based while startrow is 0-based, so this targets the first
    # row after the existing data.
    startrow = writer.sheets['Sheet1'].max_row
    df.to_excel(
        writer,
        sheet_name='Sheet1',
        index=False,
        header=False,
        startrow=startrow,
    )
# Leaving the with-block saves and closes the workbook; no save()/close() needed.
A fast and easy solution is to upgrade pandas to version 1.4.0 or later, which adds the if_sheet_exists='overlay' option to ExcelWriter (see the pandas.ExcelWriter documentation):
# Open the existing workbook at `path` for appending; 'overlay' asks pandas to
# write into an already-existing sheet rather than raise.
pd.ExcelWriter(path, engine='openpyxl', mode='a', if_sheet_exists='overlay')
If you don't want to upgrade pandas, you can work around the problem by reading the existing sheet, removing it, and rewriting it together with the new rows. (This is not recommended if you have a lot of records, since it will be slow.)
# Workaround without 'overlay': read the sheet back, add the new row(s) and
# rewrite the whole sheet.
# The sheet is named 'Sheet1' (no space); with 'Sheet 1' the lookup of the
# existing sheet fails.
path, sheet_name = 'ADratios.xlsx', 'Sheet1'
df.columns = ['Date', 'Time', 'ADratio']

# Existing rows of the target sheet.
# NOTE(review): read_excel treats the first row as the header, so this assumes
# the sheet already starts with a Date/Time/ADratio header row -- confirm.
df_bak = pd.read_excel(path, sheet_name=sheet_name)

# if_sheet_exists='replace' makes pandas drop and recreate the sheet itself,
# so no manual writer.book / writer.sheets manipulation is required (newer
# pandas versions no longer allow assigning to those attributes).
with pd.ExcelWriter(
    path,
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace',
) as writer:
    pd.concat([df_bak, df], axis=0).to_excel(
        writer, sheet_name=sheet_name, index=False
    )
I have dictionary of dataframes.
# Three single-row frames, keyed below by the name each should be exported under.
_fruit_frame = pd.DataFrame(
    {'Name': ['Banana'], 'color': ['Yellow'], 'type': 'Fruit'}
)
_place_frame = pd.DataFrame(
    {'city': ['Atlanta'], 'state': ['Georgia'], 'Country': ['United States']}
)
_person_frame = pd.DataFrame(
    {'firstname': ['John'], 'middlename': ['Patrick'], 'lastnme': ['Snow']}
)

dd = dict(
    table=_fruit_frame,
    another_table=_place_frame,
    and_another_table=_person_frame,
)
I would like to create an Excel file which contains Excel Table objects created from these dataframes. Each Table needs to be on a separate Tab/Sheet and Table names should match dataframe names.
Is this possible to do with Python?
So far I was only able to export data to Excel normally without converting to tables using xlsxwriter
# Write every frame in `dd` to its own sheet of Results.xlsx, using the dict
# key as the sheet name. The context manager saves and closes the workbook
# (ExcelWriter.save() was removed in pandas 2.0).
with pd.ExcelWriter('Results.xlsx', engine='xlsxwriter') as writer:
    for sheet, frame in dd.items():
        frame.to_excel(writer, sheet_name=sheet)
For writing multiple sheets from Pandas, use the openpyxl library. In addition, to prevent overwriting, set the workbook sheets before each update.
Try this code:
import pandas as pd
import openpyxl
# Frames to export; each key becomes both the sheet name and the table name.
dd = {
    'table': pd.DataFrame({'Name': ['Banana'], 'color': ['Yellow'], 'type': 'Fruit'}),
    'another_table': pd.DataFrame({'city': ['Atlanta'], 'state': ['Georgia'], 'Country': ['United States']}),
    'and_another_table': pd.DataFrame({'firstname': ['John'], 'middlename': ['Patrick'], 'lastnme': ['Snow']}),
}

filename = 'Results.xlsx'  # created (or overwritten) by this script

# Write each frame to its own sheet and, while the workbook is still open,
# turn the written range into an Excel Table. Doing both through the writer's
# own workbook avoids mixing worksheets from a separately loaded workbook
# (which silently loses data) and avoids a second load/save pass.
with pd.ExcelWriter(filename, engine='openpyxl') as writer:
    for sheet, frame in dd.items():
        frame.to_excel(writer, index=False, sheet_name=sheet)

        ws = writer.sheets[sheet]
        # The table spans from A1 to the bottom-right used cell.
        ref = "A1:" + ws.cell(ws.max_row, ws.max_column).coordinate
        # NOTE(review): Excel table names must be unique in the workbook and
        # may not contain spaces -- holds for the keys above; confirm for
        # other sheet names.
        tab = openpyxl.worksheet.table.Table(displayName=sheet, ref=ref)
        ws.add_table(tab)
# Leaving the with-block saves and closes the workbook.
Output
I want to create a Python script that measures CPU% every 5 seconds and writes it to an Excel file. I have managed to run the script once, and its output in Excel is shown below. How do I repeat it every 5 seconds and insert just the value into Excel, not the header name? Please help; I have just started learning Python.
output-
enter image description here
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import numpy as np
import psutil
# Take one CPU-usage reading and wrap it in a one-row frame with a 'CPU' column.
CPU = psutil.cpu_percent(interval=1)
df = pd.DataFrame({'CPU': [CPU]})

# Write the frame to 'Sheet1'. sheet_name is passed by keyword (positional use
# is deprecated in recent pandas) and the context manager saves and closes
# the file (ExcelWriter.save() was removed in pandas 2.0).
with ExcelWriter(r'C:\Users\kumardha\Desktop\DK_TEST\Pandas3.xlsx') as writer:
    df.to_excel(writer, sheet_name='Sheet1', index=False)
I'm assuming this is what you expected:
import pandas as pd
import numpy as np
import psutil
import time
from openpyxl import load_workbook
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False,
                       **to_excel_kwargs):
    """Append a DataFrame to an Excel sheet, creating the file if necessary.

    Args:
        filename: Path of the ``.xlsx`` workbook to write to.
        df: DataFrame to write.
        sheet_name: Name of the target sheet.
        startrow: Zero-based row at which to start writing. When ``None`` the
            data is placed after the last used row of an existing sheet, or
            at row 0 when the file or sheet does not exist yet.
        truncate_sheet: When true and the sheet exists, the sheet is removed
            and recreated (at the same position) before writing.
        **to_excel_kwargs: Forwarded to ``DataFrame.to_excel``; an ``engine``
            entry is ignored because openpyxl is always used.

    Requires pandas >= 1.4 for ``if_sheet_exists='overlay'``.
    """
    import os

    # ignore [engine] parameter if it was passed
    to_excel_kwargs.pop('engine', None)

    if not os.path.isfile(filename):
        # Nothing to append to yet: create a fresh workbook.
        with pd.ExcelWriter(filename, engine='openpyxl') as writer:
            df.to_excel(writer, sheet_name=sheet_name,
                        startrow=0 if startrow is None else startrow,
                        **to_excel_kwargs)
        return

    # 'replace' drops and recreates the sheet at its old index (truncate);
    # 'overlay' writes into the existing sheet and keeps its other cells.
    if_sheet_exists = 'replace' if truncate_sheet else 'overlay'
    with pd.ExcelWriter(filename, engine='openpyxl', mode='a',
                        if_sheet_exists=if_sheet_exists) as writer:
        # get the last row in the existing Excel sheet
        # if it was not specified explicitly
        if startrow is None and sheet_name in writer.book.sheetnames:
            startrow = writer.book[sheet_name].max_row
        if startrow is None:
            startrow = 0
        df.to_excel(writer, sheet_name=sheet_name, startrow=startrow,
                    **to_excel_kwargs)
    # leaving the with-block saves the workbook
def repeat(seconds, filename):
    """Log the CPU usage to an Excel sheet forever, one row per reading.

    Each iteration takes a reading with ``psutil.cpu_percent(interval=1)``,
    prints it with a timestamp, appends it to 'Sheet1' of ``filename`` via
    ``append_df_to_excel`` and then sleeps for ``seconds``.

    Args:
        seconds: Delay passed to ``time.sleep`` between iterations (a number).
        filename: Path of the workbook to append to.

    The column header is written only while the workbook does not exist yet,
    so restarting the script does not insert another 'CPU' header row in the
    middle of the data. This function never returns.
    """
    import os

    while True:
        CPU = psutil.cpu_percent(interval=1)
        df = pd.DataFrame({'CPU': [CPU]})
        print(str(CPU) + ' is current cpu at time ' + time.ctime())
        # Header only for the very first write into a new file.
        write_header = not os.path.isfile(filename)
        append_df_to_excel(filename, df, sheet_name='Sheet1',
                           header=write_header, index=False)
        time.sleep(seconds)
# Placeholder: replace the string with the path of the workbook to append to.
filename='path to filename'
# Placeholder: replace the first argument with the delay in seconds as a
# number; repeat() passes it straight to time.sleep().
repeat('delay you want in seconds',filename)
You can use subprocess.check_output to capture the output of running your script. I've used this before with Discord bots. I recommend reading this post: "Running shell command and capturing the output".
subprocess.check_output()
Good Luck
I have the below code and it errors out on move_range for some reason. I am using openpyxl version 2.5.12. I tried updating to 2.6, but then it introduced some error related to Pandas deprecated NaT feature so I'd like to stay on 2.5.12 if possible. I have included the below modules imported from openpyxl as well.
import openpyxl
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.worksheet.cell_range import CellRange
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.worksheet import worksheet
from openpyxl import worksheet
# Open the extract workbook and make the sheet at index 4 the active one.
book = load_workbook(extract_file_loc)
book.active = 4
ws = book.active

# Append the header row followed by every data row.
data = gpatuple
ws.append(gp_headers_tp)
for row in data:
    ws.append(row)

# Register the range `final_cord` as a table with a striped default style.
tab = Table(displayName="PATH_FILE", ref=final_cord)
style = TableStyleInfo(name="TableStyleMedium2", showRowStripes=True)
tab.tableStyleInfo = style
ws.add_table(tab)

# NOTE(review): Worksheet.move_range appears to be available only from
# openpyxl 2.6 onwards, which would explain the failure on 2.5.12 -- confirm
# against the openpyxl changelog and upgrade if so.
# NOTE(review): the table was registered with ref=final_cord before the cells
# are moved; verify whether the table reference needs updating afterwards.
ws.move_range("G4:H10", rows=-1, cols=2)
ws.move_range(final_cord, rows=-1, cols=0)

book.save("extract.xlsx")
print("complete!")
Can you tell me whether Python can write to the same Excel file, but to two different worksheets (tabs)?
For example, I want to fetch the titles of the four websites below and write them into the same file, title.xls, with the first two in Sheet1 and the other two in Sheet2.
www.dailynews.com
www.dailynews.co.zw
www.gulf-daily-news.com
www.dailynews.gov.bw
I do them in 2 scripts, each for 2 websites:
from bs4 import BeautifulSoup
import xlwt

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

# Sites to fetch, grouped by the worksheet their titles belong on. Handling
# both groups in ONE run with ONE workbook is what keeps both sheets: running
# the script twice makes the second book.save() overwrite the first file.
sites_by_sheet = [
    ('Sheet1', ['www.dailynews.com', 'www.dailynews.co.zw']),
    ('Sheet2', ['www.gulf-daily-news.com', 'www.dailynews.gov.bw']),
]

book = xlwt.Workbook(encoding='utf-8', style_compression=0)
for sheet_name, line_in_list in sites_by_sheet:
    sheet = book.add_sheet(sheet_name, cell_overwrite_ok=True)
    for cor, websites in enumerate(line_in_list):
        url = "http://" + websites
        page = urlopen(url)
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(page.read(), 'html.parser')
        site_title = soup.find_all("title")
        print(site_title)
        # One title per row in column 0; leave the cell empty when the page
        # has no <title> instead of failing with IndexError.
        sheet.write(cor, 0, site_title[0].text if site_title else '')

# Save once, after every sheet has been filled.
book.save("title.xls")
However, the script overwrites the sheets: I can only ever get either Sheet1 or Sheet2, never both.
Any help? Thanks.
You can also do it using pandas.
import pandas as pd
# Put the data for each worksheet into its own frame; here each frame has a
# single 'website' column built from a list.
df1 = pd.DataFrame(
    {'website': ['www.dailynews.com', 'www.dailynews.co.zw']}
)
df2 = pd.DataFrame(
    {'website': ['www.gulf-daily-news.com', 'www.dailynews.gov.bw']}
)

# Create a new workbook and write one frame per worksheet; the workbook is
# closed (and thereby saved) when the with-block ends.
with pd.ExcelWriter('title.xlsx', engine='xlsxwriter') as writer:
    df1.to_excel(writer, sheet_name='Sheet1')
    df2.to_excel(writer, sheet_name='Sheet2')
If I understood correctly what you need, this should work. (Sorry, I can't comment on the question to ask for clarification.)
# Create both worksheets on the same workbook object (`book` comes from the
# surrounding script).
sheet1 = book.add_sheet('Sheet1', cell_overwrite_ok = True)
sheet2 = book.add_sheet('Sheet2', cell_overwrite_ok = True)
# Write the title to row `cor`, column 0 of each sheet.
# NOTE(review): as written, the same title goes to both sheets -- presumably
# each sheet should receive the titles of its own site list; confirm.
sheet1.write (cor, 0, site_title[0].text)
sheet2.write (cor, 0, site_title[0].text)
import numpy as np
import pandas as pd
# Build two 50x2 frames of random numbers with columns 'a' and 'b'.
df1 = pd.DataFrame(np.random.rand(100).reshape(50, 2), columns=['a', 'b'])
df2 = pd.DataFrame(np.random.rand(100).reshape(50, 2), columns=['a', 'b'])

# Target workbook.
excelpath = 'path_to_your_excel.xlsx'

# Write each frame to its own sheet; the file is saved when the block exits.
with pd.ExcelWriter(excelpath) as writer:
    for sheet_title, frame in (('Sheet1', df1), ('Sheet2', df2)):
        frame.to_excel(writer, sheet_name=sheet_title)
""" Note that the script above overwrites the whole workbook, including any
existing sheets and columns. If you want to keep some columns and sheets
untouched, consider doing it the following way instead."""
import pandas as pd
import numpy as np
from openpyxl import load_workbook
# Open the existing workbook in append mode so sheets that are not written
# below stay untouched. if_sheet_exists='overlay' (pandas >= 1.4) writes into
# the existing 'Sheet1'/'Sheet2' cells instead of raising or replacing the
# sheets, which replaces the old writer.book / writer.sheets assignments
# (no longer permitted by newer pandas).
with pd.ExcelWriter(
    excelpath,
    engine='openpyxl',
    mode='a',
    if_sheet_exists='overlay',
) as writer:
    # only columns 'a' and 'b' will be populated
    df1.to_excel(writer, sheet_name="Sheet1", columns=['a', 'b'])
    df2.to_excel(writer, sheet_name="Sheet2", columns=['a', 'b'])
# Leaving the with-block saves the workbook.
--Append Excel Data Sheet to Spreadsheet
import pandas as pd
#import os
#from pandasql import sqldf
#pysqldf = lambda q: sqldf(q, globals())
# Load the two data sets to compare.
df1 = pd.read_csv('MyData1.csv')
df2 = pd.read_csv('MyData2.csv')
print(df1)
print(df2)

# Outer merge on the shared columns; indicator=True adds a '_merge' column
# telling whether each row came from the left frame, the right frame or both.
Differences_df = df1.merge(df2, indicator=True, how='outer')
# To keep only the rows found solely in df2:
# Differences_df[Differences_df['_merge'] == 'right_only']
print(Differences_df)

# Append the result as sheet 'Diff' to the existing workbook.
# - engine='openpyxl' is set explicitly because append mode is not supported
#   by xlsxwriter, which pandas may otherwise choose for .xlsx files.
# - if_sheet_exists='replace' lets the script be re-run: an existing 'Diff'
#   sheet is rewritten instead of raising ValueError.
with pd.ExcelWriter(
    'MyInputData.xlsx',
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace',
) as writer:
    Differences_df.to_excel(writer, sheet_name='Diff')
print("Spreadsheets Processed")