Change the name of excel worksheet with pandas - python

import pandas as pd
import numpy as np
df = pd.read_excel(r"C:\Users\venkagop\Subbu\promo validation testing\P 02. Promotions-UK C1.xls")
df = df[['Promotions', 'Promotions: AE', 'Promotions: Anaplan ID', 'Promotions: Is Optima Scenario?', 'Promotions: SIDs', 'Set Inactive?', 'Start Date', 'End Date', 'Promo Period', 'Promo Optima Status', 'Change Promo Status']]
df = df[(df['Promo Period'] == 'FY1819')]
df = df[(df['Set Inactive?'] == 0 ) & (df['Promotions: Is Optima Scenario?'] == 1)]
df.dropna(subset=['Promotions: SIDs'], inplace=True)
df['Optima vs Anaplan Promo Status Validation'] = ""
df['Optima vs Anaplan Promo Status Validation'] = np.where(df['Promo Optima Status'] == df['Change Promo Status'], 'True', 'False')
df.to_excel(r"C:\Users\venkagop\Subbu\mytest.xls", index = False)
#after this i want to change sheeet1 name to some other name#

There are 2 ways you can approach this problem.
Approach 1
Save the excel file to the correct worksheet name from the beginning, by using the sheet_name argument.
import pandas as pd
writer = pd.ExcelWriter(r'C:\Users\venkagop\Subbu\mytest.xls')
df.to_excel(writer, sheet_name='MySheetName', index=False)
writer.save()
Approach 2
If Approach 1 is not possible, change the worksheet name at a later stage using openpyxl. The advantage of this method is you remove the cost of converting pandas dataframe to Excel format again.
import openpyxl
file_loc = r'C:\Users\venkagop\Subbu\mytest.xls'
ss = openpyxl.load_workbook(file_loc)
ss_sheet = ss.get_sheet_by_name('Sheet1')
ss_sheet.title = 'MySheetName'
ss.save(file_loc)

Related

The streamlit does not refresh the dataframe on the localhost

I am New in pandas and streamlit , What I am trying is to filter such a dataframe using streamlit selectbox
but unfortunately everything is going well except that when changing the filter value it does not reflect on the shown table
as you could see the name in the filter does not update the table
here is the code I have used:
import xlrd
import pandas as pd
import os
from datetime import datetime
import streamlit as st
# import plotly_express as px
# to refer to the file
# change the current directory
currentDir = os.chdir('C:\\Users\\user\\Desktop\\lists');
files=os.listdir(currentDir)
columns=['Name','status','memorize-from-surah','memorize-from-ayah','memorize-to-surah','memorize-to-ayah','memorization-grade','words-meaning-grade','revision-from-surah','revision-from-ayah','revision-to-surah','revision-to-ayah','revision-grade']
folderDF=pd.DataFrame()
for file in files:
# get the file name without extension for the sheikh name
sheikh=os.path.splitext(file)[0]
sheetDF=pd.DataFrame()
workbook = pd.ExcelFile(f'C:\\users\\user\\Desktop\\lists\\{file}')
sheets_numbers = len(workbook.sheet_names)
print(sheets_numbers)
for i in range(1, sheets_numbers-1):
# print(workbook.sheet_by_index(i).name)
current_sheet = pd.read_excel(file,sheet_name=i,header=None,index_col=1)
date= current_sheet.iloc[6, 10]
# for j in range(7,current_sheet.nrows):
# if current_sheet.cell(j,3).value=="غاب" or current_sheet.cell(j,3).value=="عذر":
# for k in range(4,current_sheet.ncols):
# current_sheet.cell(j,k).value=""
sheets=pd.read_excel(file,sheet_name=i,skiprows=11,header=None,index_col=1)
# df = pd.DataFrame(sheets.iloc[:,1:], index=index)
#remove the first col
df=pd.DataFrame(sheets.iloc[:,1:])
#remove empty rows
df=df[df.iloc[:,0].notna()]
#rename the columns
df.columns = columns
#get the nrows
nrows= len(df.index)
sheikhCol=pd.Series(nrows*sheikh)
dateCol=pd.Series(nrows*date)
halkaCol=pd.Series(nrows*i)
# df.insert(1,"sheikh",sheikhCol)
df.insert(1,"halka",halkaCol)
df.insert(2,"sheikh",sheikhCol)
df.insert(3,"date",dateCol)
df["sheikh"]=sheikh
df['date']=date
df['halka']=i
if i == 1:
sheetDF=pd.DataFrame(df)
datatoexcel = pd.ExcelWriter('C:\\users\\user\\Desktop\\dataOut.xlsx')
sheetDF.to_excel(datatoexcel)
datatoexcel.save()
else:
sheetDF = pd.concat([sheetDF, df], axis=0)
folderDF=pd.concat([folderDF,sheetDF],axis=0)
datatoexcel=pd.ExcelWriter('C:\\users\\user\\Desktop\\dataOut.xlsx')
folderDF.to_excel(datatoexcel)
datatoexcel.save()
#
# setting up the streamlit page
st.set_page_config(page_title='makraa reports',layout='wide')
# make filteration
#
st.sidebar.header("make filtration criteria")
nameFilter= folderDF['Name'].unique()
halkaFilter= folderDF['halka'].unique()
sheikhFilter= folderDF['sheikh'].unique()
student_choice= st.sidebar.selectbox("select the student Name",nameFilter)
halka_choice= st.sidebar.selectbox("select the halka Number",halkaFilter)
sheikh_choice= st.sidebar.selectbox("select the sheikh Number",sheikhFilter)
# student_choice2= st.sidebar.multiselect("select the student Name",options=nameFilter,default=nameFilter)
# filteredDf=folderDF[folderDF["Name"]== student_choice]
filteredDf = folderDF[(folderDF["Name"] == student_choice) & (folderDF["halka"] == halka_choice)]
# filteredDf=folderDF.query('Name==#student_choice')
st.write(filteredDf)
note st.dataframe(filteredDf) does not make any difference
the streamlit version I used is 0.75 , since the recent version gave me the StreamlitAPIException like that enter link description here
could you give a hand in this
Here is a sample code with example data.
Code
import streamlit as st
import pandas as pd
data = {
'Name': ['a', 'b', 'c'],
'halka': [1, 2, 3]
}
st.set_page_config(page_title='makraa reports',layout='wide')
folderDF = pd.DataFrame(data)
# make filteration
#
st.sidebar.header("make filtration criteria")
nameFilter = folderDF['Name'].unique()
halkaFilter = folderDF['halka'].unique()
# sheikhFilter = folderDF['sheikh'].unique()
student_choice = st.sidebar.selectbox("select the student Name", nameFilter)
halka_choice = st.sidebar.selectbox("select the halka Number", halkaFilter)
# sheikh_choice= st.sidebar.selectbox("select the sheikh Number",sheikhFilter)
# student_choice2= st.sidebar.multiselect("select the student Name",options=nameFilter,default=nameFilter)
filteredDf = folderDF[(folderDF["Name"] == student_choice) & (folderDF["halka"] == halka_choice)]
# filteredDf = filteredDf[filteredDf["halka"] == halkaFilter]
st.write(filteredDf)
Output

how to pysimplegui manage dataframe in excel?

i want to delete dataframe unnamed in red arrow , and the i want to add some text when i click submit i have index from 1 not in 0 like this
Data_entry.xlsx
can you give me solution about this problem ?
in above my code in python 3
import PySimpleGUI as sg
import pandas as pd
# importing openpyxl module
import openpyxl
# Give the location of the file
path = "C:\\Users\\Admin\\Desktop\\WEB\\PYTHON_PYSIMPLEGUI\\Data_Entry.xlsx"
sg.theme('DarkGreen7')
EXCEL_FILE = 'Data_Entry.xlsx'
df = pd.read_excel(EXCEL_FILE)
my_img = sg.Image(r'C:\Users\master\Desktop\WEB\PYTHON_PYSIMPLEGUI\logo.png')
"""
# Template Taskbar
menu_def = [['File', ['Open', 'Save', 'Exit',]],
['Edit', ['Paste', ['Special', 'Normal',], 'Undo'],],
['Help', 'About...'],]
"""
menu_def = [['File', ['Exit']],['Help', 'About...'],]
layout = [
[sg.Menu(menu_def)],
[sg.Column([[my_img]], justification='center')],
[sg.Text('Simacan ( SIstem Monitoring And Controling Absen Nilai')],
[sg.Text('Nama', size=(15,1)), sg.InputText(key='Nama')],
[sg.Text('Kehadiran', size=(15,1)), sg.Combo(['Hadir', 'Sakit', 'Tidak Masuk'], key='Keterangan')],
[sg.Submit(), sg.Exit()]
]
window =sg.Window('Aplikasi Simacan versi 1.2', layout,size=(800, 600), font='Courier 12')
while True:
event, values = window.read()
if event == sg.WIN_CLOSED or event == 'Exit':
break
if event == 'Submit':
df = df.append(values, ignore_index=True)
df.to_excel(EXCEL_FILE, index=True)
sg.popup('Data saved !')
print(event, values)
window.close()
There's are some issues here.
Create a blank excel file to store your data.
Open excel file as a dataframe, option index_col set the index column or you may get Unnamed: 0 column as index in your dataframe.
df = pd.read_excel(EXCEL_FILE, index_col=[0])
There's one extra item 0 in dictionary values, it is the key of sg.Menu, should remove it before you append the values to your dataframe by
del values[0]
You can re-index your dataframe by
df.index = np.arange(1, len(df)+1)
You can save dataframe to your excel file only before end of you script, of course, write each new record to your excel file is still fine.

Add new column to DataFrame with same default value

I would like to add a name column based on the 'lNames' list. But my code is overwriting the whole column in the last iteration as follows:
import pandas as pd
def consulta_bc(codigo_bcb):
url = 'http://api.bcb.gov.br/dados/serie/bcdata.sgs.{}/dados?formato=json'.format(codigo_bcb)
df = pd.read_json(url)
df['data'] = pd.to_datetime(df['data'], dayfirst=True)
df.set_index('data', inplace=True)
return df
lCodigos = [12, 11, 1, 21619, 21623, 12466]
lNames = ['CDI', 'SELIC', 'USD', 'EUR', 'GPB', 'IMAB']
iter_len = len(lCodigos)
saida = pd.DataFrame()
for i in range(iter_len):
saida = saida.append(consulta_bc(lCodigos[i]))
saida['nome']= lNames[i]
saida.to_csv('Indice', sep=';', index=True)
saida
Any help will be fully appreciated
Change the for loop in this way:
for i in range(iter_len):
df = consulta_bc(lCodigos[i])
df['nome'] = lNames[i]
saida = saida.append(df)

Looping through a spreadsheet and plotting all columns with xlsxwriter, Pandas

I bet I'm really close here. I'm trying to look at spreadsheets with potentially 100's of columns and create a plot in a new spreadsheet for each column on the fly. I've got a few of these working where I simply call multiple calls to chart1,chart2,chart3......
What I have below loops fine, inserts the data in the first sheet, creates the second sheet and inserts only the first chart. How do I write the loop to create "n" charts?
I bet it's a silly trivial thing.
Thanks in advance.
Input Data:
https://drive.google.com/open?id=1sts5axnT7aQ04zHv8nPwhnrQPDb7oZlV
import pandas as pd
import codecs
import csv
import os
import xlsxwriter
import datetime
df3 = pd.read_csv('TEST.csv', index_col=0, header=[0], low_memory=False, na_filter=True, encoding='utf-8')
writer2 = pd.ExcelWriter('TEST.xlsx', engine='xlsxwriter')
#define the sheetname
stage = 10
sheetname1 = "Stage_" +str(stage)
print(sheetname1)
df3.to_excel(writer2, sheet_name = sheetname1 , startrow=4, startcol=0, encoding='utf8')
maxcol = df3.shape[1]-1
maxlen = df3.shape[0]-1
print(maxcol, maxlen)
#set the workbook value
c = df3[['TWO']]
workbook = writer2.book
#set the worksheet value
sheetname2 = "Stage_" +str(stage) +str("_P")
c.to_excel(writer2, sheet_name = sheetname2 , startrow=4, startcol=0, encoding='utf8')
print(sheetname2)
worksheet = writer2.sheets[ sheetname2 ]
for i in range(2, maxcol):
TITLE = df3.columns[i]
LOC_NUM = (i - 1) * 34 - 33
LOCATION = "C" +str(LOC_NUM)
print("TITLE:", TITLE, "GRAPH_LOCATION:", LOC_NUM,LOCATION, "LENGTH:", maxlen, "INDICE:", i)
chart = workbook.add_chart({'type': 'line'})
chart.set_size({'width': 1200, 'height': 640})
chart.add_series({"values" : [ sheetname1 , 7, i ,maxlen, i ],"name" : TITLE })
chart.set_x_axis({'name': 'Time (s)', 'position_axis': 'on_tick'})
chart.set_y_axis({'name': 'Test', 'major_gridlines': {'visible': False}})
# Turn off chart legend. It is on by default in Excel.
chart.set_legend({'position': 'none'})
worksheet.insert_chart( LOCATION , chart )
writer2.save()

Dropping Columns From Data frame to only show needed ones

Basically i want to drop some columns that i don't need. And i'm kind of stumped why this is not working
import os
import pandas
def summarise(indir, outfile):
os.chdir(indir)
filelist = ".txt"
dflist = []
colnames = ["DSP Code", "Report Date", "Initial Date", "End Date", "Transaction Type", "Sale Type",
"Distribution Channel", "Products Origin ID", "Product ID", "Artist", "Title", "Units Sold",
"Retail Price", "Dealer Price", "Additional Revenue", "Warner Share", "Entity to be billed",
"E retailer name", "E retailer Country", "End Consumer Country", "Price Code", "Currency Code"]
for filename in filelist:
print(filename)
df = pandas.read_csv('SYB_M_20171001_20171031.txt', header=None, encoding='utf-8', sep='\t', names=colnames,
skiprows=3)
df['data_revenue'] = df['Units Sold'] * df['Dealer Price'] # Multiplying Units with Dealer price = Revenue
df = df.sort_values(['End Consumer Country', 'Currency Code']) # Sorts the columns alphabetically
df.to_csv(outfile + r"\output.csv", index=None)
dflist.append(filename)
df.drop(columns='DSP Code')
summarise(r"O:\James Upson\Sound Track Your Brand Testing\SYB Test",
r"O:\James Upson\Sound Track Your Brand Testing\SYB Test Formatted")
I want to drop all the column titles you can see in colnames excluding 'Units Sold', 'Dealer Price', 'End Consumer Country', 'Currency Code'. I tried to remove one column using df.drop(columns='DSP Code') but this doesn't seem to work.
Any help would be greatly appreciated :)
You Can do it like :
df.drop(['Col_1', 'col_2'], axis=1, inplace=True)
OR:
df = df.drop(columns=colnames)
As suggested in comment section use usecols which provides a kind of filter to trim down the column section to use only which are require rest columns will not be processes and thus efficiency will be increased and resource consumption will also be less:
df = pandas.read_csv('SYB_M_20171001_20171031.txt', encoding='utf-8', sep='\t', usecols=["col1", "col2", "col3"],skiprows=3)
df.drop(columns='DSP Code')
this bit is not working cause you are not assigning it to a new df
df = df.drop(columns='DSP Code')
You can also just keep the columns you care about by copying them into a second dataframe.
According to pandas.DataFrame.drop, it returns a dataframe unless you do the operation inplace.
Returns:
dropped : pandas.DataFrame
inplace : bool, default False
If True, do operation inplace and return None.
Either do it in place: df.drop(columns=['DSP Code'], inplace=True) or store the returned dataframe: df=df.drop(columns=['DSP Code'])
Just do:
df = df['Units Sold', 'Dealer Price', 'End Consumer Country', 'Currency Code']
You keep the ones you want, instead of dropping the others.

Categories