How to display pandas dataframe properly using tkinter? - python

I am fairly new to Python and I am attempting to create a tool which displays the number of rows and columns of all sheets of the Excel workbooks in a folder. I am looking to display a DataFrame as the final result using tkinter; however, the display is not coming out correctly, as the last two columns of the DataFrame appear on a new line. I was wondering how to rectify this issue. I have tried using PyQt5, but this kept crashing my kernel, and I have tried using Treeviews, but I can't figure out how to write this DataFrame properly to a Treeview. Below is my current code:
import pandas as pd
import tkinter as tk
import glob
import os
import xlrd
def folder_row_count():
    """Show per-sheet row and column counts for the workbooks in a folder.

    The folder path and file extension are read from the module-level
    ``f_path_entry`` and ``file_ext_var`` widgets. Every matching workbook
    is opened with xlrd, one record is collected per sheet, and the
    resulting DataFrame (with an extra ``Total`` row summing the numeric
    columns) is inserted into a Text widget in a separate window.
    """
    folder_path = f_path_entry.get()
    file_extension = file_ext_var.get()

    # A Tk application should own a single root window; additional windows
    # are Toplevels, which are served by the root's already-running mainloop.
    result_window = tk.Toplevel()
    t1 = tk.Text(result_window)
    t1.grid()

    if file_extension == "xlsx":
        # One (file, sheet, rows, cols) record per worksheet.
        records = []
        for fname in glob.glob(os.path.join(folder_path, f"*.{file_extension}")):
            wb = xlrd.open_workbook(fname)
            for sheet in wb.sheets():
                records.append(
                    (os.path.basename(fname), sheet.name, sheet.nrows, sheet.ncols)
                )

        df = pd.DataFrame(
            records,
            columns=['File Name', 'Sheet Name', 'Number Of Rows', 'Number Of Columns'],
        )
        # DataFrame.append was removed in pandas 2.0; build the 'Total' row as
        # a one-row frame and concatenate it instead.
        totals = df.sum(numeric_only=True).rename('Total')
        main_df = pd.concat([df, totals.to_frame().T])
        t1.insert(tk.END, main_df)
# Extensions offered in the drop-down menu.
file_ext_list = ["xlsx"]
# Main application window.
window = tk.Tk()
window.title("Row Counter")
# Row 1: file-type selector backed by file_ext_var.
tk.Label(window, text = "Choose File Type:").grid(row = 1, column = 0)
file_ext_var = tk.StringVar(window)
file_ext_dd = tk.OptionMenu(window, file_ext_var, *file_ext_list)
file_ext_dd.config(width = 10)
file_ext_dd.grid(row = 1, column = 1)
# Row 2: free-text entry for the folder to scan.
tk.Label(window, text = "Folder Path:").grid(row = 2, column = 0)
f_path_entry = tk.Entry(window)
f_path_entry.grid(row = 2, column = 1)
# Row 4: the button runs folder_row_count when clicked.
tk.Button(window, text = "Count Rows", command = folder_row_count).grid(row = 4, column = 1)
# Start the Tk event loop; blocks until the window is closed.
window.mainloop()
Secondly, I would greatly appreciate any commentary on how I can improve upon this code and make it more efficient.
Thanks in advance.

You just need to iterate over your df by iterrows and insert them into your Treeview. Below is a basic sample:
import tkinter as tk
from tkinter import ttk
import pandas as pd
root = tk.Tk()

# Sample data standing in for the real per-sheet statistics.
sample = {
    "File Name": [f"file_{i}" for i in range(5)],
    'Sheet Name': [f"sheet_{i}" for i in range(5)],
    'Number Of Rows': [f"row_{i}" for i in range(5)],
    'Number Of Columns': [f"col_{i}" for i in range(5)],
}
df = pd.DataFrame(sample)
cols = list(df.columns)

tree = ttk.Treeview(root)
tree.pack()

# One Treeview column (with a matching heading) per DataFrame column.
tree["columns"] = cols
for i in cols:
    tree.column(i, anchor="w")
    tree.heading(i, text=i, anchor='w')

# Append each row at the end so the display follows the DataFrame order;
# inserting at index 0 would place every new row above the previous one.
for index, row in df.iterrows():
    tree.insert("", "end", text=index, values=list(row))

root.mainloop()
Also I see you are using xlrd to first read your excel before turning it into a Dataframe. Why don't you use pandas.read_excel instead?

Related

How to make a dropdown list from an existing list in python (pandas and tkinter)

I want the user to submit their csv excel file and choose the columns from a dropdown menu he wants for analysis.
import pandas as pd
import os
import sys
from tkinter import *
root = Tk()
root.title('Eng3')

filepath = input('Enter filepath: ')
# Validate with an explicit exception: assert statements are removed when
# Python runs with -O, which would silently skip this check.
if not os.path.exists(filepath):
    raise FileNotFoundError("I did not find the file at, " + str(filepath))
# Opening in 'r+' confirms the file can be read and written; the context
# manager guarantees the handle is closed again.
with open(filepath, 'r+'):
    print("Hooray we found your file!")

# Load the CSV and collect its column labels as a plain list.
file = pd.read_csv(filepath, encoding='latin1', delimiter=',')
column_list = file.columns.tolist()
print(column_list)
So I made the columns names from the excel file into a list. How can I make a dropdown menu from this list(column_list) to show all column names? When I tried:
tkvar = StringVar(column_list)
menu = OptionMenu(root, tvkar, column_list)
I get this error:
AttributeError: 'list' object has no attribute '_root'
I looked around and found this post How can I create a dropdown menu from a List in Tkinter?. Very useful
file = pd.read_csv(filepath, encoding='latin1', delimiter=',')
column_list = file.columns.tolist()  # column labels (an Index) -> plain Python list
OPTIONS = column_list  # this is what solved my problem

master = Tk()
master.title('Eng3')

variable = StringVar(master)
variable.set(OPTIONS[0])  # default value

# OptionMenu expects every choice as its own positional argument, so the
# list has to be unpacked with * rather than passed as a single object.
w = OptionMenu(master, variable, *OPTIONS)
w.pack()


def ok():
    """Print the column currently selected in the drop-down."""
    print("value is:" + variable.get())


button = Button(master, text="OK", command=ok)
button.pack()

# Without the event loop the window is never shown when run as a script.
master.mainloop()

Replace character in multiple columns of panda dataframe

I have a DataFrame that I create by reading an XLSX file, and I need to replace a lot of dots with commas because, for a reason I don't understand, Excel shows a comma as the decimal separator but the pandas DataFrame shows a dot instead. I have 29 columns in which to replace dots with commas, so I figured it would be better to store all the column names in a list and use a for loop to iterate through all the columns where I want to replace those dots with commas.
But unfortunately I got error when I tried the following code :
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
import win32com.client
import pandas as pd
import pathlib
# Main window with a 300x300 canvas that hosts the other widgets.
root = tk.Tk()
canvas1 = tk.Canvas(root, width=300, height=300, bg='lightsteelblue2', relief='raised')
canvas1.pack()
# Title label, embedded in the canvas at (150, 60).
label1 = tk.Label(root, text='File Conversion Tool', bg='lightsteelblue2')
label1.config(font=('helvetica', 20))
canvas1.create_window(150, 60, window=label1)
# Module-level DataFrame that get_excel_onefolder rebinds via `global`.
read_file = pd.DataFrame()
def get_excel_onefolder():
    """Convert every .xlsx file in a user-chosen folder to a ';'-separated CSV.

    For each workbook, commas in the ``Time`` column are replaced by dots and
    dots in the columns listed in ``lst_rpl`` are replaced by commas. The CSV
    is written next to the workbook as ``<name>.xlsx.csv``. The last workbook
    read is left in the module-level ``read_file``.

    Raises:
        KeyError: if a workbook has no ``Time`` column or lacks one of the
            columns named in ``lst_rpl``.
    """
    global read_file
    import_dir_path = filedialog.askdirectory()
    if not import_dir_path:
        # Dialog cancelled: an empty path would otherwise scan the current directory.
        return

    file_ext = "*.xlsx"
    list_xlsx_file = list(pathlib.Path(import_dir_path).glob(file_ext))
    lst_rpl = ['col24', 'col25', 'col26', 'col45', 'col46', 'col47', 'col69', 'col75', 'col76', 'col77', 'col105', 'col106',
               'col107', 'col108', 'col109', 'col110', 'col111', 'col112', 'col254', 'col255', 'col256', 'col257', 'col258',
               'col259', 'col260', 'col261', 'col262', 'col352', 'col353']

    for xlsx_file_path in list_xlsx_file:
        read_file = pd.read_excel(xlsx_file_path)
        # regex=False: replace the literal character. As a regular expression
        # '.' matches ANY character, which (on pandas versions where regex
        # defaults to True) would turn every character into a comma.
        read_file['Time'] = read_file['Time'].str.replace(',', '.', regex=False)
        for column_name in lst_rpl:
            read_file[column_name] = read_file[column_name].str.replace('.', ',', regex=False)
        output_path = str(xlsx_file_path) + ".csv"
        read_file.to_csv(output_path, index=None, header=True, decimal=',', sep=';')

    tk.messagebox.showinfo(title="Import success", message="CSV file import successful !")
# Button that triggers the conversion, embedded in the canvas at (150, 180).
XLSX_to_CSV = tk.Button(text="Import Excel File & Convert to CSV", command=get_excel_onefolder, bg='green', fg='white', font=('helvetica', 12, 'bold'))
canvas1.create_window(150, 180, window=XLSX_to_CSV)
# Start the Tk event loop; blocks until the window is closed.
root.mainloop()
The error I got is KeyError: 'col24'
Edit :
I fixed my problem by adding argument for NaN value, na_values to .read_excel with decimal=',' and it works fine now
read_file = pd.read_excel(xlsx_file_path, decimal=',', na_values=['#NV', ' '])
My problem was because I had column which was not recognized as float because of NaN value.
The working function is now :
def get_excel_onefolder():
    """Convert every .xlsx file in a user-chosen folder to a ';'-separated CSV.

    Each workbook is read with ',' as the decimal separator and with '#NV'
    and ' ' treated as missing values. Commas in the ``Time`` column are
    replaced by dots, and the result is written next to the workbook with
    the extension changed to ``.csv``. The last workbook read is left in the
    module-level ``read_file``.
    """
    global read_file
    import_dir_path = filedialog.askdirectory()
    if not import_dir_path:
        # Dialog cancelled: an empty path would otherwise scan the current directory.
        return

    file_ext = "*.xlsx"
    list_xlsx_file = list(pathlib.Path(import_dir_path).glob(file_ext))
    for xlsx_file_path in list_xlsx_file:
        read_file = pd.read_excel(xlsx_file_path, decimal=',', na_values=['#NV', ' '])
        read_file['Time'] = read_file['Time'].str.replace(',', '.', regex=False)
        # glob() yields Path objects, so the extension can be swapped with
        # pathlib directly; this script never imports `os`, so the previous
        # os.path.splitext call could not work here.
        output_path = xlsx_file_path.with_suffix(".csv")
        read_file.to_csv(output_path, index=None, header=True, decimal=',', sep=';')

    tk.messagebox.showinfo(title="Import success", message="CSV file import successful !")
pandas tries to autoformat commas into dots. You can change this behavior with the decimal parameter:
read_file = pd.read_excel(xlsx_file_path) -> read_file = pd.read_excel(xlsx_file_path, decimal=",")
for x in list_col:
    # Work on one column at a time: render each value as text and swap the
    # decimal point for a comma, leaving values without a dot untouched.
    df[x] = df[x].apply(lambda value: str(value).replace('.', ',') if '.' in str(value) else value)
I don't know whether this code will be helpful to you, but this snippet replaces dots with commas in every listed column wherever a dot is present.

How to read Excel file, create a QR code?

I was trying with this code
from openpyxl import load_workbook
import qrcode
# Raw string so the backslashes in the Windows path are taken literally.
wb = load_workbook(r"D:\QR\qrcodes.xlsx")
ws = wb['Sheet1']
column = ws['A']  # Column
data = [cell.value for cell in column]
print(data)
qr = qrcode.QRCode(version = 1, error_correction = qrcode.constants.ERROR_CORRECT_H,box_size = 10, border = 4)
ext=".png"
# NOTE(review): the loop below is left exactly as posted, because it is the
# behaviour being asked about: `qr` is shared by every iteration, and the
# body refers to `i` although the loop variable is named `images`.
for images in data:
    qr.add_data(i)
    qr.make(fit=True)
    img=qr.make_image()
    img.save("{}{}".format(i,ext))
But after every loop the image created contains the value of the previous image also, how to solve that?
You are creating the QR object outside the loop.
You're better off initializing the object <class 'qrcode.main.QRCode'> inside your for loop and also using a function to create your QR image as variables inside a function have a local scope.
ext = ".png"


def createQr(data):
    """Return the QR image for *data*, built from a brand-new QRCode object.

    Because the QRCode instance is local to this call, nothing added for an
    earlier value can end up in the returned image.
    """
    code = qrcode.QRCode(
        version=1,
        error_correction=qrcode.constants.ERROR_CORRECT_H,
        box_size=10,
        border=4,
    )
    code.add_data(data)
    code.make(fit=True)
    return code.make_image()


# One image file per value, named after the value itself.
for i in data:
    img = createQr(i)
    target_name = "{}{}".format(i, ext)
    img.save(target_name)
Also, as mentioned by @martineau, you have to change your loop variable from images to i.
Firstly please convert to csv. Then you should add enumerate so you will also have an index number for your file names instead of having multiple file.jpg.
import csv
import qrcode
# Raw string keeps the backslashes literal; newline='' is what the csv
# module expects for files it reads.
with open(r'D:\QR\qrcodes.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for i, row in enumerate(reader):
        if not row:
            # Blank line in the CSV: nothing to encode.
            continue
        labeldata = row[0]
        # Build a new QRCode for every row. A QRCode object keeps everything
        # passed to add_data(), so reusing one instance would make each image
        # also contain the data of all previous rows.
        qr = qrcode.QRCode(
            version=1,
            error_correction=qrcode.constants.ERROR_CORRECT_L,
            box_size=10,
            border=4,
        )
        qr.add_data(labeldata)
        qr.make(fit=True)
        img = qr.make_image()
        # The row index keeps the output file names unique.
        img.save("test{}.jpg".format(i))
From an Excel file, it reads data in "A" columns starting from the second row and produces their QR codes, and creates a new excel file named "qrcode_produced" that has QR codes produced in the B column.
# modules needed
import qrcode
from tkinter import filedialog
from tkinter import *
import openpyxl
from openpyxl import Workbook
from openpyxl.styles import Alignment
from openpyxl import load_workbook
# One hidden Tk root is enough to host both dialogs; a second Tk() instance
# and a visible empty root window are not needed.
root = Tk()
root.withdraw()

# Select the Excel file to be read.
# The texts must be in column "A" starting at row 2; the QR codes are placed in column "B".
print('select xlsx file:')
root.filename = filedialog.askopenfilename(initialdir = "/",title = "Select file",filetypes = (("xlsx files","*.xlsx"),("all files","*.*")))
print (root.filename)
if not root.filename:
    raise SystemExit("No Excel file selected.")

# Select the folder in which to save the QR code PNG images and the Excel file with QR codes.
print('where to save excel file and qrcodes:')
folder_selected = filedialog.askdirectory()
if not folder_selected:
    raise SystemExit("No output folder selected.")

# Read the Excel file.
workbook = load_workbook(str(root.filename))
sheet = workbook.active
last_row = len(sheet['A'])

# Cell sizes for the produced Excel file: widen column B and make every data
# row (2..last_row) tall enough to hold its image.
sheet.column_dimensions['B'].width = 25
for i in range(2, last_row + 1):
    sheet.row_dimensions[i].height = 150

# Title of column B.
sheet["B1"]="Qr_Codes"

# Produce a QR code for each row of column A except the first; empty rows are skipped.
for i in range(2, last_row + 1):
    cell_value = sheet.cell(row=i, column=1).value
    if cell_value is None:
        continue

    # Build a new QRCode for every row. A QRCode object keeps everything
    # passed to add_data(), so reusing one instance would make each image
    # also contain the text of all previous rows.
    qr = qrcode.QRCode(
        version=1,
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        box_size=4,
        border=2,
    )
    qr.add_data(str(cell_value))
    qr.make(fit=True)

    image_path = folder_selected + "/" + "row_"+str(i)+"_qrcode.png"
    qr.make_image().save(image_path)

    # Embed the saved PNG in column B of the same row and centre both cells.
    img = openpyxl.drawing.image.Image(image_path)
    img.anchor = "B" + str(i)
    sheet.add_image(img)
    sheet["B" + str(i)].alignment = Alignment(horizontal='center', vertical='center')
    sheet["A" + str(i)].alignment = Alignment(horizontal='center', vertical='center')

# Save the Excel file.
workbook.save(folder_selected+ "/qrcode_produced.xlsx")

How do I call a file imported through tkinter filedialog?

I am trying to create a very simple GUI that will import a file, run it through some data formatting code, and export it as an .xlsx file
The file would be an excel file. An example would be:
col1
a
b
c
and my current python script does this:
# NOTE(review): read_excel is called unqualified here, presumably imported
# with `from pandas import read_excel`; confirm against the full script.
df = read_excel('file.xlsx')
# Text to look for in "col1" and the replacement for each match.
mapping = {'a':'apple','b':'banana','c':'carrot'}
# regex=True makes the mapping keys regular-expression patterns.
df = df.replace({"col1":mapping}, regex=True)
and it returns:
col1
apple
banana
carrot
but now I am trying to create a GUI that will run it instead (:
This is the code I have so far (I get the error ValueError: DataFrame constructor not properly called!):
import tkinter as tk
from tkinter import filedialog
import pandas as pd
from datetime import datetime, date
def UploadAction(event=None):
    """Ask the user for a file, load it and print the mapped ``col1`` column.

    Args:
        event: unused; present so the function can also serve as an event
            binding callback.
    """
    filename = filedialog.askopenfilename()
    print('Selected:', filename)
    # NOTE(review): this is the statement the question is about, so it is
    # left as posted. `filename` is only a path string, not tabular data, and
    # eval() must never be applied to user-supplied text; the file needs to
    # be parsed by a pandas reader instead.
    df = pd.DataFrame(eval(data=filename))
    mapping = {'a':'apple','b':'banana','c':'carrot'}
    df = df.replace({"col1":mapping}, regex=True)
    print(df['col1'])
# Minimal window with a single button that runs UploadAction when clicked.
root = tk.Tk()
button = tk.Button(root, text='Open', command=UploadAction)
button.pack()
# Start the Tk event loop; blocks until the window is closed.
root.mainloop()
For the excel export, I know the code should be:
# ExcelWriter is a context manager: leaving the block writes and closes the
# file. The separate writer.save() call was removed in pandas 2.0.
with pd.ExcelWriter("newfile.xlsx",
                    engine='xlsxwriter',
                    datetime_format='yyyymmdd',
                    date_format='yyyymmdd') as writer:
    df.to_excel(writer, sheet_name='Sheet1')
    # The xlsxwriter objects behind the sheet allow formatting beyond pandas.
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']
    worksheet.set_column('B:C', 20)
but I am not sure of how to include it in the GUI program.
So what would you suggest?
There you go:
=^..^=
import pandas as pd
import tkinter as tk
from tkinter import filedialog
def open_file():
    """Prompt for a file and return its contents as a DataFrame.

    The chosen file is parsed as space-separated text without a header row.
    """
    # ask the user which file to load
    chosen_path = filedialog.askopenfilename()
    # parse it straight into a data frame and hand it back
    return pd.read_csv(chosen_path, sep=" ", header=None)
# Window with a single button wired to open_file.
root = tk.Tk()
button = tk.Button(root, text='Open', command=open_file)
button.pack()
# do something with data
# NOTE(review): open_file() is called directly here, before the event loop
# starts, so the file dialog appears immediately rather than on a click.
df_data = open_file()
# Drop the column labelled 0 (the first column when there is no header row).
df = df_data.drop(0, axis=1)
# save data to excel
df.to_excel("output.xlsx")
# Start the Tk event loop; blocks until the window is closed.
root.mainloop()

i want to write looping dataframe to excel

I am new to Python. This task is mainly to read the Excel files in a directory, filter the data in each one, and write the filtered data to an Excel file. When I try to write to Excel, only the values from the last iteration are stored. Please advise how to write all of the data to Excel. I want to write df_filter and df_filter1, which are filled inside a for loop, to Excel. Please help me; I need to write these DataFrames to Excel.
import os
import xlrd
import pandas as pd
import xlwt
from openpyxl import load_workbook
import xlsxwriter
from pyexcelerate import Workbook
import numpy as np
from pandas import ExcelWriter
from tempfile import TemporaryFile
# Placeholders; both names are rebound further down.
ALL_SHEETS = []
sheet_list = ""
# Input directory, config file and output workbook are all asked for interactively.
file_path = os.path.join(input("enter Dir path"))
config_path = os.path.join(input("enter your config file path here"))
output_path = os.path.join(input("Dude where you want store outputfile"))
# One shared writer for the whole run, backed by xlsxwriter.
output1 = pd.ExcelWriter(output_path, engine='xlsxwriter')
# Full paths of every regular file in file_path whose name ends with '.xlsx'.
ALL_SHEETS = [os.path.join(file_path, f) for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
# Module-level row index; sheets() passes it to xl.parse as skiprows.
i = 0
data1 = []
data = []
Packet_size = []
Trail_numbers = []
Though_put = []
Latency = []
Jitter = []
# Summary frames filled by sheets(): df_filter for iMIX rows, df_filter1 for fixed-size rows.
df_filter = pd.DataFrame(columns=['packetsize', 'throughput', 'latency (us)', 'jitter (us)'])
df_filter1 = pd.DataFrame(columns=['packetsize', 'throughput', 'latency (us)', 'jitter (us)'])
#df_sheet = pd.DataFrame(columns=['zsheet'])
merged_inner=pd.DataFrame([])
# Process worksheet number `val` of the workbook currently held in the
# module-level `wb` / `xl` objects: keep only the columns named in the config
# file, pick the highest-throughput row per group for iMIX and for fixed frame
# sizes, and write both summaries through the shared `output1` writer.
# NOTE(review): block indentation was lost in this listing, so the nesting of
# the statements that follow each `else:` cannot be determined from here.
def sheets(val):
s = wb.worksheets[val]
df_sheet = pd.DataFrame( data=['%s' % str(s) + '\n'])
#Name_sheet(s)
# The column labels of the config CSV are the names to keep; leading and
# trailing single quotes are stripped from each one.
HeaderList = pd.read_csv(config_path)
column_list = []
for col in HeaderList:
col = col.lstrip("'")
col = col.rstrip("'")
column_list.append(col)
# The module-level `i` gives the number of rows to skip before the header row.
df1 = xl.parse(sheet_list[val], skiprows=i)
df1 = df1.filter(column_list)
# iMIX results that did not fail.
df2 = df1[(df1['Result'] != 'Failed') & (df1['Frame Size Type'] == 'iMIX')]
if df2.empty:
pass
else:
# Highest 'Throughput (%)' row per (trial, distribution), then per distribution.
final3= df2.groupby(['Trial Number', 'iMIX Distribution'], sort=False).apply(lambda x: x.loc[x['Throughput (%)'].idxmax()])
#df_filter['sheetaname']=df_sheet(lambda a:'%s' % a['sheetvise'],axis=1)
final = final3.groupby(['iMIX Distribution'], sort=False).apply(lambda x: x.loc[x['Throughput (%)'].idxmax()])
# Format the selected rows as strings; latency and jitter become "min/max/avg".
df_filter['packetsize'] = final.apply(lambda z: '%s' % (z['iMIX Distribution']), axis=1)
df_filter['throughput'] = final.apply(lambda z: '%s' % (z['Throughput (%)']), axis=1)
df_filter['latency (us)'] = final.apply(lambda x: '%s/%s/%s' % (x['Minimum Latency (us)'], x['Maximum Latency (us)'], x['Average Latency (us)']),axis=1)
df_filter['jitter (us)'] = final.apply(lambda y: '%s/%s/%s' % (y['Minimum Jitter (us)'], y['Maximum Jitter (us)'], y['Average Jitter (us)']),axis=1)
# NOTE(review): every call writes to the same sheet name 'mani' through the
# same writer and then calls save(); this looks like the reason only the last
# iteration's data survives in the output file. Confirm.
df_filter.to_excel(output1,sheet_name='mani')
output1.save()
df_filter.to_excel(output1, startrow=len(df_filter1)+len(df_filter)+2,sheet_name='mani')
output1.save()
# Fixed-frame-size results that did not fail.
df3 = df1[(df1['Result'] != 'Failed') & (df1['Frame Size Type'] == 'Fixed')]
if df3.empty:
pass
else:
# Same selection as above, grouped by configured frame size instead.
final2 = df3.groupby(['Trial Number', 'Configured Frame Size'], sort=False).apply(lambda x: x.loc[x['Throughput (%)'].idxmax()])
final1=final2.groupby(['Configured Frame Size'],sort=False).apply(lambda x: x.loc[x['Throughput (%)'].idxmax()])
df_filter1['packetsize'] = final1.apply(lambda z: '%s' % (z['Configured Frame Size']), axis=1)
df_filter1['throughput'] = final1.apply(lambda z: '%s' % (z['Throughput (%)']), axis=1)
df_filter1['latency (us)'] = final1.apply(lambda x: '%s/%s/%s' % (x['Minimum Latency (us)'], x['Maximum Latency (us)'], x['Average Latency (us)']),axis=1)
df_filter1['jitter (us)'] = final1.apply(lambda y: '%s/%s/%s' % (y['Minimum Jitter (us)'], y['Maximum Jitter (us)'], y['Average Jitter (us)']),axis=1)
# Written to the same 'mani' sheet as df_filter, once at the top and once at an offset.
df_filter1.to_excel(output1, sheet_name='mani')
df_filter1.to_excel(output1, startrow=len(df_filter1)+len(df_filter) + 2, sheet_name='mani')
output1.save()
def sheet_every():
    """Run ``sheets`` once for every sheet index of the current workbook."""
    # sheet_list_lenght is set at module level for the workbook being processed.
    for sheet_index in range(sheet_list_lenght):
        sheets(sheet_index)
# Process every workbook found in the input directory.
for file in (ALL_SHEETS):
    df_file = pd.DataFrame(data=[file])
    # The same file is opened three ways: xlrd for raw row access, openpyxl
    # for the worksheet objects and pandas for parsing.
    workbook = xlrd.open_workbook(file)
    wb = load_workbook(file)
    xl = pd.ExcelFile(file)
    i = 0
    sheet_list = workbook.sheet_names()
    sheet_list_lenght = (len(sheet_list))
    # Locate the header row: stop at the first row containing 'Trial Number'.
    # `i` keeps that row index and is read by sheets() as skiprows.
    for sheet in sheet_list:
        worksheet = workbook.sheet_by_name(sheet)
        for i in range(0, worksheet.nrows):
            row = worksheet.row_values(i)
            if 'Trial Number' in row:
                break
    sheet_every()
Not sure if this answers your question or not, but if you want to read from a dataframe and add rows to a new dataframe through a loop, you can refer to the code below:
dummyData = pd.read_csv("someexcelfile.csv")
# You can merge multiple dataframes into dummyData to make one big dataframe
dummyInsertTable = pd.DataFrame(columns=["Col1","Col2","Col3"])
# Copy the data across one row at a time, column by column.
for i in range(len(dummyData)):
    for column_name in ("Col1", "Col2", "Col3"):
        dummyInsertTable.loc[i, column_name] = dummyData[column_name][i]
dummyInsertTable.to_csv("writeCSVFile.csv")
And next time be precise where you are facing the problem.
EDIT
Try loading the first dataframe and then loop through the other files and append the files in the first dataframe. Refer the code:
import pandas as pd
# Make a list of all the files you have
filesList = ["/home/bhushan/firstFile.csv","/home/bhushan/secondFile.csv","/home/bhushan/thirdFile.csv","/home/bhushan/fourthFile.csv"]
# Read the first csv file using pandas.read_csv
firstFile = pd.read_csv(filesList[0])
# Loop through the rest of the files and add each one to the first DataFrame.
# DataFrame.append was removed in pandas 2.0, so the frames are combined
# with pd.concat instead.
for path in filesList[1:]:
    fileToBeAdded = pd.read_csv(path)
    firstFile = pd.concat([firstFile, fileToBeAdded])
# Write the final file
finalFile = firstFile
finalFile.to_csv("finalFile.csv")
If I get your question correctly, you have two data frames which you want to write to one excel file but you are only getting the last one.
You should write them to two different sheets instead, then you can retrieve them as per requirement, either individually or combined.
Follow the below links for more details and implementation :
https://xlsxwriter.readthedocs.io/example_pandas_multiple.html
https://campus.datacamp.com/courses/importing-managing-financial-data-in-python/importing-stock-listing-data-from-excel?ex=11
Also, you can instead write to a csv file, that is also excel compatible and easier to handle. Also I have observed that it is faster and more space efficient compared to writing to .xlsx file.
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html

Categories