iterate over multiple files in my directory - python

Currently I am grabbing an Excel file from a folder with Python just fine in the code below, and pushing its data to a web form via Selenium.
However, I am trying to modify this to iterate through a directory containing multiple files (there will be many Excel files in my 'directory' or 'folder').
main.py
from data.find_pending_records import FindPendingRecords
from vital.vital_entry import VitalEntry
if __name__ == "__main__":
    try:
        # Instantiates FindPendingRecords, then gets records to process
        PENDING_RECORDS = FindPendingRecords().get_excel_data()
        # Reads Excel to map data from Excel to Vital
        MAP_DATA = FindPendingRecords().get_mapping_data()
        # Configures driver for Vital
        VITAL_ENTRY = VitalEntry()
        # Start Chrome and navigate to the Vital website
        VITAL_ENTRY.instantiate_chrome()
        # Begin processing records
        VITAL_ENTRY.process_records(PENDING_RECORDS, MAP_DATA)
        print("All done, Bill")
    except Exception as exc:
        print(exc)
config.py
FILE_LOCATION = r"C:\Zip\2019.02.12 Data Docs.zip"
UNZIP_LOCATION = r"C:\Zip\Pending"
VITAL_URL = 'http://boringdatabasewebsite:8080/Horrible'
HEADLESS = False
PROCESSORS = 4
MAPPING_DOC = ".//map/mapping.xlsx"
find_pending_records.py
"""Module used to find records that need to be inserted into Horrible website"""
from zipfile import ZipFile
import math
import pandas
import config
class FindPendingRecords:
    """Class used to find records that need to be inserted into Site"""

    @classmethod
    def find_file(cls):
        """Finds the Excel file to process"""
        archive = ZipFile(config.FILE_LOCATION)
        for file in archive.filelist:
            if 'Horrible Data Log ' in file.filename:
                return archive.extract(file.filename, config.UNZIP_LOCATION)
        raise FileNotFoundError

    def get_excel_data(self):
        """Places Excel data into a pandas DataFrame"""
        excel_data = pandas.read_excel(self.find_file())
        columns = pandas.DataFrame(columns=excel_data.columns.tolist())
        excel_data = pandas.concat([excel_data, columns])
        excel_data.columns = excel_data.columns.str.strip()
        excel_data.columns = excel_data.columns.str.replace("/", "_")
        excel_data.columns = excel_data.columns.str.replace(" ", "_")
        num_valid_records = 0
        for row in excel_data.itertuples():
            person = row.PERSON
            if person in ("", " ", None) or math.isnan(person):
                print(f"Invalid record: {row}")
                excel_data = excel_data.drop(row.Index)  # drop by index label
            else:
                num_valid_records += 1
        print(f"Processing #{num_valid_records} records")
        return self.clean_data_frame(excel_data)

    def clean_data_frame(self, data_frame):
        """Cleans up DataFrames"""
        for col in data_frame.columns:
            if "date" in col.lower():
                data_frame[col] = pandas.to_datetime(data_frame[col],
                                                     errors='coerce', infer_datetime_format=True)
                data_frame[col] = data_frame[col].dt.date
        data_frame['PERSON'] = data_frame['PERSON'].astype(int).astype(str)
        return data_frame

    def get_mapping_data(self):
        map_data = pandas.read_excel(config.MAPPING_DOC, sheet_name='main')
        columns = pandas.DataFrame(columns=map_data.columns.tolist())
        return pandas.concat([map_data, columns])

One way is as below (pseudocode):
class FindPendingRecords:
    @classmethod
    def find_file(cls):
        return ["file1", "file2", "file3"]

    def __init__(self):
        self.files = self.find_file()

    def get_excel_data(self):
        for excel_data in self.files:
            # process your excel_data
            yield excel_data
Your main should be:
if __name__ == "__main__":
    try:
        for PENDING_RECORDS in FindPendingRecords().get_excel_data():
            # Do operations on PENDING_RECORDS
            print(PENDING_RECORDS)
        print("All done, Bill")
    except Exception as exc:
        print(exc)
Your find_file method will be:
@classmethod
def find_file(cls):
    """Finds the Excel files to process"""
    all_files = list()
    archive = ZipFile(config.FILE_LOCATION)
    for file in archive.filelist:
        if 'Horrible Data Log ' in file.filename:
            all_files.append(archive.extract(file.filename, config.UNZIP_LOCATION))
    return all_files
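Putting the pieces together, a runnable (rather than pseudocode) get_excel_data can yield one cleaned DataFrame per extracted file. A minimal sketch, assuming each extracted path can be read with pandas.read_excel and reusing clean_data_frame from the question:
def get_excel_data(self):
    """Yield one cleaned DataFrame per extracted Excel file."""
    for path in self.files:
        excel_data = pandas.read_excel(path)
        excel_data.columns = excel_data.columns.str.strip()
        excel_data.columns = excel_data.columns.str.replace("/", "_")
        excel_data.columns = excel_data.columns.str.replace(" ", "_")
        yield self.clean_data_frame(excel_data)
Because get_excel_data is now a generator, each workbook is read lazily as the main loop asks for it, so the for PENDING_RECORDS loop above processes one file at a time.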

Related

How to insert a data frame as an object attribute

This is most likely a pretty basic question, but I am still learning about classes/objects/constructors/etc. and I am trying to apply some of these concepts to my current workflow.
I am trying to create a class that automatically saves my data frame as a CSV or xlsx file, depending on what I specify, to a given folder. However, I don't believe that I am correctly passing my data frame as an object attribute. This is my code as it stands:
import os
import pandas as pd

award_date_change = merged_df.loc[merged_df['award_date_change'] == 'yes']  # this is my data frame

class uploading_to_GC:
    # constructor where I want to pass my data frame, the file type to save to,
    # and whether the file already exists in my folder
    def __init__(self, file_name, file_type, already_exists):
        self.file_name = file_name
        self.file_type = file_type
        self.already_exists = already_exists

    def print_file_name(self):
        print(self.file_name.head(5))

    def private_workspace(self):
        commonPath = os.path.expanduser(r"~\path")
        GCdocs = commonPath + '384593683' + '\\'
        path = GCdocs + self.file_name  # note: self.file_name holds the DataFrame here, not a name
        if len(self.file_name) != 0 and self.already_exists == True:  # if the file already exists in the folder
            if self.file_type == "csv":  # for csv files
                GC_old = pd.read_csv(path)
                GC_new = GC_old.append(self.file_name, ignore_index=True)
                GC_new.to_csv(path, index=False)
                print("csv file is updated to private workspace in GCdocs")
            elif self.file_type == "xlsx":  # for xlsx files
                GC_old = pd.read_excel(path)
                GC_new = GC_old.append(self.file_name, ignore_index=True)
                GC_new.to_excel(path, index=False)
                print("excel file is updated to private workspace in GCdocs")
            else:
                print("unrecognized file type")
        elif len(self.file_name) != 0 and self.already_exists == False:  # if the file does NOT already exist in the folder
            if self.file_type == "csv":
                self.file_name.to_csv(path, index=False)
            elif self.file_type == "xlsx":
                self.file_name.to_excel(path, index=False)
            else:
                print("unrecognized file type")
        else:
            print("there is no data to upload")

award_date_change = uploading_to_GC(award_date_change, "csv", False)
award_date_change.private_workspace()
I am aware that I don't need to use a class to do this, but I wanted to challenge myself to start using classes more often. Any help would be appreciated
You can pass and store a df in a Class as a data member very simply:
class Foo:
    def __init__(self, df: pd.DataFrame):
        self.df = df
        # or, if you want to be sure you don't modify the original df:
        # self.df = df.copy()

df = pd.DataFrame()
foo_obj = Foo(df)
Edit: the : pd.DataFrame is a type hint. It does not affect the actual code; it merely tells the reader that we are expecting a pd.DataFrame as input. Good IDEs will also warn you if you don't pass a DataFrame.
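Applied to the question's class, a minimal sketch might keep the DataFrame and the target file name as separate attributes (the class and method names here are illustrative, not from the original post):
import os
import pandas as pd

class UploadToGC:
    def __init__(self, df: pd.DataFrame, file_name: str, file_type: str):
        self.df = df.copy()          # store the DataFrame itself as an attribute
        self.file_name = file_name   # store the target file name separately
        self.file_type = file_type

    def save(self, folder: str):
        path = os.path.join(folder, self.file_name)
        if self.file_type == "csv":
            self.df.to_csv(path, index=False)
        elif self.file_type == "xlsx":
            self.df.to_excel(path, index=False)  # requires openpyxl
        else:
            print("unrecognized file type")
Keeping the DataFrame itself (self.df) apart from the file name avoids the mix-up in the original code, where file_name actually held the DataFrame.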

increment csv file number after loop completion

I am executing a loop and saving data in a CSV file. To store the data I create a path and directory and append to the CSV file. As it is, the code executes fine and generates one CSV file upon completion. I would like the CSV file number to increment each time I run the code, so I don't have to delete or overwrite the previous one: file_0 for run 1, file_1 for run 2, and so on. I inserted `# fileName = "{}/{}_{}.csv".format(file_path[0], self.file_name, file_path[1])` but this saves each point in a separate file. Any suggestions welcome. Thanks.
import csv
import os
import time
from datetime import datetime
from random import random

# create a folder, timestamped
class CreateFile:
    def __init__(self, procedure):
        self.procedure = procedure  # Get the procedure name.
        self.createfile()  # Call the create-file function.

    def createfile(self):
        date = datetime.now().date()
        PARENT_DIR = "C:/test/{}".format(date)  # Get the path (parent folder, timestamped by date).
        DIR = '{}'.format(self.procedure)  # Get the procedure name.
        self.PATH = os.path.join(PARENT_DIR, DIR)  # Form a full path.
        try:
            if not os.path.exists(self.PATH):  # If the given path does not exist,
                os.makedirs(self.PATH)  # make a directory.
        except OSError:  # OSError occurred, don't make the directory.
            print("Creation of the directory [%s] failed." % DIR)  # Print message.
        else:  # Successfully created the directory, print the message.
            print("Successfully created the directory %s " % DIR)

    def get_file(self):
        file_list = os.listdir(self.PATH)  # List the contents of the path.
        file_count = len(file_list)  # Check the number of file(s) under the given path.
        return [self.PATH, file_count]  # Return the full path and file count under this folder.

# initialization and setpoints list
startpoint = 300
setpoint = 310
step = 10
temp_list = []
for x in range(startpoint, setpoint + 1, step):
    temp_list.append(x)
print(temp_list)

class save_data1:
    def __init__(self, file_name):
        self.file_name = file_name
        file_path_count = CreateFile(file_name).get_file()
        self.fileName = "{}/{}.csv".format(file_path_count[0], file_name)

    def record_csv(self, fileName, now, ep1):
        with open(fileName, 'a', newline='') as csvfile:
            header = ["Timestamp", 'Temp', "ep1"]
            writer = csv.DictWriter(csvfile, fieldnames=header)
            if csvfile.tell() == 0:
                writer.writeheader()
            writer.writerow(
                {
                    "Timestamp": now,
                    'Temp': temp,  # 'temp' is the global loop variable below
                    "ep1": ep1
                }
            )

    def test(self):
        file_path = CreateFile(self.file_name).get_file()
        # fileName = "{}/{}_{}.csv".format(file_path[0], self.file_name, file_path[1])
        fileName = "{}/{}.csv".format(file_path[0], self.file_name)
        now = datetime.now()
        ep1 = random()  # here just a random number instead of the instrument
        self.record_csv(fileName, now, ep1)

# Set setpoint in temp list
for temp in temp_list:
    # print a header
    print('')
    hdr = '______ T ______ __________ H __________\t______ Ext Param ______'
    print(hdr)
    time.sleep(0.5)
    print('setpoint:', temp)
    if temp == 300:
        save_data1('meas1').test()
    else:
        print('Waiting ')
        save_data1('meas1').test()
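One way to get file_0, file_1, and so on is to compute the run index once, before the measurement loop starts, instead of re-counting the folder inside test() for every point (which is why the commented-out line produced one file per point). A minimal sketch under that assumption, reusing CreateFile from above:
import os

def next_run_filename(folder, base_name):
    """Return an unused '<base_name>_<n>.csv' path by counting up from 0."""
    n = 0
    while os.path.exists(os.path.join(folder, "{}_{}.csv".format(base_name, n))):
        n += 1
    return os.path.join(folder, "{}_{}.csv".format(base_name, n))

# compute the name once per run, before the loop; every point then appends to it
run_file = next_run_filename(CreateFile('meas1').PATH, 'meas1')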

Why is this try/except loop exiting on errors?

I have a script that collects Reddit comments. It pulls from a csv file with a list of links in it. Some of the links are dead and I get 404/403/etc. errors. The code below will correctly identify them and skip them, but it then exits the loop and finishes writing the csv file without continuing on to the next link.
import praw
import pprint
import csv
import os
import pandas as pd
from collections import namedtuple
from datetime import datetime
from pathlib import Path

def scrape_comments(reddit_api, csv_file, dest):
    df = pd.read_csv(csv_file)
    data = []
    try:
        for pid in df.id:
            # post_comment = []
            submission = reddit_api.submission(id=pid)
            submission.comments.replace_more(limit=None)
            for comment in submission.comments.list():
                # post_comment.append(comment.body)
                data.append((pid, comment.id, comment.parent_id, comment.body, comment.link_id, comment.author, comment.score, comment.created_utc, comment.subreddit))
            # data.append((pid, ";".join(post_comment)))
    except:
        print("Error! Skip the current subreddit")
    df = pd.DataFrame(data, columns=["post_id", "comment_id", "comment_parent_id", "comment_body", "comment_link_id", "comment_author", "comment_score", "comment_created", "comment_subreddit"])  # append tuple
    df.to_csv(dest, index=False, encoding='utf-8')

if __name__ == "__main__":
    reddit_api = praw.Reddit(
        client_id="####",
        client_secret="####",
        user_agent="####",
        username="####",
        password="####"
    )
    # reddit_api = init_praw(client_id, client_secret, user_agent, username, password)
    csv_file = "####"
    dest_dir = "####"
    dest_name = "reddits_comments.csv"
    Path(dest_dir).mkdir(parents=True, exist_ok=True)
    dest = os.path.join(dest_dir, dest_name)
    scrape_comments(reddit_api, csv_file, dest)
You should put the try/except around a smaller portion of your code, as said in the comments. Here's an illustration of that:
def scrape_comments(reddit_api, csv_file, dest):
    df = pd.read_csv(csv_file)
    data = []
    for pid in df.id:
        try:
            # post_comment = []
            submission = reddit_api.submission(id=pid)
            submission.comments.replace_more(limit=None)
            for comment in submission.comments.list():
                # post_comment.append(comment.body)
                data.append((pid, comment.id, comment.parent_id, comment.body, comment.link_id, comment.author, comment.score, comment.created_utc, comment.subreddit))
            # data.append((pid, ";".join(post_comment)))
        except Exception:
            print("Error! Skip the current subreddit")
    df = pd.DataFrame(data, columns=["post_id", "comment_id", "comment_parent_id", "comment_body", "comment_link_id", "comment_author", "comment_score", "comment_created", "comment_subreddit"])  # append tuple
    df.to_csv(dest, index=False, encoding='utf-8')
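If you also want the skip message to say which link failed, and to avoid silencing unrelated bugs, you can catch the praw/prawcore base exception instead of Exception. A sketch of the same loop, assuming prawcore is installed alongside praw (it is a praw dependency):
import prawcore

for pid in df.id:
    try:
        submission = reddit_api.submission(id=pid)
        submission.comments.replace_more(limit=None)
        for comment in submission.comments.list():
            data.append((pid, comment.id, comment.parent_id, comment.body,
                         comment.link_id, comment.author, comment.score,
                         comment.created_utc, comment.subreddit))
    except prawcore.exceptions.PrawcoreException as exc:
        # 404/403/etc. from dead links land here; unrelated bugs still surface
        print(f"Error! Skipping {pid}: {exc}")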

Is there a way to collect pytest results of multiple runs in a csv file?

I have a system that sometimes has errors while booting, which is what I test with pytest. I want to collect the results in a csv file so that I can easily analyse the results of all the runs afterwards. If the boot failed, I would like to get the result of the first test case that failed.
This is a very long but complete solution (Python 3 only!) which does exactly what I described above. It does so by using a conftest.py in pytest. It includes error handling for the case that the tests differ from the previously run tests, in which case the csv file would be broken (different header). The header of the csv file will be auto-generated if it doesn't exist.
This script appends the result of every pytest run to the csv file. This is done after all tests have run. The first error message is recorded in test_results['first_error_message'].
Please read the code before running it. Remember to override statistics_csv_path which is the csv file where the test results will be stored.
import os
import logging
import csv
import datetime
from collections import OrderedDict
import pytest

logger = logging.getLogger(__name__)
statistics_csv_path = 'test_statistics.csv'  # override this with the path where results should be stored

failed = False
test_results = None

def get_current_test():
    """Just a helper function to extract the current test"""
    full_name = os.environ.get('PYTEST_CURRENT_TEST').split(' ')[0]
    test_file = full_name.split("::")[0].split('/')[-1].split('.py')[0]
    test_name = full_name.split("::")[1]
    return full_name, test_file, test_name

def pytest_configure(config):
    """Called when pytest is starting and before running any tests."""
    global test_results
    test_results = OrderedDict()
    test_results['time'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Initial state is success; only change it on failure
    test_results['state'] = "Success"
    test_results['first_error_message'] = ""
    test_results['amount_of_failed_tests'] = 0

def pytest_unconfigure(config):
    """Called when pytest is about to end. Can be used to print the result dict or
    to pipe the data into a file"""
    create_first_line = False
    line_to_be_written = list(test_results.values())
    csv_header = list(test_results.keys())
    try:
        with open(statistics_csv_path) as statistics_csv:
            csv_reader = csv.reader(statistics_csv)
            header = next(csv_reader, None)
            if header is not None:
                for i in range(len(csv_header)):
                    try:
                        if header[i] != csv_header[i]:
                            logger.critical(f"Non-matching header column in the csv file: {header[i]} != {csv_header[i]}!!!!")
                            raise Exception("Probably the csv format of your tests has changed.. please fix!")
                    except IndexError:
                        raise Exception("Probably the csv format of your tests has changed.. please fix!")
            else:
                create_first_line = True
    except FileNotFoundError:
        create_first_line = True
    with open(statistics_csv_path, 'a+', newline='') as statistics_csv:
        csv_writer = csv.writer(statistics_csv)
        if create_first_line:
            csv_writer.writerow(csv_header)
        csv_writer.writerow(line_to_be_written)

@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """The actual wrapper that gets called before and after every test"""
    global test_results
    outcome = yield
    rep = outcome.get_result()
    if rep.when == "call":
        full_name, test_file, test_name = get_current_test()
        test_name_msg = f"{test_name}_msg"
        if rep.failed:
            test_results['state'] = "Failure"
            test_results['amount_of_failed_tests'] += 1
            test_results[test_name] = "Failure"
            test_results[test_name_msg] = f"{call.excinfo.typename} - {call.excinfo.value}"
            if test_results['first_error_message'] == "":
                test_results['first_error_message'] = test_results[test_name_msg]
        else:
            test_results[test_name] = "Success"
            test_results[test_name_msg] = ""
Following matt3o's answer (https://stackoverflow.com/a/61526655/5351910), making some modifications and fixing some issues, I created a report that only writes the errors:
import os
import csv
import pytest

statistics_csv_path = 'test_result.csv'

def get_current_test():
    """Just a helper function to extract the current test"""
    full_name = os.environ.get('PYTEST_CURRENT_TEST').split(' ')[0]
    test_file = full_name.split("::")[0].split('/')[-1].split('.py')[0]
    test_name = full_name.split("::")[1]
    return full_name, test_file, test_name

@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """The actual wrapper that gets called before and after every test"""
    outcome = yield
    rep = outcome.get_result()
    # only record failures, so successful tests write nothing
    if rep.when == "call" and rep.outcome == "failed":
        full_name, test_file, test_name = get_current_test()
        single_result = [test_file, test_name, "FAILED",
                         f"{call.excinfo.typename} - {call.excinfo.value}"]
        csv_header = ['test_file', 'test_name', 'state', 'message']
        create_first_line = False
        try:
            with open(statistics_csv_path) as statistics_csv:
                csv_reader = csv.reader(statistics_csv)
                header = next(csv_reader, None)
                if header is None:
                    create_first_line = True
        except FileNotFoundError:
            create_first_line = True
        with open(statistics_csv_path, 'a+', newline='') as statistics_csv:
            csv_writer = csv.writer(statistics_csv)
            if create_first_line:
                csv_writer.writerow(csv_header)
            csv_writer.writerow(single_result)
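After a few runs you can aggregate the collected rows however you like; for example, a quick summary of the failure report written above (a sketch, assuming pandas is available and the file name from the snippet):
import pandas as pd

results = pd.read_csv('test_result.csv')
# count how often each test is the one that fails
print(results.groupby(['test_file', 'test_name']).size().sort_values(ascending=False))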

GUI for printing pandas data frame

I have 10 CSV files, each with the same number of columns, from which I am reading data one by one into a pandas data frame. I want the data to be displayed in a window, in some table form, where each new file's data is added as new rows. Any suggestions on this?
Below is my sample CSV file:
Like this, there are 10 or more CSV files, and I will be reading data from those files one by one and want to display it in the GUI.
Brief introduction to my application:
I have a machine that generates CSV files into a folder at a certain interval. I am using the Watchdog library to put a watch on the folder where the CSV files are generated. When I receive a CSV file I read it into a pandas data frame. A sample CSV file is given above.
As long as the machine is running it will keep generating CSV files, so to see the data I would need to open each and every CSV file. Instead, I want a view in which the data gets updated whenever a new CSV file is generated.
So, technically: one CSV file gets read, converted into a data frame, and inserted into some sort of table view, and this process happens again when a new CSV file is generated, but now the data should be appended as the next rows of the same table view.
Here is my main file:
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
import pandas as pd
from Append_Function import append_df_to_excel
import os.path
import sys

class Watcher:
    def __init__(self, args):
        self.watch_dir = os.getcwd()
        print(args[0])
        self.directory_to_watch = os.path.join(self.watch_dir, args[1])
        self.observer = Observer()
        self.event_handler = Handler(patterns=["*.CSV"], ignore_patterns=["*.tmp"], ignore_directories=True)

    def run(self):
        self.observer.schedule(self.event_handler, self.directory_to_watch, recursive=False)
        self.observer.start()
        try:
            while True:
                time.sleep(1)
        except:
            self.observer.stop()
            print("Error")
        self.observer.join()

class Handler(PatternMatchingEventHandler):
    @staticmethod
    def on_any_event(event):
        if event.is_directory:
            return None
        elif event.event_type == 'created':
            # Take any action here when a file is first created.
            print("Received created event - %s." % event.src_path)
            df = pd.read_csv(event.src_path, header=1, index_col=0)
            append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
        elif event.event_type == 'modified':
            # Take any action here when a file is modified.
            df = pd.read_csv(event.src_path, header=0, index_col=0)
            append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
            print("Received modified event - %s." % event.src_path)

if __name__ == '__main__':
    print(sys.argv)
    w = Watcher(sys.argv)
    w.run()
Here is my Append Function:
import pandas as pd
import openpyxl as ox
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False,
                       **to_excel_kwargs):
    # ignore [engine] parameter if it was passed
    if 'engine' in to_excel_kwargs:
        to_excel_kwargs.pop('engine')
    writer = pd.ExcelWriter(filename, engine='openpyxl')
    # Python 2.x: define [FileNotFoundError] exception if it doesn't exist
    try:
        FileNotFoundError
    except NameError:
        FileNotFoundError = IOError
    try:
        # try to open an existing workbook
        writer.book = ox.load_workbook(filename, keep_vba=True)
        # get the last row in the existing Excel sheet
        # if it was not specified explicitly
        if startrow is None and sheet_name in writer.book.sheetnames:
            startrow = writer.book[sheet_name].max_row
        # truncate sheet
        if truncate_sheet and sheet_name in writer.book.sheetnames:
            # index of [sheet_name] sheet
            idx = writer.book.sheetnames.index(sheet_name)
            # remove [sheet_name]
            writer.book.remove(writer.book.worksheets[idx])
            # create an empty sheet [sheet_name] using old index
            writer.book.create_sheet(sheet_name, idx)
        # copy existing sheets
        writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
    except FileNotFoundError:
        # file does not exist yet, we will create it
        pass
    if startrow is None:
        startrow = 0
    # write out the new sheet
    df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs, header=True)
    # save the workbook
    writer.save()
You have to add the dataframe through a loop:
import pandas as pd
from PyQt5 import QtCore, QtWidgets
class DataFrameTableWidget(QtWidgets.QTableWidget):
    def append_dataframe(self, df):
        df = df.copy()
        if df.columns.size > self.columnCount():
            self.setColumnCount(df.columns.size)
        r = self.rowCount()
        self.insertRow(r)
        for c, column in enumerate(df):
            it = QtWidgets.QTableWidgetItem(column)
            self.setItem(r, c, it)
        i = self.rowCount()
        for r, row in df.iterrows():
            self.insertRow(self.rowCount())
            for c, (column, value) in enumerate(row.iteritems()):
                it = QtWidgets.QTableWidgetItem(str(value))
                self.setItem(i + r, c, it)

if __name__ == '__main__':
    import sys
    app = QtWidgets.QApplication(sys.argv)
    import numpy as np
    w = DataFrameTableWidget()
    df = pd.DataFrame(np.random.randint(0, 100, size=(4, 4)), columns=list('ABCD'))
    w.append_dataframe(df)

    def after_show():
        df = pd.DataFrame(np.random.randint(0, 100, size=(4, 4)), columns=list('ABCD'))
        w.append_dataframe(df)

    QtCore.QTimer.singleShot(2 * 1000, after_show)
    w.resize(640, 480)
    w.show()
    sys.exit(app.exec_())
Update:
The observer runs on another thread, so it cannot update the GUI from that thread; a signal must be used to transmit the information:
import os
import time
import pandas as pd
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
from PyQt5 import QtCore, QtWidgets
from Append_Function import append_df_to_excel
class Emitter(QtCore.QObject):
    newDataFrameSignal = QtCore.pyqtSignal(pd.DataFrame)

class Watcher:
    def __init__(self, filename):
        self.watch_dir = os.getcwd()
        self.directory_to_watch = os.path.join(self.watch_dir, filename)
        self.emitter = Emitter()
        self.observer = Observer()
        self.event_handler = Handler(
            emitter=self.emitter,
            patterns=["*.CSV"],
            ignore_patterns=["*.tmp"],
            ignore_directories=True
        )

    def run(self):
        self.observer.schedule(self.event_handler, self.directory_to_watch, recursive=False)
        self.observer.start()

class Handler(PatternMatchingEventHandler):
    def __init__(self, *args, emitter=None, **kwargs):
        super(Handler, self).__init__(*args, **kwargs)
        self._emitter = emitter

    def on_any_event(self, event):
        if event.is_directory:
            return None
        elif event.event_type == 'created':
            # Take any action here when a file is first created.
            print("Received created event - %s." % event.src_path)
            df = pd.read_csv(event.src_path, header=1)
            self._emitter.newDataFrameSignal.emit(df.copy())
            df.set_index(df.columns.values.tolist()[0], inplace=True)
            append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
        elif event.event_type == 'modified':
            # Take any action here when a file is modified.
            df = pd.read_csv(event.src_path, header=1)
            self._emitter.newDataFrameSignal.emit(df.copy())
            df.set_index(df.columns.values.tolist()[0], inplace=True)
            append_df_to_excel(os.path.join(os.getcwd(), "myfile.xlsx"), df)
            print("Received modified event - %s." % event.src_path)

class DataFrameTableWidget(QtWidgets.QTableWidget):
    @QtCore.pyqtSlot(pd.DataFrame)
    def append_dataframe(self, df):
        df = df.copy()
        if df.columns.size > self.columnCount():
            self.setColumnCount(df.columns.size)
        r = self.rowCount()
        self.insertRow(r)
        for c, column in enumerate(df):
            it = QtWidgets.QTableWidgetItem(column)
            self.setItem(r, c, it)
        i = self.rowCount()
        for r, row in df.iterrows():
            self.insertRow(self.rowCount())
            for c, (column, value) in enumerate(row.iteritems()):
                it = QtWidgets.QTableWidgetItem(str(value))
                self.setItem(i + r, c, it)

if __name__ == '__main__':
    import sys
    app = QtWidgets.QApplication(sys.argv)
    w = DataFrameTableWidget()
    w.resize(640, 480)
    w.show()
    watcher = Watcher(sys.argv[1])
    watcher.run()
    watcher.emitter.newDataFrameSignal.connect(w.append_dataframe)
    sys.exit(app.exec_())
You might be looking for:
Jupyter notebooks, which are able to display pandas dataframes as HTML formatted tables.
Jupyter lab, which includes a GUI CSV viewer.
The qgrid extension for jupyter notebooks, which will allow you to interactively filter and edit data.
If your CSV files have the same headers, you might want to concatenate the data to create one single table for review.
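If the headers do match, the concatenation itself is short; a sketch (the folder pattern here is illustrative):
import glob
import pandas as pd

# stack every CSV in the watched folder into one table for review
frames = [pd.read_csv(path) for path in sorted(glob.glob("data/*.CSV"))]
combined = pd.concat(frames, ignore_index=True)
print(combined)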
