extend xlsxwriter.Worksheet() class - python

I currently override the xlsxwriter.Workbook, called rpt.Workbook. Would like to add method to xlsxwriter.Worksheet(), but since xlsxwriter.Workbook() imports Worksheet,not sure how this can be done without major convulsions. Currently, I have to pass the worksheet object as an argument.
Seems like I need to write override methods for xlsxwriter.Workbook() to point to a custom class of xlsxwriter.Worksheet() , but can't figure out how.
Here is the current override rpt.Workbook() being used:
####################################################################
class Workbook(xlsxwriter.Workbook):
####################################################################
"""\nSpreadsheet class provides methods to build a spreadsheet.
"""
####################################################################
def __init__(self,filename=None, options={}):
####################################################################
try:
filename = rpt.fname(filename)
except FileNotFoundError as err:
log.error(err)
return False
log.info("Initializing excel file " + filename)
super().__init__(filename,options)
####################################################################
def add_tab(self,name=None,data=None,header=None,
srow=0,scol=0,autocol=True):
####################################################################
"""\nProvides method to add_worksheet and add_table in 1 call.
Required Attribute args:
name = TAB name
header = list of header names
data = list of lists for spreadsheet contents
Optional Attribute args:
srow = starting row for table, default 0
scol = starting col for table, default 0
autocol = True/False, auto set the column sizes
add_tab also adds the worksheet.header attribute to
allow the set_col_by_name function to work
"""
if not data:
log.warning("data=[][] required")
return None
if not header:
log.warning("header=[] required")
return False
columns = []
for field in header:
columns.append({ 'header' : field })
worksheet = self.add_worksheet(name)
worksheet.header = header
tableinfo= {
'data' : data,
'columns' : columns
}
lastcol = scol + (len(header) - 1)
lastrow = srow + (len(data) + 1)
worksheet.add_table(srow,scol,lastrow,lastcol,tableinfo)
#if autocol:
#self.auto_set_columns(worksheet=worksheet,data=data,scol=scol)
worksheet.freeze_panes(0,1)
return worksheet
####################################################################
def auto_set_columns(self,worksheet=None,data=None,header=None,scol=0):
####################################################################
"""\nDetermines the max length of each column and then set
that column width.
Required Attribute args:
worksheet = worksheet object
data = list of lists data
Optional Attribute args:
scol = Column start
header = row of headers for data list of lists.
If header not specified, worksheet
must have been created with self.add_tab()
"""
if not header and worksheet.header:
header = worksheet.header
## table = [] list of lists, combine header and data
table = []
table.append(header) for row in data:
table.append(row)
ziptable = list(zip (*table))
colsizes = []
for idx,val in enumerate(table[0]):
size = max(len(s) for s in ziptable[idx])
colnum = idx + scol
log.debug("Setting column => {} col size => {} => {}".format(colnum,val,size))
worksheet.set_column(colnum,colnum,size)
I want to add a method to xlsxwriter.Worksheet() called auto_set_columns(). Currently I have to pass the worksheet object (worksheet=worksheet) as an object to get this to work.I would like to utilize worksheet.auto_set_columns() instead. Currently auto_set_columns() is a method of rpt.Workbook.
Would like auto_set_columns() to be an extended method of xlsxwriter.Worksheet.
The script side utilization currently looks like this and works:
excelfile = nashomes + '/nas_homes.xlsx'
spreadsheet = rpt.Workbook(excelfile)
worksheet = spreadsheet.add_tab(name='Nas Homes',data=hrpt.data,header=hrpt.header)
spreadsheet.auto_set_columns(worksheet=worksheet,data=hrpt.data,scol=0)
What I desire, notice the last line changes:
excelfile = nashomes + '/nas_homes.xlsx'
spreadsheet = rpt.Workbook(excelfile)
worksheet = spreadsheet.add_tab(name='Nas Homes',data=hrpt.data,header=hrpt.header)
worksheet.auto_set_columns(data=hrpt.data,scol=0)
Goal desired here is that worksheet object (which is xlsxwriter.Worksheet() ) can have an extended "auto_set_columns" method. However, since the worksheet object is created from a add_worksheet() method in the xlsxwriter.Workbook() class, I can't figure out how to extend xlsxwriter.Worksheet() without major override methods to xlsxwriter.Workbook() also. How can I get xlsxwriter.Workbook() to reference my extended xlsxwriter.Worksheet() when Workbook.add_worksheet() creates the Worksheet object?

How about monkey patching the worksheet before returning it from add_tab?
First create the standalone function outside of any class definitions:
import types
def auto_set_columns(self,data=None,header=None,scol=0):
if not header and self.header:
header = self.header
## table = [] list of lists, combine header and data
table = []
table.append(header)
for row in data:
table.append(row)
ziptable = list(zip (*table))
colsizes = []
for idx,val in enumerate(table[0]):
size = max(len(s) for s in ziptable[idx])
colnum = idx + scol
print "Setting column => {} col size => {} => {}".format(colnum,val,size)
self.set_column(colnum,colnum,size)
And then inside your Worksheet.add_tab function, patch in the method before returning:
....
worksheet.freeze_panes(0,1)
worksheet.auto_set_columns = types.MethodType( auto_set_columns, worksheet )
return worksheet
Now you should be able to run:
worksheet = spreadsheet.add_tab(name='Nas Homes',data=hrpt.data,header=hrpt.header)
worksheet.auto_set_columns(data=hrpt.data,scol=0)

Related

Reading the Value Attribute of a Checkbox in Flask/WTF

I have a form with a column of checkboxes corresponding to the columns in my database. I'm setting the value of each checkbox (in javascript) to the name of the column, but when I try to read the checkbox value in Flask/Python all I can get is True or False. How do I read the text value of the value attribute of the checkboxes?
Just to complicate things, I'm generating the form as a FieldList of FormFields, so I can't simply hardcode the field names. (Well, I could, but that would make it fragile to schema changes.)
My form code is
class ImportFilterSubForm(Form):
use = BooleanField(
'Use',
render_kw={'class': 'use'}
)
sel = SelectField(
'Maps to:',
choices=[],
render_kw={'class': 'sel'},
validators=[Optional()]
)
class ImportFilterForm(FlaskForm):
rows = FieldList(FormField(ImportFilterSubForm))
My view code, with error handling removed, is
def prefilter_import():
db_columns = Contact.__table__.columns.keys()
filename = request.cookies.get('workfile')
with open(filename) as fh:
reader = csv.DictReader(fh)
file_columns = reader.fieldnames
form = ImportFilterForm()
for col in db_columns:
new_row = form.rows.append_entry()
new_row.use.label = col
new_row.sel.choices = file_columns
input_file = request.cookies.get('input_file')
return render_template('filter_import.html', form=form, filename=input_file)
def postfilter_import():
form = ImportFilterForm()
db_columns = Contact.__table__.columns.keys()
filename = request.cookies.get('workfile')
with open(filename) as fh:
reader = csv.DictReader(fh)
file_columns = reader.fieldnames
missing_columns = db_columns
extra_columns = file_columns
mappings = dict()
for i, row in enumerate(form.rows):
if row.use.data: # Problem arises here
mappings[row.use.data] = row.sel.data
for key, value in mappings.items():
missing_columns.remove(key) # Problem manifests here
extra_columns.remove(value)
I'm trying to create a dict mapping the values of the checkboxes to the values of the selects, but I'm only getting True and False for the checkboxes, even though I've verified that the checkboxes' value attributes are correctly returned as the names of the corresponding columns.
How can I get Flask/WTForms to return the text of the value attributes?
After some investigation, I discovered the raw_data attribute of the field object, which contains a list of the values of the value attributes of the HTML control. Thus, the code
mappings = dict()
for row in form.rows:
if row.use.data:
mappings[row.sel.data] = row.use.raw_data[0]
for key, value in mappings.items():
missing_columns.remove(value)
extra_columns.remove(key)
does what I need it to do.

How skip to another loop in python if no data returned by the API?

I have a python code that loops through multiple location and pulls data from a third part API. Below is the code sublocation_idsare location id coming from a directory.
As you can see from the code the data gets converted to a data frame and then saved to a Excel file. The current issue I am facing is if the API does not returns data for publication_timestamp for certain location the loop stops and does not proceeds and I get error as shown below the code.
How do I avoid this and skip to another loop if no data is returned by the API?
for sub in sublocation_ids:
city_num_int = sub['id']
city_num_str = str(city_num_int)
city_name = sub['name']
filter_text_new = filter_text.format(city_num_str)
data = json.dumps({"filters": [filter_text_new], "sort_by":"created_at", "size":2})
r = requests.post(url = api_endpoint, data = data).json()
articles_list = r["articles"]
articles_list_normalized = json_normalize(articles_list)
df = articles_list_normalized
df['publication_timestamp'] = pd.to_datetime(df['publication_timestamp'])
df['publication_timestamp'] = df['publication_timestamp'].apply(lambda x: x.now().strftime('%Y-%m-%d'))
df.to_excel(writer, sheet_name = city_name)
writer.save()
Key Error: publication_timestamp
Change this bit of code:
df = articles_list_normalized
if 'publication_timestamp' in df.columns:
df['publication_timestamp'] = pd.to_datetime(df['publication_timestamp'])
df['publication_timestamp'] = df['publication_timestamp'].apply(lambda x: x.now().strftime('%Y-%m-%d'))
df.to_excel(writer, sheet_name = city_name)
else:
continue
If the API literally returns no data i.e. {} then you might even do the check before normalizing it:
if articles_list:
df = json_normalize(articles_list)
# ... rest of code ...
else:
continue

Adding decimal point, additional row w/ headers and rearranging columns in pythron script

I have the following code below which takes a specific type of .xml file and converts to a more reader friendly .csv file.
You can see what the reader friendly .csv looks like in the two linked .csv file output links below.
A few things I would like to add/change, but need help with altering my code to do so.
1.) I'd like to add one decimal position to column H so instead of reading say 20, I want it to read as 2.0. I'd also like to add one decimal position to any row that has CF in it, i.e. cell AE1, AG1, AI1, etc.
2.) I'd like to insert a row above row 1 where I can type in row headers. For example, H1 reads as T2. I want to insert a row above H1 that will say Accel./Decel. Time [s]. I'd like to be able to do this for every row in the spreadsheet.
3.) I'd like to rearrange the column order of the outputted .csv file. I'd like to move columns AD - BQ to where column I is.
Any help with any of these would be greatly appreciated. Thanks!
.csv file output
.csv file output1
from lxml import etree
import os
import pandas as pd
import collections
ProcessRecipe = collections.namedtuple('ProcessRecipe', ['recipe_number', 'recipe_steps'])
class File:
def __init__(self, fp):
fp_wo_quotes = fp.replace('"', '')
if os.path.isfile(fp_wo_quotes):
self.fp = fp_wo_quotes
else:
self.fp = None
#property
def parent_folder(self):
return os.path.dirname(self.fp)
def new_fp(self, file_name):
return os.path.join(self.parent_folder, file_name)
class MPCProcessRecipe:
RECIPE_TYPE = 'MPCProcessRecipe'
def __init__(self, fp):
"""
constructor
:param fp: absolute file path to a recipe file
"""
self.tree = etree.parse(fp)
#staticmethod
def get_columns(step_data):
"""
Collect attributes from a step data element for CSV column header
:param step_data: step data element
:return: column names as a list of strings
"""
_columns = ['Step']
for data_element in step_data.xpath('./Data'):
# <Data name="Repeat">0</Data> // without index
# <Data name="MR" index="1"/> // with index
_name = data_element.get('name')
_index = data_element.get('index')
column_name = _name + _index if _index else _name
_columns.append(column_name)
# print('columns', _columns)
return _columns
#staticmethod
def get_data(step_data_list):
data = []
for step_data in step_data_list:
name = step_data.tag
step = step_data.get('step')
values = []
for _data_element in step_data.xpath('./Data'):
_text = _data_element.text
if _text:
values.append(_text)
else:
values.append('')
datum = [name + step] + values
# print('datum', datum)
data.append(datum)
return data
#property
def process_recipe_list(self):
"""
Loop through <Recipe> elements and populate DataFrames
:return: list of ProcessRecipe namedtuple
"""
pr_list = []
for recipe in self.tree.xpath('/Recipes/Recipe'):
# <Recipe number="994" type="MPCProcessRecipe">
data =[]
if recipe.get('type') == self.RECIPE_TYPE:
recipe_number = recipe.get('number')
# <PreStepData step="1">
# <StepData step="1">
pre_step_data_list = recipe.xpath('./RecipeData/PreStepData')
step_data_list = recipe.xpath('./RecipeData/StepData')
# create columns from the first entry of StepData
columns = MPCProcessRecipe.get_columns(step_data_list[0])
if len(pre_step_data_list) > 0:
data += MPCProcessRecipe.get_data(pre_step_data_list)
data += MPCProcessRecipe.get_data(step_data_list)
df = pd.DataFrame(data=data, columns=columns)
pr = ProcessRecipe(recipe_number, df)
pr_list.append(pr)
return pr_list
def convert_xml_files_to_csv_files():
fp_input = input('D&D a recipe file: ')
file_obj = File(fp_input)
print('File Path:', file_obj.fp)
rcp = MPCProcessRecipe(file_obj.fp)
pr_list = rcp.process_recipe_list
for pr in pr_list:
out_fp = file_obj.new_fp(pr.recipe_number + '.csv')
pr.recipe_steps.to_csv(out_fp)
if __name__ == '__main__':
convert_xml_files_to_csv_files()

Create multiple dataframes as properties of an instance of a class within if loop

I have a class, myClass, that I wish to add several dataframes too. At first the class requires a name, and a list of filepaths for an instance to be created:
class myClass:
def __init__(self, name, filepathlist):
self.name = name
self.filepathlist = filepathlist
The data that is pulled into the instance is not in the desired format. As such I have created a method of the class to format the data and create a property of the class for each file that is read:
def formatData(self):
i = 0
if i < (len(self.filepathlist) - 1):
DFRAW = pd.read_csv(self.filepathlist[i], header = 9) #Row 9 is the row that is not blank (all blank auto-skipped)
DFRAW['DateTime'], DFRAW['dummycol1'] = DFRAW[' ;W;W;W;W'].str.split(';', 1).str
DFRAW['Col1'], DFRAW['dummycol2'] = DFRAW['dummycol1'].str.split(';', 1).str
DFRAW['Col2'], DFRAW['dummycol3'] = DFRAW['dummycol2'].str.split(';', 1).str
DFRAW['Col3'], DFRAW['Col4'] = DFRAW['dummycol3'].str.split(';', 1).str
DFRAW= DFRAW.drop([' ;W;W;W;W', 'dummycol1', 'dummycol2', 'dummycol3'], axis = 1)
#There appears to be an issue with these two lines.
processedfilename = "MYDFNAME" + str(i)
self.processedfilename = DFRAW
i = i + 1
I have run the formatting lines of code, those that start with DFRAW, outside of the class and believe these are working correctly.
Somewhere in the script there is an issue with assigning the dataframes as properties of the class; I create a list of filepaths and an instance of the class:
filepathlist = [r"file1.csv",r"file2.csv"]
myINST = myClass("MyInstName", filepathlist )
Then run the formatting method:
myINST.formatData()
Now running the following to check that the instance of the class, myINST, has the properties correctly assigned;
vars(myINST)
But this returns the filepathlist, name and roughly 8000 lines of rows of data from the dataframe. I was expecting the following:
filepathlist, name, MYDFNAME0, MYDFNAME1
What is the error in my code or my approach?
vars will return all the values of an instance, and since myClass have three values: name, filepathlist and processedfilename (which should really be a dataframe), so it will return all.
If you only want the filepathlist, you can access it through instance_object.field_name.
myINST.filepathlist and this will return [r"file1.csv",r"file2.csv"].
Also, you are probably not doing correct here:
processedfilename = "MYDFNAME" + str(i)
self.processedfilename = DFRAW
i = i + 1
(1) You are storing dataframe object in a field called processedfilename, which is weird. (2) You are not appending values but rather replacing, thus after the loop, this will only return you the latest data frame in your filepathlist.
You should store your dataframe in a better format: list, dictionary, etc.
Actually you can access your dataframe(s) in vars() if you incorporate it into the __init__ method. Below builds a dictionary of dataframes with keys being original csv file names.
class myClass:
def __init__(self, name, filepathlist):
self.name = name
self.filepathlist = filepathlist
self.mydataframedict = self.formatData()
def formatData(self):
tmp_dict = {}
for f in self.filepathlist:
DFRAW = pd.read_csv(f, header = 9)
DFRAW['DateTime'], DFRAW['dummycol1'] = DFRAW[' ;W;W;W;W'].str.split(';', 1).str
DFRAW['Col1'], DFRAW['dummycol2'] = DFRAW['dummycol1'].str.split(';', 1).str
DFRAW['Col2'], DFRAW['dummycol3'] = DFRAW['dummycol2'].str.split(';', 1).str
DFRAW['Col3'], DFRAW['Col4'] = DFRAW['dummycol3'].str.split(';', 1).str
DFRAW = DFRAW.drop([' ;W;W;W;W', 'dummycol1', 'dummycol2', 'dummycol3'], axis = 1)
tmp_dict[f] = DFRAW
return tmp_dict
filepathlist = [r"file1.csv", r"file2.csv"]
myINST = myClass("MyInstName", filepathlist )
new_dict = myINST.formatData() # LOCAL VARIABLE (ALSO ACCESSIBLE IN VARS)
print(vars(myINST))
# {'name': 'MyInstName', 'mydataframedict': {'file1.csv': ..., 'file2.csv': ...},
# 'filepathlist': ['file1.csv', 'file2.csv']}

export list to csv and present to user via browser

Want to prompt browser to save csv
^^working off above question, file is exporting correctly but the data is not displaying correctly.
#view_config(route_name='csvfile', renderer='csv')
def csv(self):
name = DBSession.query(table).join(othertable).filter(othertable.id == 9701).all()
header = ['name']
rows = []
for item in name:
rows = [item.id]
return {
'header': header,
'rows': rows
}
Getting _csv.Error
Error: sequence expected but if I change in my renderer writer.writerows(value['rows']) to writer.writerow(value['rows']) the file will download via the browser just fine. Problem is, it's not displaying data in each row. The entire result/dataset is in one row, so each entry is in it's own column rather than it's own row.
First, I wonder if having a return statement inside your for loop isn't also causing problems; from the linked example it looks like their loop was in the prior statement.
I think what it looks like it's doing is it's building a collection of rows based on "table" having columns with the same name as the headers. What are the fields in your table table?
name = DBSession.query(table).join(othertable).filter(othertable.id == 9701).all()
This is going to give you back essentially a collection of rows from table, as if you did a SELECT query on it.
Something like
name = DBSession.query(table).join(othertable).filter(othertable.id == 9701).all()
header = ['name']
rows = []
for item in name:
rows.append(item.name)
return {
'header': header,
'rows': r
}
Figured it out. kept getting Error: sequence expected so I was looking at the output. Decided to try putting the result inside another list.
#view_config(route_name='csv', renderer='csv')
def csv(self):
d = datetime.now()
query = DBSession.query(table, othertable).join(othertable).join(thirdtable).filter(
thirdtable.sid == 9701)
header = ['First Name', 'Last Name']
rows = []
filename = "csvreport" + d.strftime(" %m/%d").replace(' 0', '')
for i in query:
items = [i.table.first_name, i.table.last_name, i.othertable.login_time.strftime("%m/%d/%Y"),
]
rows.append(items)
return {
'header': header,
'rows': rows,
'filename': filename
}
This accomplishes 3 things. Fills out the header, fills the rows, and passes through a filename.
Renderer should look like this:
class CSVRenderer(object):
def __init__(self, info):
pass
def __call__(self, value, system):
fout = StringIO.StringIO()
writer = csv.writer(fout, delimiter=',',quotechar =',',quoting=csv.QUOTE_MINIMAL)
writer.writerow(value['header'])
writer.writerows(value['rows'])
resp = system['request'].response
resp.content_type = 'text/csv'
resp.content_disposition = 'attachment;filename='+value['filename']+'.csv'
return fout.getvalue()
This way, you can use the same csv renderer anywhere else and be able to pass through your own filename. It's also the only way I could figure out how to get the data from one column in the database to iterate through one column in the renderer. It feels a bit hacky but it works and works well.

Categories