Matplotlib: saving plot preferences without the data - python

Currently I am looking at saving a matplotlib plot whose state is unknown in advance, because it is user facing. The way I intend to do this is by taking the figure and putting its preferences into a dictionary. As long as only base types are stored in the dictionary, the json library can then be used to save this dictionary to a file and load it back.
The overall aim is to have OS independence and cross matplotlib version compatibility.
I've prototyped saving the preferences/settings of the plot to dictionary:
import numpy as np
from matplotlib import ticker
import matplotlib.pyplot as plt
import matplotlib.colors
import json
def get_dict_from_fig(fig):
    """Serialise a figure into a plain dictionary.

    The result has two keys: "Axes", a list holding one dictionary per
    entry of ``fig.axes``, and "Properties", the figure-level settings.
    """
    return {
        "Axes": [get_dict_for_axes(axes) for axes in fig.axes],
        "Properties": get_dict_from_fig_properties(fig),
    }
def get_dict_for_axes(ax):
    """Collect the settings of a single axes into a dictionary.

    Keys written, in order: "Properties", "Lines", "Texts", "Title",
    "XAxis Title", "YAxis Title", "Text from artists" and "Legend".
    """
    ax_dict = {"Properties": get_dict_from_axes_properties(ax)}

    # One entry per line on the axes, tagged with its position.
    ax_dict["Lines"] = [
        get_dict_from_line(line, position)
        for position, line in enumerate(ax.lines)
    ]
    ax_dict["Texts"] = [get_dict_from_text(text) for text in ax.texts]

    ax_dict["Title"] = get_dict_from_text(ax.title)
    ax_dict["XAxis Title"] = get_dict_from_text(ax.xaxis.label)
    ax_dict["YAxis Title"] = get_dict_from_text(ax.yaxis.label)

    # Artists may also be Text instances.  Each match overwrites the
    # previous one, so only the last Text artist is kept.
    artist_text_dict = {}
    for artist in ax.artists:
        if isinstance(artist, matplotlib.text.Text):
            artist_text_dict = get_dict_from_text(artist)
    ax_dict["Text from artists"] = artist_text_dict

    # A missing or hidden legend is stored as an empty dictionary.
    legend = ax.get_legend()
    if legend is None or not legend.get_visible():
        legend_dict = {}
    else:
        legend_dict = get_dict_from_legend(legend)
        legend_dict["Visible"] = True
    ax_dict["Legend"] = legend_dict
    return ax_dict
def get_dict_from_axes_properties(ax):
    """Return the axes-level settings of *ax* as a dictionary.

    Captures the position bounds, the navigate/axison/frame flags, the
    per-axis property dictionaries, and the scale and limits of both axes.
    """
    return {
        "Bounds": ax.get_position().bounds,
        "Dynamic": ax.get_navigate(),
        "Axison": ax.axison,
        "Frame On": ax.get_frame_on(),
        "XAxis Properties": get_dict_from_axis_properties(ax.xaxis),
        "YAxis Properties": get_dict_from_axis_properties(ax.yaxis),
        # XAxis scale and limits
        "XAxis Scale": ax.xaxis.get_scale(),
        "XLim": ax.get_xlim(),
        # YAxis scale and limits.  The scale must come from the y axis;
        # reading it from the x axis loses a log/linear mismatch.
        "YAxis Scale": ax.yaxis.get_scale(),
        "YLim": ax.get_ylim(),
    }
def get_dict_from_axis_properties(ax):
    """Return the settings of a single axis (XAxis or YAxis) as a dictionary.

    Args:
        ax: a ``matplotlib.axis.XAxis`` or ``matplotlib.axis.YAxis``
            (despite the parameter name this is an axis, not an axes).

    Returns:
        dict with the keys "Position", "nTicks", "Tick Values",
        "Tick Format", "Font size", "Scale", "Grid Style" and "Visible".

    Raises:
        ValueError: if *ax* is neither an XAxis nor a YAxis.
    """
    prop_dict = {}

    # The side the tick labels are drawn on is inferred from the private
    # major-tick keyword cache: label1 is the bottom/left label.
    label1On = ax._major_tick_kw.get('label1On', True)
    if isinstance(ax, matplotlib.axis.XAxis):
        prop_dict["Position"] = "Bottom" if label1On else "Top"
    elif isinstance(ax, matplotlib.axis.YAxis):
        prop_dict["Position"] = "Left" if label1On else "Right"
    else:
        raise ValueError("Value passed is not a valid axis")

    # A locator is callable, not iterable: calling it yields the tick
    # locations.  Evaluate it once and reuse the result.
    locator = ax.get_major_locator()
    tick_locations = locator()
    prop_dict["nTicks"] = len(tick_locations)
    if isinstance(locator, ticker.FixedLocator):
        # Plain floats keep the dictionary JSON serialisable.
        prop_dict["Tick Values"] = [float(loc) for loc in tick_locations]
    else:
        prop_dict["Tick Values"] = None

    formatter = ax.get_major_formatter()
    if isinstance(formatter, ticker.FixedFormatter):
        prop_dict["Tick Format"] = list(formatter.seq)
    else:
        prop_dict["Tick Format"] = ""

    labels = ax.get_ticklabels()
    prop_dict["Font size"] = labels[0].get_fontsize() if labels else ""

    prop_dict["Scale"] = ax.get_scale()
    prop_dict["Grid Style"] = get_dict_for_grid_style(ax)
    prop_dict["Visible"] = ax.get_visible()
    return prop_dict
def get_dict_for_grid_style(ax):
    """Return the major-grid style of an axis as a dictionary.

    The result always contains "Grid On".  When the major grid is enabled
    and at least one grid line exists, "Color" (hex string) and "Alpha"
    of the first grid line are included as well.
    """
    # Older Matplotlib exposes the flag as ``_gridOnMajor``; where that
    # attribute is absent, fall back to the major tick keyword cache.
    grid_on = getattr(ax, "_gridOnMajor", None)
    if grid_on is None:
        grid_on = getattr(ax, "_major_tick_kw", {}).get("gridOn", False)

    gridlines = ax.get_gridlines()
    if grid_on and len(gridlines) > 0:
        first_line = gridlines[0]
        return {
            "Color": matplotlib.colors.to_hex(first_line.get_color()),
            "Alpha": first_line.get_alpha(),
            "Grid On": True,
        }
    return {"Grid On": False}
def get_dict_from_line(line, index=0):
    """Describe a Line2D as a dictionary of its visual settings.

    *index* is stored under "Line Index".  An alpha of ``None`` is
    stored as 1.
    """
    alpha = line.get_alpha()
    return {
        "Line Index": index,
        "Label": line.get_label(),
        "Alpha": 1 if alpha is None else alpha,
        "Color": matplotlib.colors.to_hex(line.get_color()),
        "Linewidth": line.get_linewidth(),
        "Line Style": line.get_linestyle(),
        "Marker Style": get_dict_from_marker_style(line),
    }
def get_dict_from_marker_style(line):
    """Describe the marker of *line*: colours (as hex), edge width, type,
    size and z-order."""
    to_hex = matplotlib.colors.to_hex
    return {
        "Face Color": to_hex(line.get_markerfacecolor()),
        "Edge Color": to_hex(line.get_markeredgecolor()),
        "Edge Width": line.get_markeredgewidth(),
        "Marker Type": line.get_marker(),
        "Marker Size": line.get_markersize(),
        "ZOrder": line.get_zorder(),
    }
def get_dict_from_text(text):
    """Describe a Text artist as a dictionary.

    "Text" is always present.  "Transform", "Position" and "Style" are
    only added when the text is non-empty.
    """
    content = text.get_text()
    text_dict = {"Text": content}
    if not content:
        return text_dict

    # NOTE(review): the transform is stored as returned by get_transform();
    # confirm it can be serialised before passing the result to json.
    text_dict["Transform"] = text.get_transform()
    text_dict["Position"] = text.get_position()
    text_dict["Style"] = get_dict_from_text_style(text)
    return text_dict
def get_dict_from_text_style(text):
    """Describe the styling of a Text artist.

    An alpha of ``None`` is stored as 1; the colour is stored as hex.
    """
    alpha = text.get_alpha()
    return {
        "Alpha": 1 if alpha is None else alpha,
        "Text Size": text.get_size(),
        "Color": matplotlib.colors.to_hex(text.get_color()),
        "hAlign": text.get_horizontalalignment(),
        "vAlign": text.get_verticalalignment(),
        # Read from the private attribute, as no getter is used here.
        "mAlign": text._multialignment,
        "Rotation": text.get_rotation(),
        "ZOrder": text.get_zorder(),
    }
def get_dict_from_legend(legend):
    """Describe a legend as {"Text": [...], "Line List": [...]}.

    The legend's children plus its ``legendPatch`` are scanned; Text
    children go to "Text", Line2D children go to "Line List".
    """
    elements = get_list_of_legend_children(legend)
    elements.append(legend.legendPatch)

    texts = []
    lines = []
    for element in elements:
        try:
            if isinstance(element, matplotlib.text.Text) and element.get_text() is not None:
                texts.append(get_dict_from_text(element))
            if isinstance(element, matplotlib.lines.Line2D):
                lines.append(get_dict_from_line(element))
        except NotImplementedError:
            # Elements that cannot be described are deliberately skipped.
            continue
    return {"Text": texts, "Line List": lines}
def get_list_of_legend_children(legend):
    """Return a flat list of the leaf artists below *legend*.

    An object without ``get_children`` (or with no children) is a leaf
    and is returned as a one-element list.  Otherwise the leaves of every
    child are concatenated, depth first, into a single flat list.
    """
    children = legend.get_children() if hasattr(legend, 'get_children') else []
    if len(children) == 0:
        return [legend]

    leaves = []
    for child in children:
        # extend, not append: appending the recursive result would nest
        # lists inside the list and hide the artists from isinstance checks.
        leaves.extend(get_list_of_legend_children(child))
    return leaves
def get_dict_from_fig_properties(fig):
    """Return the figure width, height and dpi as a dictionary."""
    return {
        "Fig width": fig.get_figwidth(),
        "Fig height": fig.get_figheight(),
        "dpi": fig.dpi,
    }
# Demo: plot a small line, capture the figure settings and write them out.
XVals = np.array([1, 2, 3])
YVals = np.array([1, 2, 3])
plt.plot(XVals, YVals)
dictionary = get_dict_from_fig(plt.gcf())
# The context manager closes (and flushes) the file even if dumping fails.
with open("./savefile.json", "w") as f:
    json.dump(dictionary, f, indent=4)
I was wondering if there was any way to do this already, perhaps with a maintained library? I've tried to find something to do it and nothing that I could find was overly useful, besides inspiration. I have already used mpld3 for inspiration.
I should probably have mentioned this earlier but when saving it is key to load back the data that has been saved, else there would be little point in saving it.

Related

Threading and Matplotlib

I have created an application which generates graphs and displays them. Because some of the processing takes a long time, I added a loading bar, which requires the use of threading.
The first run of the application works, but every subsequent attempt to display a heatmap raises "RuntimeError: main thread is not in main loop".
Without the use of threading, it would work perfectly fine. I would like to thread and still continuously use graphs.
Sorry for the messy codes.
# Main application window (subclass of customtkinter.CTk).
# NOTE(review): the paste has lost its indentation, so the nesting of the
# statements below cannot be read off the text; the comments describe only
# what each statement does.
class App(customtkinter.CTk):
# Build per-key brightness averages and motion maxima from the module-level
# figs_b / figs_m dictionaries, add them (and avg_c) as columns to a CSV the
# user picks, and show a seaborn heatmap of the column correlations.
def generate_heatmap(self):
print('Generate HeatMap')
brightness = []
motion = []
for k in figs_b.keys():
l = figs_b[k][1]
brightness.append(sum(l)/len(l))
for s in figs_m.keys():
print(s)
l = figs_m[s][1]
motion.append(max(l))
data = pd.read_csv(filedialog.askopenfilename())
print(data)
data.insert(2,'Color',avg_c)
data.insert(3,'Motion',motion)
data.insert(4,'Brightness',brightness)
sns.heatmap(data.corr(), annot=True, cmap='viridis', vmin=-1, vmax=1)
# NOTE(review): plt.show() starts pyplot's own GUI handling; presumably this
# interacts with the Tk main loop and the worker thread -- confirm.
plt.show()
# Swap the main frame for the loading frame while the worker thread self.t is
# alive, start the GIF animation, then swap the frames back.
def loading(self):
if(self.t.is_alive()):
print('EXECUTE LOAD')
self.main_frame.grid_forget()
self.loading_frame.grid(row=0,column=1,rowspan=4,columnspan=4,sticky="nsew")
self.loading_frame.grid_propagate(False)
self.loading_lbl.pack_propagate(False)
extract_image_from_gif()
self.play_gif()
self.update()
print('RESET LOAD')
self.loading_frame.grid_forget()
self.main_frame.grid(row=0,column=1,rowspan=4,columnspan=4,sticky="nsew")
# NOTE(review): the next line only references the method; without "()" it
# is never called.
self.update
# Advance the loading animation by two frames and reschedule itself through
# self.after(); uses the module-level countx, cur_img, lst_image and
# gif_duration.
def play_gif(self):
global countx, cur_img
try:
print('in')
countx += 2
cur_img = customtkinter.CTkImage(lst_image[countx],size=(800,200))
self.loading_lbl.configure(image = cur_img)
self.update()
self.after(gif_duration,self.play_gif)
# Any exception (presumably the index running past the end of lst_image)
# restarts the animation from frame 0 while the worker is still alive.
except Exception as e:
#print(e)
if(self.t.is_alive()):
countx=0
self.after(gif_duration, self.play_gif)
else:
return
# Ask for a file (unless a name is forced), add a button for it, then run
# getGraph on a worker thread while the loading screen is shown.
def openfile(self,force=''):
name = force
decision = ''
if force == '':
decision = filedialog.askopenfilename().split('/')
name = decision[len(decision)-1].split('.')[0]
if name == '':
return
self.btn_list.append(customtkinter.CTkButton(self.btn_frame,text=name,command=lambda: self.change_analysis(name)))
for x,btn in enumerate(self.btn_list):
btn.grid(row=x+1,column=0,padx=10,pady=10)
print('*'*20)
self.t = th.Thread(target=self.getGraph, args=(decision,name,))
self.t.start()
self.loading()
# join() blocks the calling thread until the worker has finished.
self.t.join()
self.change_analysis(name)
print('*'*20)
# Worker: extract frames for `name` if needed, collect the per-frame RGB
# values, build the colour/motion/brightness graphs into the module-level
# figs_c / figs_m / figs_b dictionaries and append the average colour to avg_c.
# NOTE(review): this runs on the thread started in openfile; whether
# generate_graph / motion_analysis / brightness_graph / save_data touch
# Tk or pyplot objects is not visible here -- verify, since the reported
# "main thread is not in main loop" error concerns GUI use off the main thread.
def getGraph(self,decision,name):
if os.path.exists(f'Frames/{name}') and decision != '':
return
elif(decision !=''):
frame_extraction('\\'.join(decision),name)
red = []
green = []
blue = []
files = os.listdir(f'Frames/{name}')
# Strip the "f_" prefix so the files can be ordered by frame number.
sorted_files = [file.replace('f_','') for file in files]
sorted_files = sorted(sorted_files, key=lambda x: int(os.path.splitext(x)[0]))
for frame in sorted_files:
rgb = color_extraction(f'Frames/{name}/f_{frame}')
r,g,b = rgb[0]
red.append(r)
green.append(g)
blue.append(b)
#print(f'Frame: {frame} RGB:{rgb[0]}')
frames = [(int(os.path.splitext(x)[0]))*20 for x in sorted_files]
figs_c[name] = generate_graph(red,green,blue,frames)
if(decision != ''):
figs_m[name] = motion_analysis(name)
figs_b[name] = brightness_graph(name)
print(figs_b[name])
self.save_data()
avg_r = round(sum(red) / len(red))
avg_g = round(sum(green) / len(green))
avg_b = round(sum(blue) / len(blue))
# NOTE(review): the call below is missing its final closing parenthesis.
avg_c.append(rgbtoint32([avg_r,avg_g,avg_b])

Coco_eval - Average Precision and Recall

I trained my model with Mask R-CNN and now I need to test it. How can I extract the AP and AR values and plot a graph? I know how to plot with matplotlib, but I need to plot a precision-recall curve and I don't know how to access the AP and AR values. Where are they saved?
I'm using this coco_eval script, and from here I see in function summarize there are print("IoU metric: {}".format(iou_type)) and this I got in output and under that AP and AR results, but I can't find it here in code. Where is this calculation?
coco_eval.py
import json
import tempfile
import numpy as np
import copy
import time
import torch
import torch._six
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import pycocotools.mask as mask_util
from collections import defaultdict
import utils
class CocoEvaluator:
    """Run COCO evaluation incrementally, with one COCOeval per IoU type.

    Predictions are fed in batches through :meth:`update`; afterwards
    :meth:`synchronize_between_processes`, :meth:`accumulate` and
    :meth:`summarize` finish the evaluation.  The per-type ``COCOeval``
    objects stay available in ``self.coco_eval``.
    """

    def __init__(self, coco_gt, iou_types):
        """Create one COCOeval per entry of *iou_types*.

        Args:
            coco_gt: ground-truth COCO api object (deep-copied).
            iou_types: list or tuple of "bbox", "segm" and/or "keypoints".
        """
        assert isinstance(iou_types, (list, tuple))
        coco_gt = copy.deepcopy(coco_gt)
        self.coco_gt = coco_gt

        self.iou_types = iou_types
        self.coco_eval = {}
        for iou_type in iou_types:
            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)

        self.img_ids = []
        self.eval_imgs = {k: [] for k in iou_types}

    def update(self, predictions):
        """Evaluate one batch of predictions, keyed by image id."""
        img_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(img_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)
            coco_dt = loadRes(self.coco_gt, results) if results else COCO()
            coco_eval = self.coco_eval[iou_type]

            coco_eval.cocoDt = coco_dt
            coco_eval.params.imgIds = list(img_ids)
            # Only the per-image results are needed; the ids returned by
            # evaluate() are ignored so the batch ids are not rebound.
            _, eval_imgs = evaluate(coco_eval)

            self.eval_imgs[iou_type].append(eval_imgs)

    def synchronize_between_processes(self):
        """Concatenate the per-batch results and install them on each COCOeval."""
        for iou_type in self.iou_types:
            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])

    def accumulate(self):
        """Call ``accumulate()`` on every COCOeval."""
        for coco_eval in self.coco_eval.values():
            coco_eval.accumulate()

    def summarize(self):
        """Print the IoU type and then each COCOeval's own summary.

        NOTE(review): the AP/AR lines are printed by pycocotools'
        ``COCOeval.summarize``, not by this file; it is understood to keep
        the printed values on ``coco_eval.stats`` -- confirm against the
        installed pycocotools.
        """
        for iou_type, coco_eval in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            coco_eval.summarize()

    def prepare(self, predictions, iou_type):
        """Convert *predictions* to COCO result dicts for *iou_type*.

        Raises:
            ValueError: if *iou_type* is not "bbox", "segm" or "keypoints".
        """
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        elif iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        elif iou_type == "keypoints":
            return self.prepare_for_coco_keypoint(predictions)
        else:
            raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        """Return one result dict per predicted box (boxes as x, y, w, h)."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "bbox": box,
                        "score": scores[k],
                    }
                    for k, box in enumerate(boxes)
                ]
            )
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        """Return one result dict per predicted mask, RLE-encoded."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            # Binarise the soft masks at 0.5 before encoding.
            masks = prediction["masks"] > 0.5

            rles = [
                mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
                for mask in masks
            ]
            for rle in rles:
                # The encoder returns bytes; decode them to text.
                rle["counts"] = rle["counts"].decode("utf-8")

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "segmentation": rle,
                        "score": scores[k],
                    }
                    for k, rle in enumerate(rles)
                ]
            )
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        """Return one result dict per predicted keypoint set (flattened)."""
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            # Kept from the original: the boxes are converted although the
            # keypoint result dicts do not include them.
            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            keypoints = prediction["keypoints"]
            keypoints = keypoints.flatten(start_dim=1).tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        'keypoints': keypoint,
                        "score": scores[k],
                    }
                    for k, keypoint in enumerate(keypoints)
                ]
            )
        return coco_results
def convert_to_xywh(boxes):
    """Convert boxes from (xmin, ymin, xmax, ymax) to (x, y, width, height).

    *boxes* is unbound along dimension 1 and the result is stacked back
    along that same dimension.
    """
    left, top, right, bottom = boxes.unbind(1)
    width = right - left
    height = bottom - top
    return torch.stack((left, top, width, height), dim=1)
def merge(img_ids, eval_imgs):
    """Gather image ids and per-image results through ``utils.all_gather``
    and merge them.

    Returns:
        (merged_img_ids, merged_eval_imgs): the unique, sorted image ids
        and the matching slices of the concatenated results.
    """
    gathered_ids = utils.all_gather(img_ids)
    gathered_evals = utils.all_gather(eval_imgs)

    flat_ids = []
    for chunk in gathered_ids:
        flat_ids.extend(chunk)

    merged_eval_imgs = np.concatenate(list(gathered_evals), 2)

    # keep only unique (and in sorted order) images
    merged_img_ids, first_seen = np.unique(np.array(flat_ids), return_index=True)
    return merged_img_ids, merged_eval_imgs[..., first_seen]
def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    """Install the merged ids and per-image results on *coco_eval*.

    Sets ``evalImgs`` (flattened), ``params.imgIds`` and a deep copy of
    the params as ``_paramsEval``.
    """
    merged_ids, merged_evals = merge(img_ids, eval_imgs)

    coco_eval.evalImgs = list(merged_evals.flatten())
    coco_eval.params.imgIds = list(merged_ids)
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
#################################################################
# From pycocotools, just removed the prints and fixed
# a Python3 bug about unicode not defined
#################################################################
# Ideally, pycocotools wouldn't have hard-coded prints
# so that we could avoid copy-pasting those two functions
def createIndex(self):
    """Build the lookup tables of a COCO-style object from ``self.dataset``.

    Sets ``anns``, ``imgs`` and ``cats`` (id -> record) plus ``imgToAnns``
    (image id -> annotations) and ``catToImgs`` (category id -> image ids).
    Sections missing from the dataset leave their tables empty.
    """
    dataset = self.dataset
    has_annotations = 'annotations' in dataset

    anns = {}
    imgToAnns = defaultdict(list)
    if has_annotations:
        for annotation in dataset['annotations']:
            imgToAnns[annotation['image_id']].append(annotation)
            anns[annotation['id']] = annotation

    imgs = {}
    if 'images' in dataset:
        imgs = {image['id']: image for image in dataset['images']}

    cats = {}
    if 'categories' in dataset:
        cats = {category['id']: category for category in dataset['categories']}

    # The category -> images table needs both sections to be present.
    catToImgs = defaultdict(list)
    if has_annotations and 'categories' in dataset:
        for annotation in dataset['annotations']:
            catToImgs[annotation['category_id']].append(annotation['image_id'])

    # create class members
    self.anns = anns
    self.imgToAnns = imgToAnns
    self.catToImgs = catToImgs
    self.imgs = imgs
    self.cats = cats
maskUtils = mask_util


def loadRes(self, resFile):
    """
    Load result file and return a result api object.
    Args:
        self (obj): coco object with ground truth annotations
        resFile (str): file name of result file; a numpy array or an
            already-loaded list of result dicts is accepted as well
    Returns:
        res (obj): result api object
    Raises:
        AssertionError: if the results are not a list, or reference image
            ids that are not part of the ground truth set
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    # A plain ``str`` check replaces ``torch._six.string_classes``: that is
    # a private torch helper and is not available in every torch release.
    if isinstance(resFile, str):
        # Close the file deterministically instead of leaking the handle.
        with open(resFile) as results_file:
            anns = json.load(results_file)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert type(anns) == list, 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'

    # The first result decides which kind of results this is; an empty
    # list matches no branch and yields a result object without annotations.
    first = anns[0] if anns else {}
    if 'caption' in first:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for idx, ann in enumerate(anns):
            ann['id'] = idx + 1
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # Use the box outline as the polygon.
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # Keypoints are a flat list of triples; every third value
            # starting at 0 is an x and starting at 1 is a y.
            s = ann['keypoints']
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = idx + 1
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res
def evaluate(self):
    '''
    Run per image evaluation on given images.

    Normalises ``self.params`` (unique image/category ids, sorted maxDets),
    fills ``self.ious`` and evaluates every (category, area range, image)
    combination with the largest maxDets value.
    :return: (image ids, results reshaped to categories x area ranges x images)
    '''
    params = self.params
    # add backward compatibility if useSegm is specified in params
    if params.useSegm is not None:
        params.iouType = 'segm' if params.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(params.iouType))
    params.imgIds = list(np.unique(params.imgIds))
    if params.useCats:
        params.catIds = list(np.unique(params.catIds))
    params.maxDets = sorted(params.maxDets)
    self.params = params

    self._prepare()

    # Without categories a single placeholder id of -1 is used.
    category_ids = params.catIds if params.useCats else [-1]

    if params.iouType in ('segm', 'bbox'):
        computeIoU = self.computeIoU
    elif params.iouType == 'keypoints':
        computeIoU = self.computeOks

    ious = {}
    for image_id in params.imgIds:
        for category_id in category_ids:
            ious[(image_id, category_id)] = computeIoU(image_id, category_id)
    self.ious = ious

    largest_max_det = params.maxDets[-1]
    per_image = []
    for category_id in category_ids:
        for area_range in params.areaRng:
            for image_id in params.imgIds:
                per_image.append(
                    self.evaluateImg(image_id, category_id, area_range, largest_max_det)
                )

    # this is NOT in the pycocotools code, but could be done outside
    per_image = np.asarray(per_image).reshape(
        len(category_ids), len(params.areaRng), len(params.imgIds)
    )
    self._paramsEval = copy.deepcopy(self.params)
    return params.imgIds, per_image
#################################################################
# end of straight copy from pycocotools, just removing the prints
#################################################################
And this is my code for evaluation:
def evaluate(model, data_loader, device):
    """Run *model* over *data_loader* and return the filled CocoEvaluator.

    The model is put in eval mode and run without gradient tracking; the
    outputs are moved to the CPU, fed to the evaluator, and the results
    are synchronised, accumulated and summarised.  The torch thread count
    is set to 1 for the duration of the call and restored afterwards,
    also when an exception is raised.

    Args:
        model: the detection model to evaluate.
        data_loader: yields ``(images, targets)`` batches; every target
            carries an ``"image_id"`` tensor.
        device: device the images are moved to before the forward pass.

    Returns:
        The CocoEvaluator; its per-type COCOeval objects are available in
        ``coco_evaluator.coco_eval``.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter=" ")
        header = 'Test:'

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        # Inference only: skip autograd bookkeeping to save memory.
        with torch.no_grad():
            for images, targets in metric_logger.log_every(data_loader, 100, header):
                images = list(img.to(device) for img in images)

                # Wait for pending GPU work so the timing below is meaningful.
                if torch.cuda.is_available():
                    torch.cuda.synchronize()
                model_time = time.time()
                outputs = model(images)

                outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
                model_time = time.time() - model_time

                res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
                evaluator_time = time.time()
                coco_evaluator.update(res)
                evaluator_time = time.time() - evaluator_time
                metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        torch.set_num_threads(n_threads)
    return coco_evaluator
These are the results I got:

ID3 Algorithm in Python

I am trying to plot a decision tree using ID3 in Python. I am really new to Python and couldn't understand the implementation of the following code. I need to know how I can apply this code to my data.
from math import log
import operator
def entropy(data):
    """Return the Shannon entropy (base 2) of the class labels in *data*.

    The label of each row is its last element.  Empty data yields 0.0.
    """
    total = len(data)
    counts = {}
    for row in data:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1

    result = 0.0
    for count in counts.values():
        share = float(count) / total
        result -= share * log(share, 2)
    return result
def split(data, axis, val):
    """Return the rows whose column *axis* equals *val*, with that column
    removed.  Rows must be lists; the input rows are not modified."""
    matching_rows = (row for row in data if row[axis] == val)
    newData = []
    for row in matching_rows:
        trimmed = row[:axis]
        trimmed.extend(row[axis + 1:])
        newData.append(trimmed)
    return newData
def choose(data):
    """Return the index of the feature with the highest information gain.

    Every column except the last (the class label) is a candidate.
    Returns -1 when no feature has a gain above 0.
    """
    feature_count = len(data[0]) - 1
    base_entropy = entropy(data)
    row_count = float(len(data))

    best_gain = 0.0
    best_index = -1
    for index in range(feature_count):
        # Weighted entropy of the subsets produced by splitting on this
        # feature, one subset per distinct value.
        weighted_entropy = 0.0
        for value in set(row[index] for row in data):
            subset = split(data, index, value)
            weight = len(subset) / row_count
            weighted_entropy += weight * entropy(subset)

        gain = base_entropy - weighted_entropy
        if gain > best_gain:
            best_gain = gain
            best_index = index
    return best_index
def majority(classList):
    """Return the most frequent value in *classList*.

    Ties are resolved in favour of the value that appears first.

    Raises:
        IndexError: if *classList* is empty.
    """
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` does not.
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
def tree(data, labels):
    """Build an ID3 decision tree as nested dictionaries.

    Args:
        data: list of rows (lists); the last element of each row is the
            class label, the others are feature values.
        labels: list of feature names, one per feature column.  It is
            not modified.

    Returns:
        A class label (leaf), or ``{feature_name: {value: subtree, ...}}``.
    """
    classList = [ex[-1] for ex in data]
    # All rows share one class: this branch is a leaf.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # No features left to split on: fall back to the most common class.
    if len(data[0]) == 1:
        return majority(classList)

    bestFeat = choose(data)
    bestFeatLabel = labels[bestFeat]
    theTree = {bestFeatLabel: {}}

    # Work on a copy so the caller's label list stays intact.
    remainingLabels = list(labels)
    del remainingLabels[bestFeat]

    uniqueVals = set(ex[bestFeat] for ex in data)
    for value in uniqueVals:
        theTree[bestFeatLabel][value] = tree(split(data, bestFeat, value), remainingLabels)
    return theTree
So what I did after this is the following:
# NOTE(review): tree() is declared above as tree(data, labels) and indexes
# into each row of data, while this snippet passes only the raw text of the
# file returned by read() -- hence the missing-argument error described below.
infile=open("SData.csv","r")
data=infile.read()
tree(data)
The error which I got is "1 argument is missing" which is the label which I have to define and this is where I don't know what I have to put. I tried the variable for which I have to make the decision tree but it doesn't work:
tree(data,MinTemp)
Here I get an error "MinTemp is not defined".
Please help me out and let me know what I should do to have a look at the tree.
Following is the part of data and I want to generate a tree for MinTemp
MinTemp,Rainfall,Tempat9,RHat9,CAat9,WSat9
high,no,mild,normal,overcast,weak
high,no,mild,normal,cloudy,weak
high,no,mild,normal,cloudy,mild
high,yes,mild,high,cloudy,weak
high,yes,mild,high,cloudy,mild
medium,yes,mild,high,cloudy,mild
high,no,mild,high,overcast,weak
high,no,mild,normal,sunny,weak
high,no,hot,normal,sunny,weak
high,no,hot,normal,overcast,weak

How to add a member function to an existing Python object?

Previously I created a lot of Python objects of class A, and I would like to add a new function plotting_in_PC_space_with_coloring_option() (the purpose of this function is to plot some data in this object) to class A and use those old objects to call plotting_in_PC_space_with_coloring_option().
An example is:
import copy
import numpy as np
from math import *
from pybrain.structure import *
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.datasets.supervised import SupervisedDataSet
import pickle
import neural_network_related
class A(object):
"""the neural network for simulation"""
'''
todo:
- find boundary
- get_angles_from_coefficients
'''
def __init__(self,
index, # the index of the current network
list_of_coor_data_files, # accept multiple files of training data
energy_expression_file, # input, output files
preprocessing_settings = None,
connection_between_layers = None, connection_with_bias_layers = None,
PCs = None, # principal components
):
self._index = index
self._list_of_coor_data_files = list_of_coor_data_files
self._energy_expression_file = energy_expression_file
self._data_set = []
for item in list_of_coor_data_files:
self._data_set += self.get_many_cossin_from_coordiantes_in_file(item)
self._preprocessing_settings = preprocessing_settings
self._connection_between_layers = connection_between_layers
self._connection_with_bias_layers = connection_with_bias_layers
self._node_num = [8, 15, 2, 15, 8]
self._PCs = PCs
def save_into_file(self, filename = None):
    """Pickle this object to *filename*.

    Without a filename the file is named after the network index
    ("network_<index>.pkl").
    """
    target = "network_%s.pkl" % str(self._index) if filename is None else filename
    with open(target, 'wb') as pickle_file:
        pickle.dump(self, pickle_file, pickle.HIGHEST_PROTOCOL)
def get_cossin_from_a_coordinate(self, a_coordinate):
    """Return the cosines followed by the sines of the dihedral angles.

    Args:
        a_coordinate: flat sequence of x, y, z values, three per point.

    Returns:
        A list holding the cosine of every dihedral angle along the chain
        of points, followed by the matching sines (N - 3 of each for N
        points).
    """
    # Integer division: reshape() needs an int on Python 3 as well.
    num_of_coordinates = len(a_coordinate) // 3
    points = np.array(a_coordinate).reshape(num_of_coordinates, 3)

    bonds = points[1:] - points[:-1]  # bond vectors
    # Normal of the plane spanned by each pair of consecutive bonds.
    normals = np.cross(bonds[:-1], bonds[1:])
    # A list comprehension (not map) so np.array gets a real sequence.
    unit_normals = np.array([normal / sqrt(np.dot(normal, normal)) for normal in normals])
    first_normals = unit_normals[:-1]
    second_normals = unit_normals[1:]
    # Bond vectors in the middle (without the first and the last one);
    # they should be perpendicular to the adjacent normal vectors.
    mid_bonds = bonds[1:-1]

    cos_of_angles = []
    sin_of_angles = []
    for normal_1, normal_2, mid_bond in zip(first_normals, second_normals, mid_bonds):
        cos_of_angles.append(np.dot(normal_1, normal_2))
        sin_vec = np.cross(normal_1, normal_2)
        # Magnitude from the cross product; the sign is taken from the
        # component sums of the cross product and the middle bond.
        sin_of_angles.append(
            sqrt(np.dot(sin_vec, sin_vec)) * np.sign(sum(sin_vec) * sum(mid_bond))
        )
    return cos_of_angles + sin_of_angles
def get_many_cossin_from_coordinates(self, coordinates):
    """Return a list with the cos/sin values of every entry of *coordinates*."""
    # A list on every Python version; map() is a lazy iterator on Python 3.
    return [self.get_cossin_from_a_coordinate(coordinate) for coordinate in coordinates]
def get_many_cossin_from_coordiantes_in_file (self, filename):
    """Load coordinates from *filename* with ``np.loadtxt`` and return
    their cos/sin values."""
    return self.get_many_cossin_from_coordinates(np.loadtxt(filename))
def mapminmax(self, my_list):  # for preprocessing in network
    """Rescale *my_list* linearly so its minimum maps to -1 and its maximum to 1.

    Returns:
        (result, (mul_factor, offset)): the rescaled values as an array
        and the parameters used, where
        ``result = (x - offset) * mul_factor``.
    """
    my_min = min(my_list)
    my_max = max(my_list)
    mul_factor = 2.0 / (my_max - my_min)
    offset = (my_min + my_max) / 2.0
    # Build a real list first: np.array() of a lazy map object would give
    # a 0-d object array on Python 3.
    result_list = np.array([(x - offset) * mul_factor for x in my_list])
    return (result_list, (mul_factor, offset))  # also return the parameters for processing
def get_mapminmax_preprocess_result_and_coeff(self,data=None):
if data is None:
data = self._data_set
data = np.array(data)
data = np.transpose(data)
result = []; params = []
for item in data:
temp_result, preprocess_params = self.mapminmax(item)
result.append(temp_result)
params.append(preprocess_params)
return (np.transpose(np.array(result)), params)
def mapminmax_preprocess_using_coeff(self, input_data=None, preprocessing_settings=None):
    """Rescale every item of *input_data* with stored parameter pairs.

    *preprocessing_settings* defaults to ``self._preprocessing_settings``.
    After transposing, row 0 holds the factors and row 1 the offsets;
    each item becomes ``(item - offsets) * factors``.
    """
    settings = self._preprocessing_settings if preprocessing_settings is None else preprocessing_settings
    coefficients = np.transpose(np.array(settings))
    return [
        np.multiply(item - coefficients[1], coefficients[0])
        for item in input_data
    ]
# Build the text of an expression describing the first two layers of the
# network, followed by the definitions of the layer-0 inputs, and return it.
# Both connection arguments default to the values stored on the instance.
# NOTE(review): the paste has lost its indentation; the comments describe the
# statements, not their nesting.
def get_expression_of_network(self, connection_between_layers=None, connection_with_bias_layers=None):
if connection_between_layers is None:
connection_between_layers = self._connection_between_layers
if connection_with_bias_layers is None:
connection_with_bias_layers = self._connection_with_bias_layers
node_num = self._node_num
expression = ""
# first part: network
# Only the connections with index 0 and 1 are written out.
for i in range(2):
expression = '\n' + expression
# Weights reshaped to (units in layer i + 1, units in layer i).
mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i])
bias_coef = connection_with_bias_layers[i].params
for j in range(np.size(mul_coef, 0)):
# One line per unit: tanh of the weighted sum of the previous layer's
# units plus the bias.
temp_expression = 'layer_%d_unit_%d = tanh( ' % (i + 1, j)
for k in range(np.size(mul_coef, 1)):
temp_expression += ' %f * layer_%d_unit_%d +' % (mul_coef[j, k], i, k)
temp_expression += ' %f);\n' % (bias_coef[j])
# New lines are prepended, so later units and layers end up first.
expression = temp_expression + expression # order of expressions matter in OpenMM
# second part: definition of inputs
index_of_backbone_atoms = [2, 5, 7, 9, 15, 17, 19];
# One dihedral angle per run of four consecutive backbone atoms.
for i in range(len(index_of_backbone_atoms) - 3):
# Unit i carries the cosine of dihedral i, unit i + 4 the sine.
index_of_coss = i
index_of_sins = i + 4
# Scaling always uses self._preprocessing_settings: entry [1] is
# subtracted and entry [0] is the multiplier.
expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
(index_of_coss, index_of_coss, self._preprocessing_settings[index_of_coss][1], self._preprocessing_settings[index_of_coss][0])
expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
(index_of_sins, index_of_sins, self._preprocessing_settings[index_of_sins][1], self._preprocessing_settings[index_of_sins][0])
expression += 'raw_layer_0_unit_%d = cos(dihedral_angle_%d);\n' % (index_of_coss, i)
expression += 'raw_layer_0_unit_%d = sin(dihedral_angle_%d);\n' % (index_of_sins, i)
expression += 'dihedral_angle_%d = dihedral(p%d, p%d, p%d, p%d);\n' % \
(i, index_of_backbone_atoms[i], index_of_backbone_atoms[i+1],index_of_backbone_atoms[i+2],index_of_backbone_atoms[i+3])
return expression
def write_expression_into_file(self, out_file = None):
    """Write the network expression to *out_file*.

    *out_file* defaults to ``self._energy_expression_file``.  The
    expression is built before the file is opened.
    """
    destination = self._energy_expression_file if out_file is None else out_file
    expression = self.get_expression_of_network()
    with open(destination, 'w') as f_out:
        f_out.write(expression)
def get_mid_result(self, input_data=None, connection_between_layers=None, connection_with_bias_layers=None):
    """Return the output of every layer for each input sample.

    The input is first rescaled with ``self.mapminmax_preprocess_using_coeff``
    and ``self._preprocessing_settings``, then pushed through the four
    connections.  All three arguments default to the values stored on the
    instance.

    Returns a list with one entry per sample; each entry is a list of four
    layer outputs.  The first three are lists of tanh-activated values; the
    last one (linear output layer) is the raw ``np.dot(...) + bias`` result.
    """
    if input_data is None:
        input_data = self._data_set
    if connection_between_layers is None:
        connection_between_layers = self._connection_between_layers
    if connection_with_bias_layers is None:
        connection_with_bias_layers = self._connection_with_bias_layers
    node_num = self._node_num
    num_of_connections = 4
    mid_result = []
    # first need to do preprocessing
    for item in self.mapminmax_preprocess_using_coeff(input_data, self._preprocessing_settings):
        # A fresh list per sample: earlier entries of mid_result can never be
        # overwritten, so no shared buffer + deepcopy is needed, and nothing
        # relies on Python 2's range() returning a mutable list.
        layer_outputs = []
        previous_result = item
        for i in range(num_of_connections):
            # TODO (from original): fix node_num
            mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i])
            bias_coef = connection_with_bias_layers[i].params
            current = np.dot(mul_coef, previous_result) + bias_coef
            if i != num_of_connections - 1:  # the last output layer is linear, the others are tanh layers
                # list comprehension rather than map(): map() is lazy on
                # Python 3 and could not be fed to np.dot for the next layer
                current = [tanh(value) for value in current]
            layer_outputs.append(current)
            previous_result = current
        mid_result.append(layer_outputs)
    return mid_result
def get_PC_and_save_it_to_network(self):
    """Compute the PCs and store them in self._PCs.

    For every entry returned by self.get_mid_result() (called with its
    defaults), the element at index 1 is kept.
    """
    principal_components = []
    for layer_outputs in self.get_mid_result():
        principal_components.append(layer_outputs[1])
    self._PCs = principal_components
    return
def train(self):
    """Build the autoencoder, train it on self._data_set and keep its connections.

    The network has a linear input layer, three tanh hidden layers and a
    linear output layer, sized by self._node_num, with one bias unit feeding
    each non-input layer.  Every preprocessed sample is used as both input
    and target.

    Side effects: sets self._preprocessing_settings,
    self._connection_between_layers and self._connection_with_bias_layers,
    and prints "Done!" when finished.
    """
    ####################### set up autoencoder begin #######################
    node_num = self._node_num
    in_layer = LinearLayer(node_num[0], "IL")
    hidden_layers = [TanhLayer(node_num[1], "HL1"), TanhLayer(node_num[2], "HL2"), TanhLayer(node_num[3], "HL3")]
    bias_layers = [BiasUnit("B1"), BiasUnit("B2"), BiasUnit("B3"), BiasUnit("B4")]
    out_layer = LinearLayer(node_num[4], "OL")
    layer_list = [in_layer] + hidden_layers + [out_layer]
    molecule_net = FeedForwardNetwork()
    molecule_net.addInputModule(in_layer)
    for item in (hidden_layers + bias_layers):
        molecule_net.addModule(item)
    molecule_net.addOutputModule(out_layer)
    # Plain lists filled by append: assigning into range(4) only works on
    # Python 2, where range() returns a list.
    connection_between_layers = []
    connection_with_bias_layers = []
    for i in range(4):
        layer_connection = FullConnection(layer_list[i], layer_list[i + 1])
        bias_connection = FullConnection(bias_layers[i], layer_list[i + 1])
        molecule_net.addConnection(layer_connection)  # connect two neighbor layers
        molecule_net.addConnection(bias_connection)
        connection_between_layers.append(layer_connection)
        connection_with_bias_layers.append(bias_connection)
    molecule_net.sortModules()  # this is some internal initialization process to make this module usable
    ####################### set up autoencoder end #######################
    trainer = BackpropTrainer(molecule_net, learningrate=0.002, momentum=0.4, verbose=False, weightdecay=0.1, lrdecay=1)
    data_set = SupervisedDataSet(node_num[0], node_num[4])
    sincos = self._data_set
    (sincos_after_process, self._preprocessing_settings) = self.get_mapminmax_preprocess_result_and_coeff(data = sincos)
    for item in sincos_after_process:  # is it needed?
        data_set.addSample(item, item)  # target equals input
    trainer.trainUntilConvergence(data_set, maxEpochs=50)
    self._connection_between_layers = connection_between_layers
    self._connection_with_bias_layers = connection_with_bias_layers
    print("Done!\n")
    return
def create_sge_files_for_simulation(self,potential_centers = None):
    """Hand the potential centers to neural_network_related.create_sge_files.

    potential_centers -- centers to use; when omitted they are taken from
                         self.get_boundary_points() with its defaults.
    """
    centers = self.get_boundary_points() if potential_centers is None else potential_centers
    neural_network_related.create_sge_files(centers)
    return
def get_boundary_points(self, list_of_points = None, num_of_bins = 5):
    """Return centers of empty grid cells that touch an occupied cell.

    The first two coordinates of every point are binned into a
    num_of_bins x num_of_bins 2-D histogram, which is then surrounded by a
    one-cell ring of empty cells.  A cell qualifies when it holds no points
    but at least one of its four edge neighbors does.

    list_of_points -- points to bin; defaults to self._PCs
    num_of_bins    -- number of bins along each axis

    Returns a list of [coordinate_0, coordinate_1] cell centers, each rounded
    to two decimals, in row-major order of the padded grid.
    """
    points = self._PCs if list_of_points is None else list_of_points
    first_coords = [point[0] for point in points]
    second_coords = [point[1] for point in points]
    counts, edges_0, edges_1 = np.histogram2d(first_coords, second_coords, bins=[num_of_bins, num_of_bins])

    # surround the histogram with a ring of empty cells, then reduce it to 0/1
    occupied = (np.pad(counts, 1, mode='constant') != 0).astype(int)

    # number of occupied cells among the (up to four) edge neighbors
    neighbor_count = np.zeros(occupied.shape)
    neighbor_count[1:, :] += occupied[:-1, :]
    neighbor_count[:, 1:] += occupied[:, :-1]
    neighbor_count[:-1, :] += occupied[1:, :]
    neighbor_count[:, :-1] += occupied[:, 1:]

    width_0 = edges_0[1] - edges_0[0]
    width_1 = edges_1[1] - edges_1[0]
    # padded index 0 lies one bin before the first edge; stepping back half a
    # bin makes index * width land on the center of each cell
    origin_0 = edges_0[0] - 0.5 * width_0
    origin_1 = edges_1[0] - 0.5 * width_1

    # no points in the cell itself, but points in at least one neighbor
    boundary_cells = np.argwhere((occupied == 0) & (neighbor_count != 0))
    return [[round(origin_0 + int(row) * width_0, 2), round(origin_1 + int(col) * width_1, 2)]
            for row, col in boundary_cells]
# this function is added after those old objects of A were created
def plotting_in_PC_space_with_coloring_option(self,
                                              list_of_coordinate_files_for_plotting=None,  # accept multiple files
                                              color_option='pure'):
    """Scatter-plot points in the PC1/PC2 plane and show the figure.

    By default the training data (self._PCs) is plotted.  When a list of
    coordinate files is given, the cos/sin values read from all of them are
    passed through self.get_mid_result() and the resulting PCs are plotted
    instead.

    color_option -- 'pure' draws every point in red; 'step' colors each
                    point by its position in the sequence.

    Raises ValueError for any other color_option.  The check runs before any
    data is processed; previously an unknown option only surfaced as an
    unbound local variable once plotting started.
    """
    if color_option not in ('pure', 'step'):
        raise ValueError("unknown color_option %r; expected 'pure' or 'step'" % (color_option,))

    if list_of_coordinate_files_for_plotting is None:
        PCs_to_plot = self._PCs
    else:
        temp_sincos = []
        for item in list_of_coordinate_files_for_plotting:
            temp_sincos += self.get_many_cossin_from_coordiantes_in_file(item)
        temp_mid_result = self.get_mid_result(input_data=temp_sincos)
        PCs_to_plot = [item[1] for item in temp_mid_result]

    x = [item[0] for item in PCs_to_plot]
    y = [item[1] for item in PCs_to_plot]

    # coloring
    if color_option == 'pure':
        coloring = 'red'
    else:  # 'step'
        # materialize the range so a real sequence is passed on Python 3 too
        coloring = list(range(len(x)))

    fig, ax = plt.subplots()
    ax.scatter(x, y, c=coloring)
    ax.set_xlabel("PC1")
    ax.set_ylabel("PC2")
    plt.show()
    return
But it seems that plotting_in_PC_space_with_coloring_option() was not bound to those old objects. Is there any way to fix this? (I do not want to recreate these objects, since creating them involves CPU-intensive calculation and would take a very long time.)
Thanks!
Something like this:
class A:
    def q(self):
        print(1)


a = A()  # instance created before the new method exists


def f(self):
    print(2)


# Attach f to the class itself.  Instances created earlier pick it up too,
# because method lookup on an instance falls back to its class.
setattr(A, 'f', f)
a.f()
This is called a monkey patch.

Getting type error for the following code

I am getting a TypeError for the following code. The output of the code should be the graph on this page: http://www.realclearpolitics.com/epolls/other/president_obama_job_approval-1044.html. When I run the code, the error is raised on this line:
reduce_the_data = new_take_page[new_colors.keys()].sum(axis=1)/100
def get_poll_data(poll_id):
    """Download the chart XML for one poll and return it as text.

    poll_id -- anything int() accepts; it is formatted into the chart URL.
    """
    response = requests.get("http://charts.realclearpolitics.com/charts/%i.xml" % int(poll_id))
    return response.text  # body of the HTTP response as text
def color_function(xml):
    """Map each graph's title to its color.

    Every 'graph' element of the parsed XML contributes one entry: the
    element's 'title' attribute is the key and its 'color' attribute the
    value.
    """
    graphs = web.Element(xml).by_tag('graph')
    return {graph.attributes['title']: graph.attributes['color'] for graph in graphs}
def strip(s):
    """Return s with every non-word character and underscore removed.

    The original computed the substitution but never returned it, so every
    call produced None.
    """
    return re.sub(r'[\W_]+', '', s)
def take_page(xml):
    """Parse a chart XML document into a DataFrame sorted by date.

    The first 'series' element supplies the dates, keyed by each value's
    'xid' attribute.  Every 'graph' element becomes one column named after
    its 'title' attribute; a value with empty content becomes NaN.

    Returns a DataFrame with a 'date' column plus one column per graph.
    """
    dom = web.Element(xml)
    series = dom.by_tag('series')[0]
    date_by_xid = {value.attributes['xid']: str(value.content) for value in series.by_tag('value')}
    # Fix the key order once so every column is built in the same row order.
    xids = list(date_by_xid)
    columns = {'date': pd.to_datetime([date_by_xid[xid] for xid in xids])}
    for graph in dom.by_tag('graph'):
        number_by_xid = {point.attributes['xid']: float(point.content) if point.content else np.nan
                         for point in graph.by_tag('value')}
        columns[graph.attributes['title']] = [number_by_xid[xid] for xid in xids]
    table = pd.DataFrame(columns)
    # DataFrame.sort(columns=...) was removed from pandas; sort_values is
    # its replacement.
    return table.sort_values(by='date')
def new_func(poll_id):
    """Download one poll chart and plot its series, normalized per row.

    Each series is divided by (row sum of all series) / 100, so the plotted
    values of a row add up to 100.  One line is drawn per series, using the
    color given in the chart XML.  Nothing is returned; the lines are added
    to the current matplotlib figure.
    """
    xml = get_poll_data(poll_id)
    table = take_page(xml)
    colors = color_function(xml)
    table = table.rename(columns={c: strip(c) for c in table.columns})
    # NOTE(review): the color keys are raw graph titles while the columns
    # went through strip(); they only match when the titles consist purely
    # of word characters -- confirm for the charts you load.
    titles = list(colors)  # a real list: DataFrame indexing needs one
    row_scale = table[titles].sum(axis=1) / 100
    # Iterate over titles only.  The original looped over .items() and used
    # each (title, color) tuple as a column label.
    for title in titles:
        table[title] /= row_scale
    for title, color in colors.items():
        plt.plot(table.date, table[title], color=color, label=title)
    plt.xticks(rotation=60)
    plt.legend(loc='best')
    plt.xlabel("Date")
    plt.ylabel("Normalized Poll percentage")
>> new_func(1044)
>> plt.title("Polling")
TypeError: 'NoneType' object is not iterable

Categories