Different output for same function - python

I am implemented a KNN algorithm in python.
import math
#height,width,deepth,thickness,Label
data_set = [(2,9,8,4, "Good"),
(3,7,7,9, "Bad"),
(10,3,10,3, "Good"),
(2,9,6,10, "Good"),
(3,3,2,5, "Bad"),
(2,8,5,6, "Bad"),
(7,2,3,10, "Good"),
(1,10,8,10, "Bad"),
(2,8,1,10, "Good")
]
A = (3,2,1,5)
B = (8,3,1,2)
C = (6,10,8,3)
D = (9,6,4,1)
distances = []
labels = []
def calc_distance(datas,test):
for data in datas:
distances.append(
( round(math.sqrt(((data[0] - test[0])**2 + (data[1] - test[1])**2 + (data[2] - test[2])**2 + (data[3] - test[3])**2)), 3), data[4] ))
return distances
def most_frequent(list1):
return max(set(list1), key = list1.count)
def get_neibours(k):
distances.sort()
print(distances[:k])
for distance in distances[:k]:
labels.append(distance[1])
print("It can be classified as: ", end="")
print(most_frequent(labels))
calc_distance(data_set,D)
get_neibours(7)
calc_distance(data_set,D)
get_neibours(7)
I works well mostly and I get the correct label. For example for D, i do get the label "Good". However i discovered a bug that when I call it twice for example:
calc_distance(data_set,D)
get_neibours(7)
calc_distance(data_set,D)
get_neibours(7)
and I run it few times, i get different outputs- "Good" and "Bad" when I run the program couple of times..
There must be a bug somewhere I am unable to find out.

The problem is that you are using the same distances and label, sorting and getting the k first elements. Create the lists inside the functions and return it. Check the modifications bellow.
import math
data_set = [
(2,9,8,4, "Good"),
(3,7,7,9, "Bad"),
(10,3,10,3, "Good"),
(2,9,6,10, "Good"),
(3,3,2,5, "Bad"),
(2,8,5,6, "Bad"),
(7,2,3,10, "Good"),
(1,10,8,10, "Bad"),
(2,8,1,10, "Good"),
]
A = (3,2,1,5)
B = (8,3,1,2)
C = (6,10,8,3)
D = (9,6,4,1)
def calc_distance(datas, test):
distances = []
for data in datas:
distances.append(
( round(math.sqrt(((data[0] - test[0])**2 + (data[1] - test[1])**2 + (data[2] - test[2])**2 + (data[3] - test[3])**2)), 3), data[4] ))
return distances
def most_frequent(list1):
return max(set(list1), key = list1.count)
def get_neibours(distances, k):
labels = []
distances.sort()
print(distances[:k])
for distance in distances[:k]:
labels.append(distance[1])
print("It can be classified as: ", end="")
print(most_frequent(labels))
distances = calc_distance(data_set,D)
get_neibours(distances, 7)
distances = calc_distance(data_set,D)
get_neibours(distances, 7)
[(7.071, 'Good'), (8.062, 'Bad'), (8.888, 'Bad'), (9.11, 'Good'), (10.1, 'Good'), (10.488, 'Bad'), (11.958, 'Good')]
It can be classified as: Good
[(7.071, 'Good'), (8.062, 'Bad'), (8.888, 'Bad'), (9.11, 'Good'), (10.1, 'Good'), (10.488, 'Bad'), (11.958, 'Good')]
It can be classified as: Good

Related

Replace dot product for loop Numpy

I am trying to replace the dot product for loop using something faster like NumPy
I did research on dot product and kind of understand and can get it working with toy data in a few ways in but not 100% when it comes to implementing it for actual use with a data frame.
I looked at these and other SO threads to no luck avoide loop dot product, matlab and dot product subarrays without for loop and multiple numpy dot products without a loop
looking to do something like this which works with toy numbers in np array
u1 =np.array([1,2,3])
u2 =np.array([2,3,4])
v1.dot(v2)
20
u1 =np.array([1,2,3])
u2 =np.array([2,3,4])
(u1 * u2).sum()
20
u1 =np.array([1,2,3])
u2 =np.array([2,3,4])
sum([x1*x2 for x1, x2 in zip (u1, u2)])
20
this is the current working get dot product
I would like to do this with out the for loop
def get_dot_product(self, courseid1, courseid2, unit_vectors):
u1 = unit_vectors[courseid1]
u2 = unit_vectors[courseid2]
dot_product = 0.0
for dimension in u1:
if dimension in u2:
dot_product += u1[dimension] * u2[dimension]
return dot_product
** code**
#!/usr/bin/env python
# coding: utf-8
class SearchRecommendationSystem:
def __init__(self):
pass
def get_bag_of_words(self, titles_lines):
bag_of_words = {}
for index, row in titles_lines.iterrows():
courseid, course_bag_of_words = self.get_course_bag_of_words(row)
for word in course_bag_of_words:
word = str(word).strip() # added
if word not in bag_of_words:
bag_of_words[word] = course_bag_of_words[word]
else:
bag_of_words[word] += course_bag_of_words[word]
return bag_of_words
def get_course_bag_of_words(self, line):
course_bag_of_words = {}
courseid = line['courseid']
title = line['title'].lower()
description = line['description'].lower()
wordlist = title.split() + description.split()
if len(wordlist) >= 10:
for word in wordlist:
word = str(word).strip() # added
if word not in course_bag_of_words:
course_bag_of_words[word] = 1
else:
course_bag_of_words[word] += 1
return courseid, course_bag_of_words
def get_sorted_results(self, d):
kv_list = d.items()
vk_list = []
for kv in kv_list:
k, v = kv
vk = v, k
vk_list.append(vk)
vk_list.sort()
vk_list.reverse()
k_list = []
for vk in vk_list[:10]:
v, k = vk
k_list.append(k)
return k_list
def get_keywords(self, titles_lines, bag_of_words):
n = sum(bag_of_words.values())
keywords = {}
for index, row in titles_lines.iterrows():
courseid, course_bag_of_words = self.get_course_bag_of_words(row)
term_importance = {}
for word in course_bag_of_words:
word = str(word).strip() # extra
tf_course = (float(course_bag_of_words[word]) / sum(course_bag_of_words.values()))
tf_overall = float(bag_of_words[word]) / n
term_importance[word] = tf_course / tf_overall
keywords[str(courseid)] = self.get_sorted_results(term_importance)
return keywords
def get_inverted_index(self, keywords):
inverted_index = {}
for courseid in keywords:
for keyword in keywords[courseid]:
if keyword not in inverted_index:
keyword = str(keyword).strip() # added
inverted_index[keyword] = []
inverted_index[keyword].append(courseid)
return inverted_index
def get_search_results(self, query_terms, keywords, inverted_index):
search_results = {}
for term in query_terms:
term = str(term).strip()
if term in inverted_index:
for courseid in inverted_index[term]:
if courseid not in search_results:
search_results[courseid] = 0.0
search_results[courseid] += (
1 / float(keywords[courseid].index(term) + 1) *
1 / float(query_terms.index(term) + 1)
)
sorted_results = self.get_sorted_results(search_results)
return sorted_results
def get_titles(self, titles_lines):
titles = {}
for index, row in titles_lines.iterrows():
titles[row['courseid']] = row['title'][:60]
return titles
def get_unit_vectors(self, keywords, categories_lines):
norm = 1.884
cat = {}
subcat = {}
for line in categories_lines[1:]:
courseid_, category, subcategory = line.split('\t')
cat[courseid_] = category.strip()
subcat[courseid_] = subcategory.strip()
unit_vectors = {}
for courseid in keywords:
u = {}
if courseid in cat:
u[cat[courseid]] = 1 / norm
u[subcat[courseid]] = 1 / norm
for keyword in keywords[courseid]:
u[keyword] = (1 / float(keywords[courseid].index(keyword) + 1) / norm)
unit_vectors[courseid] = u
return unit_vectors
def get_dot_product(self, courseid1, courseid2, unit_vectors):
u1 = unit_vectors[courseid1]
u2 = unit_vectors[courseid2]
dot_product = 0.0
for dimension in u1:
if dimension in u2:
dot_product += u1[dimension] * u2[dimension]
return dot_product
def get_recommendation_results(self, seed_courseid, keywords, inverted_index, unit_vectors):
courseids = []
seed_courseid = str(seed_courseid).strip()
for keyword in keywords[seed_courseid]:
for courseid in inverted_index[keyword]:
if courseid not in courseids and courseid != seed_courseid:
courseids.append(courseid)
dot_products = {}
for courseid in courseids:
dot_products[courseid] = self.get_dot_product(seed_courseid, courseid, unit_vectors)
sorted_results = self.get_sorted_results(dot_products)
return sorted_results
def Final(self):
print("Reading Title file.......")
titles_lines = open('s2-titles.txt', encoding="utf8").readlines()
print("Reading Category file.......")
categories_lines = open('s2-categories.tsv', encoding = "utf8").readlines()
print("Getting Supported Functions Data")
bag_of_words = self.get_bag_of_words(titles_lines)
keywords = self.get_keywords(titles_lines, bag_of_words)
inverted_index = self.get_inverted_index(keywords)
titles = self.get_titles(titles_lines)
print("Getting Unit Vectors")
unit_vectors = self.get_unit_vectors(keywords=keywords, categories_lines=categories_lines)
#Search Part
print("\n ############# Started Search Query System ############# \n")
query = input('Input your search query: ')
while query != '':
query_terms = query.split()
search_sorted_results = self.get_search_results(query_terms, keywords, inverted_index)
print(f"==> search results for query: {query.split()}")
for search_result in search_sorted_results:
print(f"{search_result.strip()} - {str(titles[search_result]).strip()}")
#ask again for query or quit the while loop if no query is given
query = input('Input your search query [hit return to finish]: ')
print("\n ############# Started Recommendation Algorithm System ############# \n")
# Recommendation ALgorithm Part
seed_courseid = (input('Input your seed courseid: '))
while seed_courseid != '':
seed_courseid = str(seed_courseid).strip()
recom_sorted_results = self.get_recommendation_results(seed_courseid, keywords, inverted_index, unit_vectors)
print('==> recommendation results:')
for rec_result in recom_sorted_results:
print(f"{rec_result.strip()} - {str(titles[rec_result]).strip()}")
get_dot_product_ = self.get_dot_product(seed_courseid, str(rec_result).strip(), unit_vectors)
print(f"Dot Product Value: {get_dot_product_}")
seed_courseid = (input('Input seed courseid [hit return to finish]:'))
if __name__ == '__main__':
obj = SearchRecommendationSystem()
obj.Final()
s2-categories.tsv
courseid category subcategory
21526 Design 3D & Animation
153082 Marketing Advertising
225436 Marketing Affiliate Marketing
19482 Office Productivity Apple
33883 Office Productivity Apple
59526 IT & Software Operating Systems
29219 Personal Development Career Development
35057 Personal Development Career Development
40751 Personal Development Career Development
65210 Personal Development Career Development
234414 Personal Development Career Development
Example of how s2-titles.txt looks
courseidXXXYYYZZZtitleXXXYYYZZZdescription
3586XXXYYYZZZLearning Tools for Mrs B's Science Classes This is a series of lessons that will introduce students to the learning tools that will be utilized throughout the schoXXXYYYZZZThis is a series of lessons that will introduce students to the learning tools that will be utilized throughout the school year The use of these tools serves multiple purposes 1 Allow the teacher to give immediate and meaningful feedback on work that is in progress 2 Allow students to have access to content and materials when outside the classroom 3 Provide a variety of methods for students to experience learning materials 4 Provide a variety of methods for students to demonstrate learning 5 Allow for more time sensitive correction grading and reflections on concepts that are assessed
Evidently unit_vectors is a dictionary, from which you extract to 2 values, u1 and u2.
But what are those? Evidently dicts as well (this iteration would not make sense with a list):
for dimension in u1:
if dimension in u2:
dot_product += u1[dimension] * u2[dimension]
But what is u1[dimension]? A list? An array.
Normally dict are access by key as you do here. There isn't a numpy style "vectorization". vals = list(u1.values()) gets a lists of all values, and conceivably that could be made into an array (if the elements are right)
arr1 = np.array(list(u1.values()))
and a np.dot(arr1, arr2) might work
You'll get the best answers if you give small concrete examples - with real working data (and skip the complex generating code). Focus on the core of the problem, so we can grasp the issue with a 30 second read!
===
Looking more in depth at your dot function; this replicates the core (I think). Initially I missed the fact that you aren't iterating on u2 keys, but rather seeking matching ones.
def foo(dd):
x = 0
u1 = dd['u1']
u2 = dd['u2']
for k in u1:
if k in u2:
x += u1[k]*u2[k]
return x
Then making a dictionary of dictionaries:
In [30]: keys=list('abcde'); values=[1,2,3,4,5]
In [31]: adict = {k:v for k,v in zip(keys,values)}
In [32]: dd = {'u1':adict, 'u2':adict}
In [41]: dd
Out[41]:
{'u1': {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5},
'u2': {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}}
In [42]: foo(dd)
Out[42]: 55
In this case the subdictionaries match, so we get the same value with a simple array dot:
In [43]: np.dot(values,values)
Out[43]: 55
But if u2 was different, with different key/value pairs, and possibly different keys the result will be different. I don't see a way around the iterative access by keys. The sum-of-products part of the job is minor compared to the dictionary access.
In [44]: dd['u2'] = {'e':3, 'f':4, 'a':3}
In [45]: foo(dd)
Out[45]: 18
We could construct other data structures that are more suitable to a fast dot like calculation. But that's another topic.
Modified method
def get_dot_product(self, courseid1, courseid2, unit_vectors):
# u1 = unit_vectors[courseid1]
# u2 = unit_vectors[courseid2]
# dimensions = set(u1).intersection(set(u2))
# dot_product = sum(u1[dimension] * u2.get(dimension, 0) for dimension in dimensions)
u1 = unit_vectors[courseid1]
u2 = unit_vectors[courseid2]
dot_product = sum(u1[dimension] * u2.get(dimension, 0) for dimension in u2)
return dot_product

Using a for loop to plot arrays from dictionaries

I have a dictionary with multiple key defined as (arbitrary inputs):
colors = {}
colors['red'] = {}
colors['blue'] = {}
colors['red'][clustname] = np.array([])
colors['blue'][clustname] = np.array([])
basically I want to plot a red v blue graph for each 'cluster'. I have 13 'clusters' in total with differing color values for each. The names in my code are different from the arbitrary ones above, but I figured it would be easier to understand with basic values then to look at the overall code:
colpath = '/home/jacob/PHOTOMETRY/RESTFRAME_COLOURS/' #This is the path to the restframe colors
goodcolindx = {}
colfiledat = {}
colors = {}
colors['UMINV'] = {}
colors['VMINJ'] = {}
colors['NUVMINV'] = {}
colors['id'] = {}
for iclust in range(len(clustname)):
colors['UMINV'][clustname[iclust]] = np.array([])
colors['VMINJ'][clustname[iclust]] = np.array([])
colors['id'][clustname[iclust]] = np.array([])
colors['NUVMINV'][clustname[iclust]] = np.array([])
filepath = catpath + clustname[iclust] + "_totalall_" + extname[iclust] + ".cat"
photdat[clustname[iclust]] = ascii.read(filepath)
filepath = zpath + "compilation_" + clustname[iclust] + ".dat"
zdat[clustname[iclust]] = ascii.read(filepath)
colfilepath = colpath + 'RESTFRAME_MASTER_' + clustname[iclust] + '_indivredshifts.cat'
colfiledat[clustname[iclust]] = ascii.read(colfilepath)
goodcolindx[clustname[iclust]] = np.where((colfiledat[clustname[iclust]]['REDSHIFTUSED'] > 0.9) & \
(colfiledat[clustname[iclust]]['REDSHIFTUSED'] < 1.5) & \
(photdat[clustname[iclust]]['totmask'] == 0) & \
(photdat[clustname[iclust]]['K_flag'] == 0) & \
((zdat[clustname[iclust]]['quality'] == 3) | (zdat[clustname[iclust]]['quality'] == 4)))
goodcolindx[clustname[iclust]] = goodcolindx[clustname[iclust]][0]
for igood in range(len(goodcolindx[clustname[iclust]])):
idstring = str(photdat[clustname[iclust]]['id'][goodcolindx[clustname[iclust]][igood]])
colors['NUVMINV'][clustname[iclust]] = np.append(colors['NUVMINV'][clustname[iclust]], -2.5 *
np.log10(colfiledat[clustname[iclust]]['NUV'][goodcolindx[clustname[iclust]][igood]]
/ colfiledat[clustname[iclust]]['V'][goodcolindx[clustname[iclust]][igood]]))'SpARCS-0035'
colors['UMINV'][clustname[iclust]] = np.append(colors['UMINV'][clustname[iclust]], colfiledat[clustname[iclust]]['UMINV'][goodcolindx[clustname[iclust]][igood]])
colors['id'][clustname[iclust]] = np.append(colors['id'][clustname[iclust]], photdat[clustname[iclust]]['id'][goodcolindx[clustname[iclust]][igood]])
colors['VMINJ'][clustname[iclust]] = np.append(colors['VMINJ'][clustname[iclust]], colfiledat[clustname[iclust]]['VMINJ'][goodcolindx[clustname[iclust]][igood]])
for iclustc in colors:
plt.plot(colors['VMINJ'][clustname[iclustc]], colors['UMINV'][clustname[iclustc]], 'ko')
plt.show()
So in this case, my 'red' is the VMINJ and my 'blue' is the UMINV. I am trying to use a for loop to cycle through all the cluster names that I have, but I keep getting the error back 'String indices must be integers'. I understand the basics of that, but don't know how to fix my code to make plots for each 'red' v 'blue' for each cluster. Any help would be awesome, let me know if you have questions
I figured it out. I changed the for loop to:
for iclust in range(len(clustname)):
plt.plot(colors['UMINV'][clustname[iclust]]....
and that worked

Visualizing modified SIR model

I tried to modify the SIR model from the eon package and did some changes to it. It has a new vaccination parameter attached to it with new parameters beta and omega and Vl and my code is-
def test_transmission(u, v, p):
return random.random()<p
def discrete_SIR(G,
initial_infecteds,beta,
w,Vl,return_full_data=True):
if G.has_node(initial_infecteds):
initial_infecteds=[initial_infecteds]
if return_full_data:
node_history = defaultdict(lambda : ([tmin], ['S']))
transmissions = []
for node in initial_infecteds:
node_history[node] = ([tmin], ['I'])
transmissions.append((tmin-1, None, node))
node_history = defaultdict(lambda : ([tmin], ['S']))
# transmissions = []
for node in initial_infecteds:
node_history[node] = ([tmin], ['I'])
#transmissions.append((tmin-1, None, node))
N=G.order()
t = [tmin]
S = [N-len(initial_infecteds)]
I = [len(initial_infecteds)]
R = [0]
V = [0]
susceptible = defaultdict(lambda: True)
#above line is equivalent to u.susceptible=True for all nodes.
for u in initial_infecteds:
susceptible[u] = False
infecteds = set(initial_infecteds)
while infecteds and t[-1]<tmax :
new_infecteds = set()
vaccinated= set()
infector = {} #used for returning full data. a waste of time otherwise
for u in infecteds:
# print('u-->' +str(u))
for v in G.neighbors(u):
# print('v --> '+ str(v))
##vaccination
if len(vaccinated)+V[-1]< (Vl*N) : #check if vaccination over or not
#print(len(vaccinated),Vl*N)
#print("HI")
if susceptible[v] and test_transmission(u, v, w):
vaccinated.add(v)
susceptible[v] = False
# print('transmitting vaccination')
elif susceptible[v] and test_transmission(u,v,beta):
new_infecteds.add(v)
susceptible[v]=False
infector[v] = [u]
# print('transmitting infection')
else:
# print("BYE")
if susceptible[v] and test_transmission(u, v,beta):
new_infecteds.add(v)
susceptible[v] = False
infector[v] = [u]
#infector[v] = [u]
if return_full_data:
for v in infector.keys():
transmissions.append((t[-1], random.choice(infector[v]), v))
next_time = t[-1]+1
if next_time <= tmax:
for u in infecteds:
node_history[u][0].append(next_time)
node_history[u][1].append('R')
for v in new_infecteds:
node_history[v][0].append(next_time)
node_history[v][1].append('I')
infecteds = new_infecteds
R.append(R[-1]+I[-1])
V.append(len(vaccinated)+V[-1])
I.append(len(infecteds))
S.append(N-V[-1]-I[-1]-R[-1])
#S.append(S[-1]-V[-1]-I[-1])
t.append(t[-1]+1)
print(str(R[-1])+','+str(V[-1])+','+str(I[-1])+','+str(S[-1]))
if not return_full_data:
return scipy.array(t), scipy.array(S), scipy.array(I), \
scipy.array(R)
else:
return EoN.Simulation_Investigation(G, node_history, transmissions)
Now I want to run the visualizations on it like in the packagae EON-
m=5
G=nx.grid_2d_graph(m,m,periodic=True)
initial_infections = [(u,v) for (u,v) in G if u==int(m/2) and v==int(m/2)]
sim = EoN.basic_discrete_SIR(G,0.5,initial_infecteds = initial_infections,
return_full_data=True, tmax = 25)
pos = {node:node for node in G}
sim.set_pos(pos)
sim.display(0, node_size = 40) #display time 6
plt.show()
plt.savefig('SIR_2dgrid.png')
What changes do I need to do in my code so that the display function works or do I need to make changes in the display function also?
Here's the output I now get:
You'll have to install EoN version 1.0.8rc3 or later, which is available on the github page (see installation instructions). At present pip will not work to install it. I want to make sure I haven't broken anything before I make it the default installed by pip.
Here's the code based on yours. You should look through the changes I've made. It's also worth looking at the examples I've put in the documentation (including an SIRV model where the vaccination rule is different than what you've got).
from collections import defaultdict
import EoN
import networkx as nx
import random
import matplotlib.pyplot as plt
def test_transmission(u, v, p):
return random.random()<p
def discrete_SIRV(G, initial_infecteds,beta,
w,Vl,tmin=0,tmax=float('Inf'), return_full_data=True):
if G.has_node(initial_infecteds):
initial_infecteds=[initial_infecteds]
if return_full_data:
node_history = defaultdict(lambda : ([tmin], ['S']))
transmissions = []
for node in initial_infecteds:
node_history[node] = ([tmin], ['I'])
transmissions.append((tmin-1, None, node))
'''
node_history = defaultdict(lambda : ([tmin], ['S']))
# transmissions = []
for node in initial_infecteds:
node_history[node] = ([tmin], ['I'])
#transmissions.append((tmin-1, None, node))
'''
N=G.order()
t = [tmin]
S = [N-len(initial_infecteds)]
I = [len(initial_infecteds)]
R = [0]
V = [0]
susceptible = defaultdict(lambda: True)
#above line is equivalent to u.susceptible=True for all nodes.
for u in initial_infecteds:
susceptible[u] = False
infecteds = set(initial_infecteds)
while infecteds and t[-1]<tmax :
new_infecteds = set()
vaccinated= set()
infector = {} #used for returning full data. a waste of time otherwise
for u in infecteds:
# print('u-->' +str(u))
for v in G.neighbors(u):
# print('v --> '+ str(v))
##vaccination
if len(vaccinated)+V[-1]< (Vl*N) : #check if vaccination over or not
#print(len(vaccinated),Vl*N)
#print("HI")
if susceptible[v] and test_transmission(u, v, w):
vaccinated.add(v)
susceptible[v] = False
'''It's probably better to define a `new_vaccinated`
set and then do the `return_full_data` stuff later
where all the others are done.'''
if return_full_data:
node_history[v][0].append(t[-1]+1)
node_history[v][1].append('V')
# print('transmitting vaccination')
elif susceptible[v] and test_transmission(u,v,beta):
new_infecteds.add(v)
susceptible[v]=False
infector[v] = [u]
# print('transmitting infection')
else:
# print("BYE")
if susceptible[v] and test_transmission(u, v,beta):
new_infecteds.add(v)
susceptible[v] = False
infector[v] = [u]
#infector[v] = [u]
if return_full_data:
for v in infector.keys():
'''This random choice is no longer needed as you've taken out
the possibility of multiple nodes transmitting to `v` in a given
time step. Now only the first one encountered does it.'''
transmissions.append((t[-1], random.choice(infector[v]), v))
next_time = t[-1]+1
if next_time <= tmax:
for u in infecteds:
node_history[u][0].append(next_time)
node_history[u][1].append('R')
for v in new_infecteds:
node_history[v][0].append(next_time)
node_history[v][1].append('I')
infecteds = new_infecteds
R.append(R[-1]+I[-1])
V.append(len(vaccinated)+V[-1])
I.append(len(infecteds))
S.append(N-V[-1]-I[-1]-R[-1])
#S.append(S[-1]-V[-1]-I[-1])
t.append(t[-1]+1)
print(str(R[-1])+','+str(V[-1])+','+str(I[-1])+','+str(S[-1]))
if not return_full_data:
return scipy.array(t), scipy.array(S), scipy.array(I), \
scipy.array(R)
else:
return EoN.Simulation_Investigation(G, node_history, transmissions, possible_statuses=['S', 'I', 'R', 'V'])
print(EoN.__version__)
print("line above needs to be 1.0.8rc3 or greater or it will not work\n\n")
m=20
G=nx.grid_2d_graph(m,m,periodic=True)
initial_infections = [(u,v) for (u,v) in G if u==int(m/2) and v==int(m/2)]
beta=0.8
Vl=0.3
w=0.1
sim = discrete_SIRV(G, initial_infections, beta, w, Vl, return_full_data=True)
pos = {node:node for node in G}
sim.set_pos(pos)
sim.sim_update_colordict({'S': '#009a80','I':'#ff2000', 'R':'gray','V': '#5AB3E6'})
sim.display(6, node_size = 40) #display time 6
plt.savefig('SIRV_2dgrid.png')

python create genetic algorithm no convergence to maximum or minimum

Hello i need help for create genetic algorithme for converge to maximum or minimum value.
I develop a code for found maximum sentence ascii sum, but my code not converge to maximum, my code make "yoyo" value
like this picture :
matploltib output
i share my code :
import random
import statistics
EVOLUTION=[]
words = [
["Un", "Des", "Une", "On", "Elle"],
["a", "eu", "avait", "est", "était", "fut"],
["soif", "rouge"]
]
def individual(data):
#return tuple(random.choice(range(len(feature))) for feature in data)
return tuple(random.choice(range(len(feature))) for feature in data)
def population(data, initial=100):
return [individual(data) for i in range(initial)]
def fitness(individual, data):
chaine=sentence(individual,words)
somme = 0
for caractere in chaine:
somme = somme + ord(caractere)
print(chaine)
print(somme)
EVOLUTION.append(somme)
return somme
#return sum(data[i][individual[i]] for i in range(len(individual)))
def grade(population, data):
fit = [fitness(ind, data) for ind in population]
return statistics.mean(fit)
def mutate(ind, data):
gene = random.randrange(0, len(ind))
clone = list(ind)
clone[gene] = random.randrange(0, len(data[gene]))
#print(sentence(tuple(clone),words))
return tuple(clone)
def cross(mother, father):
return tuple(round(statistics.mean(genes)) for genes in zip(mother, father))
def sentence(individual, words):
return ' '.join([words[i][individual[i]] for i in range(len(words))])
def evolve(population, data, retain=0.0, random_select=0.00, mutation_rate=0.00):
def cmp_ind(ind):
return fitness(ind, data)
sorted_population = sorted(population, key=cmp_ind, reverse=True)
len_retained = round(len(population) * retain)
retained = sorted_population[:len_retained]
random_selected = [
ind
for ind in sorted_population[len_retained:]
if random.random() <= random_select
]
mutated = [
mutate(ind, data)
for ind in sorted_population[len_retained:]
if random.random() <= mutation_rate
]
children = [
cross(random.choice(sorted_population),
random.choice(sorted_population))
for i in range(len(population) - len(random_selected) - len(mutated))
]
return random_selected + mutated + children
if __name__ == '__main__':
data = [[len(w) for w in ws] for ws in words]
initial_population = population(data, 30)
next_population = initial_population
max_iter = 3
for i in range(max_iter):
next_population = evolve(next_population, data)
sorted_population = sorted(next_population, key=lambda x: fitness(x, data))
best_individual = sorted_population[0]
print("best solution :")
chaine=sentence(best_individual,words)
somme = 0
for caractere in chaine:
somme = somme + ord(caractere)
print(chaine)
print(somme)
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
plt.plot(EVOLUTION)
plt.savefig('myfig')
i want to found highter solution in my fitness function
thanks for advance for your help

How to add a member function to an existing Python object?

Previously I created a lot of Python objects of class A, and I would like to add a new function plotting_in_PC_space_with_coloring_option() (the purpose of this function is to plot some data in this object) to class A and use those old objects to call plotting_in_PC_space_with_coloring_option().
An example is:
import copy
import numpy as np
from math import *
from pybrain.structure import *
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.datasets.supervised import SupervisedDataSet
import pickle
import neural_network_related
class A(object):
"""the neural network for simulation"""
'''
todo:
- find boundary
- get_angles_from_coefficients
'''
def __init__(self,
index, # the index of the current network
list_of_coor_data_files, # accept multiple files of training data
energy_expression_file, # input, output files
preprocessing_settings = None,
connection_between_layers = None, connection_with_bias_layers = None,
PCs = None, # principal components
):
self._index = index
self._list_of_coor_data_files = list_of_coor_data_files
self._energy_expression_file = energy_expression_file
self._data_set = []
for item in list_of_coor_data_files:
self._data_set += self.get_many_cossin_from_coordiantes_in_file(item)
self._preprocessing_settings = preprocessing_settings
self._connection_between_layers = connection_between_layers
self._connection_with_bias_layers = connection_with_bias_layers
self._node_num = [8, 15, 2, 15, 8]
self._PCs = PCs
def save_into_file(self, filename = None):
if filename is None:
filename = "network_%s.pkl" % str(self._index) # by default naming with its index
with open(filename, 'wb') as my_file:
pickle.dump(self, my_file, pickle.HIGHEST_PROTOCOL)
return
def get_cossin_from_a_coordinate(self, a_coordinate):
num_of_coordinates = len(a_coordinate) / 3
a_coordinate = np.array(a_coordinate).reshape(num_of_coordinates, 3)
diff_coordinates = a_coordinate[1:num_of_coordinates, :] - a_coordinate[0:num_of_coordinates - 1,:] # bond vectors
diff_coordinates_1=diff_coordinates[0:num_of_coordinates-2,:];diff_coordinates_2=diff_coordinates[1:num_of_coordinates-1,:]
normal_vectors = np.cross(diff_coordinates_1, diff_coordinates_2);
normal_vectors_normalized = np.array(map(lambda x: x / sqrt(np.dot(x,x)), normal_vectors))
normal_vectors_normalized_1 = normal_vectors_normalized[0:num_of_coordinates-3, :];normal_vectors_normalized_2 = normal_vectors_normalized[1:num_of_coordinates-2,:];
diff_coordinates_mid = diff_coordinates[1:num_of_coordinates-2]; # these are bond vectors in the middle (remove the first and last one), they should be perpendicular to adjacent normal vectors
cos_of_angles = range(len(normal_vectors_normalized_1))
sin_of_angles_vec = range(len(normal_vectors_normalized_1))
sin_of_angles = range(len(normal_vectors_normalized_1)) # initialization
for index in range(len(normal_vectors_normalized_1)):
cos_of_angles[index] = np.dot(normal_vectors_normalized_1[index], normal_vectors_normalized_2[index])
sin_of_angles_vec[index] = np.cross(normal_vectors_normalized_1[index], normal_vectors_normalized_2[index])
sin_of_angles[index] = sqrt(np.dot(sin_of_angles_vec[index], sin_of_angles_vec[index])) * np.sign(sum(sin_of_angles_vec[index]) * sum(diff_coordinates_mid[index]));
return cos_of_angles + sin_of_angles
def get_many_cossin_from_coordinates(self, coordinates):
return map(self.get_cossin_from_a_coordinate, coordinates)
def get_many_cossin_from_coordiantes_in_file (self, filename):
coordinates = np.loadtxt(filename)
return self.get_many_cossin_from_coordinates(coordinates)
def mapminmax(self, my_list): # for preprocessing in network
my_min = min(my_list)
my_max = max(my_list)
mul_factor = 2.0 / (my_max - my_min)
offset = (my_min + my_max) / 2.0
result_list = np.array(map(lambda x : (x - offset) * mul_factor, my_list))
return (result_list, (mul_factor, offset)) # also return the parameters for processing
def get_mapminmax_preprocess_result_and_coeff(self,data=None):
if data is None:
data = self._data_set
data = np.array(data)
data = np.transpose(data)
result = []; params = []
for item in data:
temp_result, preprocess_params = self.mapminmax(item)
result.append(temp_result)
params.append(preprocess_params)
return (np.transpose(np.array(result)), params)
def mapminmax_preprocess_using_coeff(self, input_data=None, preprocessing_settings=None):
# try begin
if preprocessing_settings is None:
preprocessing_settings = self._preprocessing_settings
temp_setttings = np.transpose(np.array(preprocessing_settings))
result = []
for item in input_data:
item = np.multiply(item - temp_setttings[1], temp_setttings[0])
result.append(item)
return result
# try end
def get_expression_of_network(self, connection_between_layers=None, connection_with_bias_layers=None):
if connection_between_layers is None:
connection_between_layers = self._connection_between_layers
if connection_with_bias_layers is None:
connection_with_bias_layers = self._connection_with_bias_layers
node_num = self._node_num
expression = ""
# first part: network
for i in range(2):
expression = '\n' + expression
mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i])
bias_coef = connection_with_bias_layers[i].params
for j in range(np.size(mul_coef, 0)):
temp_expression = 'layer_%d_unit_%d = tanh( ' % (i + 1, j)
for k in range(np.size(mul_coef, 1)):
temp_expression += ' %f * layer_%d_unit_%d +' % (mul_coef[j, k], i, k)
temp_expression += ' %f);\n' % (bias_coef[j])
expression = temp_expression + expression # order of expressions matter in OpenMM
# second part: definition of inputs
index_of_backbone_atoms = [2, 5, 7, 9, 15, 17, 19];
for i in range(len(index_of_backbone_atoms) - 3):
index_of_coss = i
index_of_sins = i + 4
expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
(index_of_coss, index_of_coss, self._preprocessing_settings[index_of_coss][1], self._preprocessing_settings[index_of_coss][0])
expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
(index_of_sins, index_of_sins, self._preprocessing_settings[index_of_sins][1], self._preprocessing_settings[index_of_sins][0])
expression += 'raw_layer_0_unit_%d = cos(dihedral_angle_%d);\n' % (index_of_coss, i)
expression += 'raw_layer_0_unit_%d = sin(dihedral_angle_%d);\n' % (index_of_sins, i)
expression += 'dihedral_angle_%d = dihedral(p%d, p%d, p%d, p%d);\n' % \
(i, index_of_backbone_atoms[i], index_of_backbone_atoms[i+1],index_of_backbone_atoms[i+2],index_of_backbone_atoms[i+3])
return expression
def write_expression_into_file(self, out_file = None):
if out_file is None: out_file = self._energy_expression_file
expression = self.get_expression_of_network()
with open(out_file, 'w') as f_out:
f_out.write(expression)
return
def get_mid_result(self, input_data=None, connection_between_layers=None, connection_with_bias_layers=None):
if input_data is None: input_data = self._data_set
if connection_between_layers is None: connection_between_layers = self._connection_between_layers
if connection_with_bias_layers is None: connection_with_bias_layers = self._connection_with_bias_layers
node_num = self._node_num
temp_mid_result = range(4)
mid_result = []
# first need to do preprocessing
for item in self.mapminmax_preprocess_using_coeff(input_data, self._preprocessing_settings):
for i in range(4):
mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i]) # fix node_num
bias_coef = connection_with_bias_layers[i].params
previous_result = item if i == 0 else temp_mid_result[i - 1]
temp_mid_result[i] = np.dot(mul_coef, previous_result) + bias_coef
if i != 3: # the last output layer is a linear layer, while others are tanh layers
temp_mid_result[i] = map(tanh, temp_mid_result[i])
mid_result.append(copy.deepcopy(temp_mid_result)) # note that should use deepcopy
return mid_result
def get_PC_and_save_it_to_network(self):
'''get PCs and save the result into _PCs
'''
mid_result = self.get_mid_result()
self._PCs = [item[1] for item in mid_result]
return
def train(self):
####################### set up autoencoder begin #######################
node_num = self._node_num
in_layer = LinearLayer(node_num[0], "IL")
hidden_layers = [TanhLayer(node_num[1], "HL1"), TanhLayer(node_num[2], "HL2"), TanhLayer(node_num[3], "HL3")]
bias_layers = [BiasUnit("B1"),BiasUnit("B2"),BiasUnit("B3"),BiasUnit("B4")]
out_layer = LinearLayer(node_num[4], "OL")
layer_list = [in_layer] + hidden_layers + [out_layer]
molecule_net = FeedForwardNetwork()
molecule_net.addInputModule(in_layer)
for item in (hidden_layers + bias_layers):
molecule_net.addModule(item)
molecule_net.addOutputModule(out_layer)
connection_between_layers = range(4); connection_with_bias_layers = range(4)
for i in range(4):
connection_between_layers[i] = FullConnection(layer_list[i], layer_list[i+1])
connection_with_bias_layers[i] = FullConnection(bias_layers[i], layer_list[i+1])
molecule_net.addConnection(connection_between_layers[i]) # connect two neighbor layers
molecule_net.addConnection(connection_with_bias_layers[i])
molecule_net.sortModules() # this is some internal initialization process to make this module usable
####################### set up autoencoder end #######################
trainer = BackpropTrainer(molecule_net, learningrate=0.002,momentum=0.4,verbose=False, weightdecay=0.1, lrdecay=1)
data_set = SupervisedDataSet(node_num[0], node_num[4])
sincos = self._data_set
(sincos_after_process, self._preprocessing_settings) = self.get_mapminmax_preprocess_result_and_coeff(data = sincos)
for item in sincos_after_process: # is it needed?
data_set.addSample(item, item)
trainer.trainUntilConvergence(data_set, maxEpochs=50)
self._connection_between_layers = connection_between_layers
self._connection_with_bias_layers = connection_with_bias_layers
print("Done!\n")
return
def create_sge_files_for_simulation(self,potential_centers = None):
if potential_centers is None:
potential_centers = self.get_boundary_points()
neural_network_related.create_sge_files(potential_centers)
return
def get_boundary_points(self, list_of_points = None, num_of_bins = 5):
if list_of_points is None: list_of_points = self._PCs
x = [item[0] for item in list_of_points]
y = [item[1] for item in list_of_points]
temp = np.histogram2d(x,y, bins=[num_of_bins, num_of_bins])
hist_matrix = temp[0]
# add a set of zeros around this region
hist_matrix = np.insert(hist_matrix, num_of_bins, np.zeros(num_of_bins), 0)
hist_matrix = np.insert(hist_matrix, 0, np.zeros(num_of_bins), 0)
hist_matrix = np.insert(hist_matrix, num_of_bins, np.zeros(num_of_bins + 2), 1)
hist_matrix = np.insert(hist_matrix, 0, np.zeros(num_of_bins +2), 1)
hist_matrix = (hist_matrix != 0).astype(int)
sum_of_neighbors = np.zeros(np.shape(hist_matrix)) # number of neighbors occupied with some points
for i in range(np.shape(hist_matrix)[0]):
for j in range(np.shape(hist_matrix)[1]):
if i != 0: sum_of_neighbors[i,j] += hist_matrix[i - 1][j]
if j != 0: sum_of_neighbors[i,j] += hist_matrix[i][j - 1]
if i != np.shape(hist_matrix)[0] - 1: sum_of_neighbors[i,j] += hist_matrix[i + 1][j]
if j != np.shape(hist_matrix)[1] - 1: sum_of_neighbors[i,j] += hist_matrix[i][j + 1]
bin_width_0 = temp[1][1]-temp[1][0]
bin_width_1 = temp[2][1]-temp[2][0]
min_coor_in_PC_space_0 = temp[1][0] - 0.5 * bin_width_0 # multiply by 0.5 since we want the center of the grid
min_coor_in_PC_space_1 = temp[2][0] - 0.5 * bin_width_1
potential_centers = []
for i in range(np.shape(hist_matrix)[0]):
for j in range(np.shape(hist_matrix)[1]):
if hist_matrix[i,j] == 0 and sum_of_neighbors[i,j] != 0: # no points in this block but there are points in neighboring blocks
temp_potential_center = [round(min_coor_in_PC_space_0 + i * bin_width_0, 2), round(min_coor_in_PC_space_1 + j * bin_width_1, 2)]
potential_centers.append(temp_potential_center)
return potential_centers
# this function is added after those old objects of A were created
def plotting_in_PC_space_with_coloring_option(self,
list_of_coordinate_files_for_plotting=None, # accept multiple files
color_option='pure'):
'''
by default, we are using training data, and we also allow external data input
'''
if list_of_coordinate_files_for_plotting is None:
PCs_to_plot = self._PCs
else:
temp_sincos = []
for item in list_of_coordinate_files_for_plotting:
temp_sincos += self.get_many_cossin_from_coordiantes_in_file(item)
temp_mid_result = self.get_mid_result(input_data = temp_sincos)
PCs_to_plot = [item[1] for item in temp_mid_result]
(x, y) = ([item[0] for item in PCs_to_plot], [item[1] for item in PCs_to_plot])
# coloring
if color_option == 'pure':
coloring = 'red'
elif color_option == 'step':
coloring = range(len(x))
fig, ax = plt.subplots()
ax.scatter(x,y, c=coloring)
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
plt.show()
return
But it seems that plotting_in_PC_space_with_coloring_option() was not binded to those old objects, is here any way to fix it (I do not want to recreate these objects since creation involves CPU-intensive calculation and would take very long time to do it)?
Thanks!
Something like this:
class A:
def q(self): print 1
a = A()
def f(self): print 2
setattr(A, 'f', f)
a.f()
This is called a monkey patch.

Categories