Python Convert Inches and Feet into Inches - python

I am trying to convert arrays that contain both Inches and Feet into Inches. The feet are denoted with a single quote (') and inches are denoted with double quotes ("). The data comes in the following forms:
[8'x10']
[60\" x 72\"]
[5'x8',5x8,60\"x92\"]
[8'10\"x12']
What I want:
["96x120"]
["60x72"]
["60x96","60x96","60x96","60x92"]
["106x144"]
What I have:
def ft_inch(numbers):
if str(numbers).find("x") > -1:
numbers=numbers.replace('[','').replace('"','').replace('[','').replace(']','')
try:
nom = numbers.split("x")[0]
nom=nom.replace(r'\\|\"|\]|\[','')
nom_one = nom.split("'")[0]
nom_two = nom.split("'")[1]
den = numbers.split("x")[1]
den=den.replace(r'\\|\"|\[|\]','')
den_one = den.split("'")[0]
den_two = den.split("'")[1]
ft=int(nom_one)*12
inch=nom_two.replace(r'\"| |\\','')
try:
inch=int(inch)
except:
print('B')
tmp = int(ft)+int(inch)
fts=int(den_one)*12
inchs=den_two.replace(r'\"| |\\','')
try:
inchs=int(inchs)
except:
print('B')
tmp_two = int(fts)+int(inch)
return f'["{tmp}x{tmp_two}"]'
except:
return numbers
else:
return numbers
x="[5'1x8'1]"
ft_inch(x)
This works for a single array as long as it has both feet and inches but fails if its only feet [8'x8']. If anyone has a simpler solution please let me know

A regex-based approach:
import re
inputs = [["8'1x10'1"], ["60\" x 72\""], ["5'x8'", "5x8", "60\"x92\""], ["8'10\"x12'"]]
for inpt in inputs:
sub_output = []
for measurement in inpt:
m = re.match(r"(\d+['\"]?)(\d+['\"]?)?x(\d+['\"]?)(\d+['\"]?)?",
"".join(measurement.split()))
groups = [m.groups()[:2], m.groups()[2:]]
result_inches = [0, 0]
for i, group in enumerate(groups):
for raw_val in group:
if raw_val == None:
continue
if '"' in raw_val:
result_inches[i] += int(raw_val[:-1])
elif "'" in raw_val:
result_inches[i] += int(raw_val[:-1])*12
else:
result_inches[i] += int(raw_val)*12
sub_output.append(result_inches)
print([f"{x}x{y}" for x, y in sub_output])
Output:
['108x132']
['60x72']
['60x96', '60x96', '60x92']
['106x144']
I saw your edit and included the ["8'1x10'1"] case :)

I rewrote the entire thing, but this seems to work:
input_ = ["8'x10'", "60\" x 72\"", "5'x8'","5x8","60\"x92\"", "8'10\"x12'", "8'1x10'1\""]
inches_only = []
for s in input_:
s.replace(" ", "")
sides = s.split("x")
new_sides = []
for side in sides:
inches = 0
split1 = side.split("'")
if len(split1) > 1 or (len(split1) == 1 and not side.__contains__('"')):
inches = int(split1[0]) * 12
split2 = side.split('"')
if len(split2) > 1:
inches += int(split2[0].split("'")[-1])
elif len(split2) == 1 and len(split1) > 1 and len(split1[1]) > 0:
inches += int(split1[1])
new_sides.append(str(inches) + '"')
inches_only.append("x".join(new_sides))
print(inches_only)
Output:
['96"x120"', '60"x72"', '60"x96"', '60"x96"', '60"x92"', '106"x144"', '97"x121"']

Related

Pivot function results differ from TradingView

I'm using this code to calculate pivot points.
def pivots_low(osc, LBR, LBL):
pivots = []
for i in range(len(osc) - LBR):
pivots.append(0)
pivot = True
if i > LBL:
for j in range(1, LBR + 1):
if osc[i] >= osc[i + j]:
pivot = False
for j in range(1, LBL + 1):
if osc[i] > osc[i - j]:
pivot = False
if pivot is True:
pivots[len(pivots) - 1] = osc[i]
for i in range(LBR):
pivots.append(0)
return pivots
This returns an array with 0's where there's no pivots and the value of the pivot if there is one.
When Comparing the results to TradingView (downloaded csv with pivot points), the only time it matches exactly is when lookback left and right are both 5. Otherwise it deviates in the number of total pivots and the location of some.
But using this code to calculate pivot highs:
def pivots_high(osc, LBR, LBL):
pivots = []
for i in range(len(osc)-LBR):
pivots.append(0)
pivot = True
if i > LBL:
for j in range(1,LBL + 1):
if osc[i] < osc[i-j]:
pivot = False
for j in range(1,LBR + 1):
if osc[i] <= osc[i+j]:
pivot = False
if pivot is True:
pivots[len(pivots)-1] = osc[i]
for i in range(LBR):
pivots.append(0)
return pivots
the results are perfect regardless of lookback values. But the code is almost exactly the same besides comparison.
What is going wrong here? This is day 3 of having this problem and I just cant fix it
To Reproduce:
Load Data:
Full_Data = pd.read_csv(file)
use this simple function to check matches between calculated pivots and TradingView pivots.
def match_pivs(data, pivs_h, pivs_l): //Data is a DataFrame loaded from tradingview csv
global lblh
global lbrh
global lbll
global lbrl
start = lbrh
if lbrl > lbrh:
start = lbrl
match_h = 0
tot_hd = 0
tot_hp = 0
match_l = 0
tot_ld = 0
tot_lp = 0
for i in range(start, len(data)):
if data['PivHigh'][i] != 0 and pivs_h[i-lbrh] != 0:
match_h += 1
if data['PivLow'][i] != 0 and pivs_l[i-lbrl] != 0:
match_l += 1
if data['PivHigh'][i] != 0:
tot_hd += 1
if data['PivLow'][i] != 0:
tot_ld += 1
if pivs_h[i] != 0:
tot_hp += 1
if pivs_l[i] != 0:
tot_lp += 1
print('PivsLow ' + str(tot_lp))
print('DataLows ' + str(tot_ld))
print('MatchesL ' + str(match_l))
print('PivsHigh ' + str(tot_hp))
print('DataHighs ' + str(tot_hd))
print('MatchesH ' + str(match_h))
and to get csv from TradingView:
//#version=5
indicator("Data Script", overlay=true, max_labels_count=500)
leftLenL = input.int(title="Pivot Low", defval=10, minval=1, inline="Pivot Low", group=lengthGroupTitle)
rightLenL = input.int(title="/", defval=10, minval=1, inline="Pivot Low", group=lengthGroupTitle)
leftLenH = input.int(title="Pivot High", defval=10, minval=1, inline="Pivot High", group=lengthGroupTitle)
rightLenH = input.int(title="/", defval=10, minval=1, inline="Pivot High", group=lengthGroupTitle)
ph = ta.pivothigh(leftLenH, rightLenH)
pl = ta.pivotlow(leftLenL, rightLenL)
if not na(ph)
plth := ph
else
plth := 0.0
if not na(pl)
pltl := pl
else
pltl := 0.0
plot(plth, 'PivHigh')
plot(pltl, 'PivLow')
then just download csv with this script loaded.
Run program with these three lines:
pl = pivots_low(Full_Data['low'], lbll, lbrl)
ph = pivots_high(Full_Data['high'], lbrh, lblh)
match_pivs(Full_Data, ph, pl)
Finally found a way.
I still have no idea why that code does not work but I've made a different way that seems to be doing the job 100% to tradingview data.
def checkhl(data_back, data_forward, hl):
if hl == 'high' or hl == 'High':
ref = data_back[len(data_back)-1]
for i in range(len(data_back)-1):
if ref < data_back[i]:
return 0
for i in range(len(data_forward)):
if ref <= data_forward[i]:
return 0
return 1
if hl == 'low' or hl == 'Low':
ref = data_back[len(data_back)-1]
for i in range(len(data_back)-1):
if ref > data_back[i]:
return 0
for i in range(len(data_forward)):
if ref >= data_forward[i]:
return 0
return 1
def pivot(osc, LBL, LBR, highlow)
left = []
right = []
for i in range(len(osc)):
pivots.append(0.0)
if i < LBL + 1:
left.append(osc[i])
if i > LBL:
right.append(osc[i])
if i > LBL + LBR:
left.append(right[0])
left.pop(0)
right.pop(0)
if checkhl(left, right, highlow):
pivots[i - LBR] = osc[i - LBR]
return pivots
then just do:
pivots_low = pivot(data, lbl, lbr, 'low')
pivots_high = pivot(data, lbl, lbr, 'high')
All the pivots will be in the actual position that they occur, not lbr bars after, otherwise the value will be 0.0
I'm not sure if this is efficient or not but it seems to work.

how to print in color a selected element from input

I have a list of data (extracted from a .cv) and I want to print a list of average with the inputed data in other color. I was able to do everything, but the color is messing me up.
Can an angel help me on how to put in this code?
(i just paste the final print part)
import colorama
from colorama import Fore, Back, Style
prefage = []
prefcheck = input("Choose a prefecture: ")
print("MINORITY RATIO IN DESCENDING ORDER WITH SELECTED PREFECTURE IN RED")
for pref in range(0, len(prefage)) :
print(prefage[pref][0].rjust(10), end=" ")
for minor in range(1, len(prefage[0])) :
print(prefage[pref][minor].rjust(10))
else:
print("Sorry, no prefecture with that name.")
Before this idea i was just printing the result in a very simple way:
for x in prefage:
if prefcheck in x:
print(x)
break
(removed second question)
EDIT:
the entire code is this:
import colorama
from colorama import Fore, Back, Style
table = []
f = open("population.csv", "r")
for line in f :
line = line.rstrip("\r\n")
field = line.split(",")
table.append(field) ##### field is a list
f.close()
#AVERAGE AGE MATH FOR EACH PREFECTURE
agelist = [ "Age" ]
ag = 2.5
for i in range(1, len(table[1])) :
agelist.append(ag)
ag += 5
#PRINT AVERAGE AGE LIST
prefage = []
for pref in range(1, len(table)) :
sum = 0
asum = 0
ysum = 0
for age in range(1, len(table[1])):
sum += int(table[pref][age])
asum += int(table[pref][age]) * agelist[age]
average = round(asum / sum, 1)
for age in range(1, 5):
ysum += int(table[pref][age])
yave = round(ysum*100 / sum, 1)
prefage.append([(table[pref][0]), str(yave)])
#SORT AVARAGE IN DESCENDING ORDER
n = len(prefage)
while n > 1 :
for p in range(0, n - 1) :
if prefage[p][1] < prefage[p+1][1] :
prefage[p], prefage[p+1] = prefage[p+1], prefage[p]
n = n -1
print("MINORITY RATIO IN DESCENDING ORDER:")
for pref in range(0, len(prefage)) :
print(prefage[pref][0].rjust(10), end=" ")
for avnum in range(1, len(prefage[0])) :
print(prefage[pref][avnum].rjust(10))
print()
prefcheck = input("Choose a prefecture: ")
for x in prefage:
if prefcheck in x:
print(x)
break
else:
print("Sorry, no prefecture with that name")
You can do
print(Fore.RED, x, Fore.RESET)
but it will add spaces between value and colors so better use f-string
print( f"{Fore.RED}{x}{Fore.RESET}" )
And then you can use f-string to justify: {x:>10}, {x:<10}, {x:^10}
from colorama import Fore, Back, Style
x = 10
print('|', x, '|') # without color
print('|', Fore.RED, x, Fore.RESET, '|') # with color
print( f'|{Fore.RED}{x}{Fore.RESET}|' ) # f-string
print( f'|{Fore.RED}{x:>10}{Fore.RESET}|' )
print( f'|{Fore.RED}{x:<10}{Fore.RESET}|' )
print( f'|{Fore.RED}{x:^10}{Fore.RESET}|' )
Result:
| 10 | # without colors
| 10 | # with colors
|10| # f-string with colors
| 10| # x:>10
|10 | # x:<10
| 10 | # x:^10
You can also use variable to set justify - with nested { }
max_len = 10
print( f'|{Fore.RED}{x:^{max_len}}{Fore.RESET}|' )
EDIT:
If you want bright colors then you can add Style.BRIGHT and remove it with Style.NORMAL
print( f'|{Fore.RED+Style.BRIGHT}{x:^10}{Fore.RESET+Style.NORMAL}|' )
You may also use variables
start_color = Fore.RED+Style.BRIGHT
reset_color = Fore.RESET+Style.NORMAL
print( f'|{start_color}{x:^10}{reset_color}|' )
So you can use it with if/else
if prefcheck in x:
start_color = Fore.RED+Style.BRIGHT
reset_color = Fore.RESET+Style.NORMAL
else:
start_color = ''
reset_color = ''
print( f'|{start_color}{x:^10}{reset_color}|' )
Or define shorter names
CR = Fore.RED+Style.BRIGHT # Color Red
CG = Fore.GREEN+Style.BRIGHT # Color Green
CX = Fore.RESET+Style.NORMAL # Color reset
if prefcheck in x:
start_color = CR
reset_color = CX
else:
start_color = ''
reset_color = ''
print( f'|{start_color}{x:^10}{reset_color}|' )

Replacing each character/string in a list one at a time with python

I have a string "MQADKVMEPT" and the desired output I want is:
.QADKVMEPT
M.ADKVMEPT
MQ.DKVMEPT
MQA.KVMEPT
MQAD.VMEPT
MQADK.MEPT
MQADKV.EPT
MQADKVM.PT
MQADKVME.T
MQADKVMEP.
Using this code:
motif = 'MQADKVMEPT'
motiflist = list(motif)
pos = 0
for aa in motiflist:
motiflist[pos] = '.'
pos += 1
str = ''
for a in motiflist:
str += a
print(str)
My output is:
.QADKVMEPT
..ADKVMEPT
...DKVMEPT
....KVMEPT
.....VMEPT
......MEPT
.......EPT
........PT
.........T
..........
How do I reinitialize the original motiflist so that it doesn't give me this output?
"Quick" fix would be copy the original list. Using your code:
motif = "MQADKVMEPT"
motiflist = list(motif)
pos = 0
for aa in motiflist:
motiflist_copy = motiflist.copy() # <--- copy the original list
motiflist_copy[pos] = "."
pos += 1
s = ""
for a in motiflist_copy:
s += a
print(s)
Prints:
.QADKVMEPT
M.ADKVMEPT
MQ.DKVMEPT
MQA.KVMEPT
MQAD.VMEPT
MQADK.MEPT
MQADKV.EPT
MQADKVM.PT
MQADKVME.T
MQADKVMEP.
Shorter solution:
motif = "MQADKVMEPT"
for i in range(len(motif)):
s = motif[:i] + "." + motif[i + 1 :]
print(s)

How can i create an input for choosing different files to access?

I am quite new to python so please bear with me.
Currently, this is my code:
import pandas as pd
import statistics
import matplotlib.pyplot as plt
import math
from datetime import datetime
start_time = datetime.now()
gf = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/exfileCLEAN2.csv",
skiprows=[1])
bf = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/2SeconddatasetCLEAN.csv",
skiprows=[1])
df = (input("Which data set? "))
magnitudes = (df['Magnitude '].values)
times = df['Time '].values
average = statistics.mean(magnitudes)
sd = statistics.stdev(magnitudes)
below = sd * 3
class data_set:
def __init__(self, index):
self.mags = []
self.i = index
self.mid_time = df['Time '][index]
self.mid_mag = df['Magnitude '][index]
self.times = []
ran = 80
for ii in range(ran):
self.times.append(df['Time '][self.i + ii - ran / 2])
self.mags.append(df['Magnitude '][self.i + ii - ran / 2])
data = []
today = float(input("What is the range? "))
i = 0
while (i < len(df['Magnitude '])):
if (abs(df['Magnitude '][i]) <= (average - below)):
# check if neighbours
t = df['Time '][i]
tt = True
for d in range(len(data)):
if abs(t - data[d].mid_time) <= today:
# check if closer to center
if df['Magnitude '][i] < data[d].mid_mag:
data[d] = data_set(i)
print("here")
tt = False
break
if tt:
data.append(data_set(i))
i += 1
print("found values")
# graphing
height = 2 # Change this for number of columns
width = math.ceil(len(data) / height)
if width < 2:
width = 2
fig, axes = plt.subplots(width, height, figsize=(30, 30))
row = 0
col = 0
for i in range(len(data)):
axes[row][col].plot(data[i].times, data[i].mags)
col += 1
if col > height - 1:
col = 0
row += 1
plt.show()
end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))
Currently, the error produced is this:
/Users/aaronhuang/.conda/envs/EXTTEst/bin/python "/Users/aaronhuang/PycharmProjects/EXTTEst/Code sandbox.py"
Which data set? gf
Traceback (most recent call last):
File "/Users/aaronhuang/PycharmProjects/EXTTEst/Code sandbox.py", line 14, in <module>
magnitudes = int(df['Magnitude '].values)
TypeError: string indices must be integers
Process finished with exit code 1
I am trying to have the user be able to choose which file to access to perform the rest of the code on.
So if the user types gf I would like the code to access the first data file.
Any help would be appreciated. Thank you
Why not use an if-statement at the beginning? Try this:
instead of:
gf = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/exfileCLEAN2.csv",
skiprows=[1])
bf = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/2SeconddatasetCLEAN.csv",
skiprows=[1])
df = (input("Which data set? "))
Use this:
choice = input("Which data set? ")
if choice == "gf":
df = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/exfileCLEAN2.csv",
skiprows=[1])
elif choice == "bf":
df = pd.read_csv(r"/Users/aaronhuang/Documents/Desktop/ffp/2SeconddatasetCLEAN.csv",
skiprows=[1])
else:
print("Error. Your choice is not valid")
df = ""
break

How to add a member function to an existing Python object?

Previously I created a lot of Python objects of class A, and I would like to add a new function plotting_in_PC_space_with_coloring_option() (the purpose of this function is to plot some data in this object) to class A and use those old objects to call plotting_in_PC_space_with_coloring_option().
An example is:
import copy
import numpy as np
from math import *
from pybrain.structure import *
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.datasets.supervised import SupervisedDataSet
import pickle
import neural_network_related
class A(object):
"""the neural network for simulation"""
'''
todo:
- find boundary
- get_angles_from_coefficients
'''
def __init__(self,
index, # the index of the current network
list_of_coor_data_files, # accept multiple files of training data
energy_expression_file, # input, output files
preprocessing_settings = None,
connection_between_layers = None, connection_with_bias_layers = None,
PCs = None, # principal components
):
self._index = index
self._list_of_coor_data_files = list_of_coor_data_files
self._energy_expression_file = energy_expression_file
self._data_set = []
for item in list_of_coor_data_files:
self._data_set += self.get_many_cossin_from_coordiantes_in_file(item)
self._preprocessing_settings = preprocessing_settings
self._connection_between_layers = connection_between_layers
self._connection_with_bias_layers = connection_with_bias_layers
self._node_num = [8, 15, 2, 15, 8]
self._PCs = PCs
def save_into_file(self, filename = None):
if filename is None:
filename = "network_%s.pkl" % str(self._index) # by default naming with its index
with open(filename, 'wb') as my_file:
pickle.dump(self, my_file, pickle.HIGHEST_PROTOCOL)
return
def get_cossin_from_a_coordinate(self, a_coordinate):
num_of_coordinates = len(a_coordinate) / 3
a_coordinate = np.array(a_coordinate).reshape(num_of_coordinates, 3)
diff_coordinates = a_coordinate[1:num_of_coordinates, :] - a_coordinate[0:num_of_coordinates - 1,:] # bond vectors
diff_coordinates_1=diff_coordinates[0:num_of_coordinates-2,:];diff_coordinates_2=diff_coordinates[1:num_of_coordinates-1,:]
normal_vectors = np.cross(diff_coordinates_1, diff_coordinates_2);
normal_vectors_normalized = np.array(map(lambda x: x / sqrt(np.dot(x,x)), normal_vectors))
normal_vectors_normalized_1 = normal_vectors_normalized[0:num_of_coordinates-3, :];normal_vectors_normalized_2 = normal_vectors_normalized[1:num_of_coordinates-2,:];
diff_coordinates_mid = diff_coordinates[1:num_of_coordinates-2]; # these are bond vectors in the middle (remove the first and last one), they should be perpendicular to adjacent normal vectors
cos_of_angles = range(len(normal_vectors_normalized_1))
sin_of_angles_vec = range(len(normal_vectors_normalized_1))
sin_of_angles = range(len(normal_vectors_normalized_1)) # initialization
for index in range(len(normal_vectors_normalized_1)):
cos_of_angles[index] = np.dot(normal_vectors_normalized_1[index], normal_vectors_normalized_2[index])
sin_of_angles_vec[index] = np.cross(normal_vectors_normalized_1[index], normal_vectors_normalized_2[index])
sin_of_angles[index] = sqrt(np.dot(sin_of_angles_vec[index], sin_of_angles_vec[index])) * np.sign(sum(sin_of_angles_vec[index]) * sum(diff_coordinates_mid[index]));
return cos_of_angles + sin_of_angles
def get_many_cossin_from_coordinates(self, coordinates):
return map(self.get_cossin_from_a_coordinate, coordinates)
def get_many_cossin_from_coordiantes_in_file (self, filename):
coordinates = np.loadtxt(filename)
return self.get_many_cossin_from_coordinates(coordinates)
def mapminmax(self, my_list): # for preprocessing in network
my_min = min(my_list)
my_max = max(my_list)
mul_factor = 2.0 / (my_max - my_min)
offset = (my_min + my_max) / 2.0
result_list = np.array(map(lambda x : (x - offset) * mul_factor, my_list))
return (result_list, (mul_factor, offset)) # also return the parameters for processing
def get_mapminmax_preprocess_result_and_coeff(self,data=None):
if data is None:
data = self._data_set
data = np.array(data)
data = np.transpose(data)
result = []; params = []
for item in data:
temp_result, preprocess_params = self.mapminmax(item)
result.append(temp_result)
params.append(preprocess_params)
return (np.transpose(np.array(result)), params)
def mapminmax_preprocess_using_coeff(self, input_data=None, preprocessing_settings=None):
# try begin
if preprocessing_settings is None:
preprocessing_settings = self._preprocessing_settings
temp_setttings = np.transpose(np.array(preprocessing_settings))
result = []
for item in input_data:
item = np.multiply(item - temp_setttings[1], temp_setttings[0])
result.append(item)
return result
# try end
def get_expression_of_network(self, connection_between_layers=None, connection_with_bias_layers=None):
if connection_between_layers is None:
connection_between_layers = self._connection_between_layers
if connection_with_bias_layers is None:
connection_with_bias_layers = self._connection_with_bias_layers
node_num = self._node_num
expression = ""
# first part: network
for i in range(2):
expression = '\n' + expression
mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i])
bias_coef = connection_with_bias_layers[i].params
for j in range(np.size(mul_coef, 0)):
temp_expression = 'layer_%d_unit_%d = tanh( ' % (i + 1, j)
for k in range(np.size(mul_coef, 1)):
temp_expression += ' %f * layer_%d_unit_%d +' % (mul_coef[j, k], i, k)
temp_expression += ' %f);\n' % (bias_coef[j])
expression = temp_expression + expression # order of expressions matter in OpenMM
# second part: definition of inputs
index_of_backbone_atoms = [2, 5, 7, 9, 15, 17, 19];
for i in range(len(index_of_backbone_atoms) - 3):
index_of_coss = i
index_of_sins = i + 4
expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
(index_of_coss, index_of_coss, self._preprocessing_settings[index_of_coss][1], self._preprocessing_settings[index_of_coss][0])
expression += 'layer_0_unit_%d = (raw_layer_0_unit_%d - %f) * %f;\n' % \
(index_of_sins, index_of_sins, self._preprocessing_settings[index_of_sins][1], self._preprocessing_settings[index_of_sins][0])
expression += 'raw_layer_0_unit_%d = cos(dihedral_angle_%d);\n' % (index_of_coss, i)
expression += 'raw_layer_0_unit_%d = sin(dihedral_angle_%d);\n' % (index_of_sins, i)
expression += 'dihedral_angle_%d = dihedral(p%d, p%d, p%d, p%d);\n' % \
(i, index_of_backbone_atoms[i], index_of_backbone_atoms[i+1],index_of_backbone_atoms[i+2],index_of_backbone_atoms[i+3])
return expression
def write_expression_into_file(self, out_file = None):
if out_file is None: out_file = self._energy_expression_file
expression = self.get_expression_of_network()
with open(out_file, 'w') as f_out:
f_out.write(expression)
return
def get_mid_result(self, input_data=None, connection_between_layers=None, connection_with_bias_layers=None):
if input_data is None: input_data = self._data_set
if connection_between_layers is None: connection_between_layers = self._connection_between_layers
if connection_with_bias_layers is None: connection_with_bias_layers = self._connection_with_bias_layers
node_num = self._node_num
temp_mid_result = range(4)
mid_result = []
# first need to do preprocessing
for item in self.mapminmax_preprocess_using_coeff(input_data, self._preprocessing_settings):
for i in range(4):
mul_coef = connection_between_layers[i].params.reshape(node_num[i + 1], node_num[i]) # fix node_num
bias_coef = connection_with_bias_layers[i].params
previous_result = item if i == 0 else temp_mid_result[i - 1]
temp_mid_result[i] = np.dot(mul_coef, previous_result) + bias_coef
if i != 3: # the last output layer is a linear layer, while others are tanh layers
temp_mid_result[i] = map(tanh, temp_mid_result[i])
mid_result.append(copy.deepcopy(temp_mid_result)) # note that should use deepcopy
return mid_result
def get_PC_and_save_it_to_network(self):
'''get PCs and save the result into _PCs
'''
mid_result = self.get_mid_result()
self._PCs = [item[1] for item in mid_result]
return
def train(self):
####################### set up autoencoder begin #######################
node_num = self._node_num
in_layer = LinearLayer(node_num[0], "IL")
hidden_layers = [TanhLayer(node_num[1], "HL1"), TanhLayer(node_num[2], "HL2"), TanhLayer(node_num[3], "HL3")]
bias_layers = [BiasUnit("B1"),BiasUnit("B2"),BiasUnit("B3"),BiasUnit("B4")]
out_layer = LinearLayer(node_num[4], "OL")
layer_list = [in_layer] + hidden_layers + [out_layer]
molecule_net = FeedForwardNetwork()
molecule_net.addInputModule(in_layer)
for item in (hidden_layers + bias_layers):
molecule_net.addModule(item)
molecule_net.addOutputModule(out_layer)
connection_between_layers = range(4); connection_with_bias_layers = range(4)
for i in range(4):
connection_between_layers[i] = FullConnection(layer_list[i], layer_list[i+1])
connection_with_bias_layers[i] = FullConnection(bias_layers[i], layer_list[i+1])
molecule_net.addConnection(connection_between_layers[i]) # connect two neighbor layers
molecule_net.addConnection(connection_with_bias_layers[i])
molecule_net.sortModules() # this is some internal initialization process to make this module usable
####################### set up autoencoder end #######################
trainer = BackpropTrainer(molecule_net, learningrate=0.002,momentum=0.4,verbose=False, weightdecay=0.1, lrdecay=1)
data_set = SupervisedDataSet(node_num[0], node_num[4])
sincos = self._data_set
(sincos_after_process, self._preprocessing_settings) = self.get_mapminmax_preprocess_result_and_coeff(data = sincos)
for item in sincos_after_process: # is it needed?
data_set.addSample(item, item)
trainer.trainUntilConvergence(data_set, maxEpochs=50)
self._connection_between_layers = connection_between_layers
self._connection_with_bias_layers = connection_with_bias_layers
print("Done!\n")
return
def create_sge_files_for_simulation(self,potential_centers = None):
if potential_centers is None:
potential_centers = self.get_boundary_points()
neural_network_related.create_sge_files(potential_centers)
return
def get_boundary_points(self, list_of_points = None, num_of_bins = 5):
if list_of_points is None: list_of_points = self._PCs
x = [item[0] for item in list_of_points]
y = [item[1] for item in list_of_points]
temp = np.histogram2d(x,y, bins=[num_of_bins, num_of_bins])
hist_matrix = temp[0]
# add a set of zeros around this region
hist_matrix = np.insert(hist_matrix, num_of_bins, np.zeros(num_of_bins), 0)
hist_matrix = np.insert(hist_matrix, 0, np.zeros(num_of_bins), 0)
hist_matrix = np.insert(hist_matrix, num_of_bins, np.zeros(num_of_bins + 2), 1)
hist_matrix = np.insert(hist_matrix, 0, np.zeros(num_of_bins +2), 1)
hist_matrix = (hist_matrix != 0).astype(int)
sum_of_neighbors = np.zeros(np.shape(hist_matrix)) # number of neighbors occupied with some points
for i in range(np.shape(hist_matrix)[0]):
for j in range(np.shape(hist_matrix)[1]):
if i != 0: sum_of_neighbors[i,j] += hist_matrix[i - 1][j]
if j != 0: sum_of_neighbors[i,j] += hist_matrix[i][j - 1]
if i != np.shape(hist_matrix)[0] - 1: sum_of_neighbors[i,j] += hist_matrix[i + 1][j]
if j != np.shape(hist_matrix)[1] - 1: sum_of_neighbors[i,j] += hist_matrix[i][j + 1]
bin_width_0 = temp[1][1]-temp[1][0]
bin_width_1 = temp[2][1]-temp[2][0]
min_coor_in_PC_space_0 = temp[1][0] - 0.5 * bin_width_0 # multiply by 0.5 since we want the center of the grid
min_coor_in_PC_space_1 = temp[2][0] - 0.5 * bin_width_1
potential_centers = []
for i in range(np.shape(hist_matrix)[0]):
for j in range(np.shape(hist_matrix)[1]):
if hist_matrix[i,j] == 0 and sum_of_neighbors[i,j] != 0: # no points in this block but there are points in neighboring blocks
temp_potential_center = [round(min_coor_in_PC_space_0 + i * bin_width_0, 2), round(min_coor_in_PC_space_1 + j * bin_width_1, 2)]
potential_centers.append(temp_potential_center)
return potential_centers
# this function is added after those old objects of A were created
def plotting_in_PC_space_with_coloring_option(self,
list_of_coordinate_files_for_plotting=None, # accept multiple files
color_option='pure'):
'''
by default, we are using training data, and we also allow external data input
'''
if list_of_coordinate_files_for_plotting is None:
PCs_to_plot = self._PCs
else:
temp_sincos = []
for item in list_of_coordinate_files_for_plotting:
temp_sincos += self.get_many_cossin_from_coordiantes_in_file(item)
temp_mid_result = self.get_mid_result(input_data = temp_sincos)
PCs_to_plot = [item[1] for item in temp_mid_result]
(x, y) = ([item[0] for item in PCs_to_plot], [item[1] for item in PCs_to_plot])
# coloring
if color_option == 'pure':
coloring = 'red'
elif color_option == 'step':
coloring = range(len(x))
fig, ax = plt.subplots()
ax.scatter(x,y, c=coloring)
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
plt.show()
return
But it seems that plotting_in_PC_space_with_coloring_option() was not binded to those old objects, is here any way to fix it (I do not want to recreate these objects since creation involves CPU-intensive calculation and would take very long time to do it)?
Thanks!
Something like this:
class A:
def q(self): print 1
a = A()
def f(self): print 2
setattr(A, 'f', f)
a.f()
This is called a monkey patch.

Categories