NLTK output changes when using the same input on the same machine - python

The next piece of code returns different output with the same input(self.SynSets)
Why can it be happening? Am I doing something wrong? or is it caused by python?
def FilterSynSets(self):
self.filteredSysNets = {}
for synset in self.SysNets:
for subsynset in self.SysNets:
length = wn.path_similarity(synset,subsynset)
if not length is None\
and length !=1\
and length >0:
target = synset.__str__().replace("'","")
source =subsynset.__str__().replace("'","")
connection="\"{}\"->\"{}\" [label={}]".format(
target,
source,
str(round(length,3)))
self.filteredSysNets[connection] = length
oldLength = len(self.filteredSysNets)
avarageVal = sum(self.filteredSysNets.values())/len(self.filteredSysNets)
self.filteredSysNets = {k: v for k,v in self.filteredSysNets.items() if v>=avarageVal}
newLength = len(self.filteredSysNets)
prt = newLength/oldLength*100
print ("avr -{}\n old -{}\n new - {}\n prec={}".format(avarageVal,oldLength,newLength,prt))
return self
Outputs:
http://screencast.com/t/eFMOROfkPXR
http://screencast.com/t/Fdd6ufhA

Related

OSError: The topology is loaded by filename extension, and the detected ".pd" format is not supported

I'm writing a code to generate features of ligands. Code is:
def parsePDB(self):
top = self.pdb.topology
residues = [str(residue)[:3] for residue in top.residues]
residues_cp = residues.copy()
# number of all groups in the complex except the ligand
LIG_number = residues.index('LIG')
self.residues_indices = [i for i in range(top.n_residues) if i != LIG_number]
# Get the types of atoms in the protein and ligand
self.rec_indices = top.select('protein')
self.lig_indices = top.select('resname LIG')
table, bond = top.to_dataframe()
self.all_ele = table['element']
self.lig_ele = table['element'][self.lig_indices]
H_num = []
for num, i in enumerate(self.all_ele):
if i == 'H':
H_num.append(num)
# Get the serial number of the atom in each residue or group
removes = []
for i in self.residues_indices:
atoms = top.residue(i).atoms
each_atoms = [j.index for j in atoms]
heavy_atoms = [x for x in each_atoms if not x in H_num]
if len(heavy_atoms) == 0:
removes.append(i)
else:
self.atoms_indices.append(heavy_atoms)
if len(removes) != 0:
for i in removes:
self.residues_indices.remove(i)
self.residues = [residues_cp[x] for x in self.residues_indices]
# Get the 3D coordinates for all atoms
self.xyz = self.pdb.xyz[0]
return self
I'm getting this error:
OSError: The topology is loaded by filename extension, and the detected ".pd" format is not supported. Supported topology formats include ".pdb", ".pdb.gz", ".h5", ".lh5", ".prmtop", ".parm7", ".prm7", ".psf", ".mol2", ".hoomdxml", ".gro", ".arc", ".hdf5" and ".gsd".
Can anyone please suggest me how to resolve this issue? Thanks

The program adds "" to the string I'm indexing

Im using this piece of code
shares_one = [(None,), ('200 # 496',), ('200 # 486',)]
print(shares_one)
variable_one = StringVar(add_sale_quantity_selector)
variable_one.set(shares_one[1]) # default value
y = OptionMenu(add_sale_quantity_selector, variable_one, *shares_one)
y.place(x=25, y=40)
x = (variable_one.get())
bad_chars = ['"']
for i in bad_chars:
test_string = x.replace(i, '')
print (test_string)
index = shares_one.index(test_string)
print(index)
but it gives me the following error
ValueError: "('200 # 496',)" is not in list
for the line index = shares_one.index(test_string)
whats the possible solution, sorry if anythings out of the picture.

How can I initialize dictionary in Python where key is an object?

I am trying to rephrase the implementation found here. This is what I have so far:
import csv
import math
import random
training_set_ratio = 0.67
training_set = []
test_set = []
class IrisFlower:
def __init__(self, petal_length, petal_width, sepal_length, sepal_width, flower_type):
self.petal_length = petal_length
self.petal_width = petal_width
self.sepal_length = sepal_length
self.sepal_width = sepal_width
self.flower_type = flower_type
def __hash__(self) -> int:
return hash((self.petal_length, self.petal_width, self.sepal_length, self.sepal_width))
def __eq__(self, other):
return (self.petal_length, self.petal_width, self.sepal_length, self.sepal_width) \
== (other.petal_length, other.petal_width, other.sepal_length, other.sepal_width)
def load_data():
with open('dataset.csv') as csvfile:
rows = csv.reader(csvfile, delimiter=',')
for row in rows:
iris_flower = IrisFlower(float(row[0]), float(row[1]), float(row[2]), float(row[3]), row[4])
if random.random() < training_set_ratio:
training_set.append(iris_flower)
else:
test_set.append(iris_flower)
def euclidean_distance(flower_one: IrisFlower, flower_two: IrisFlower):
distance = 0.0
distance = distance + math.pow(flower_one.petal_length - flower_two.petal_length, 2)
distance = distance + math.pow(flower_one.petal_width - flower_two.petal_width, 2)
distance = distance + math.pow(flower_one.sepal_length - flower_two.sepal_length, 2)
distance = distance + math.pow(flower_one.sepal_width - flower_two.sepal_width, 2)
return distance
def get_neighbors(test_flower: IrisFlower):
distances = []
for training_flower in training_set:
dist = euclidean_distance(test_flower, training_flower)
d = dict()
d[training_flower] = dist
print(d)
return
load_data()
get_neighbors(test_set[0])
Currently, print statements in the following code block:
def get_neighbors(test_flower: IrisFlower):
distances = []
for training_flower in training_set:
dist = euclidean_distance(test_flower, training_flower)
d = dict()
d[training_flower] = dist
print(d)
return
will have outputs similar to
{<__main__.IrisFlower object at 0x107774fd0>: 0.25999999999999945}
which is ok. But I do not want to create the dictionary first, and then append the key value, as in:
d = dict()
d[training_flower] = dist
So this is what I am trying:
d = dict(training_flower = dist)
However, it does not seem like the dist method is using the instance, but rather a String, because what I see printed is as follows:
{'training_flower': 23.409999999999997}
{'training_flower': 16.689999999999998}
How do I create the dictionary by using the object as key in one statement?
In your snippet, where you write d = dict(training_flower=dist), "training_flower" is a keyword argument for dict function and not an object. It is equivalent to writing d = {'training_flower': dist}. The only way to create a dictionary with an object as a key is to use the latter syntax:
d = {training_flower: dist}
To directly create a dict with a key which is not a valid keyword, use the {} syntax like:
Code:
d = {training_flower: 'a_value'}
Test Code:
training_flower = 'a key'
d = {training_flower: 'a_value'}
print(d)
Results:
{'a key': 'a_value'}
to initialize a dictionary with an object as a key, (edit: and the string in Stephen's example is an object anyway)
class Flower:
def __repr__(self):
return 'i am flower'
flower1 = Flower()
d = {flower1: 4}
print(d)
outputs
{i am flower: 4}
this is my first post here, and I know I'm late, sorry if it's a duplicate solution. just to show it works with an object.
would upvote Stephen's answer but I can't yet.

ID3 Algorithm in Python

I am trying to plot a decision tree using ID3 in Python. I am really new to Python and couldn't understand the implementation of the following code. I need to know how I can apply this code to my data.
from math import log
import operator
def entropy(data):
entries = len(data)
labels = {}
for feat in data:
label = feat[-1]
if label not in labels.keys():
labels[label] = 0
labels[label] += 1
entropy = 0.0
for key in labels:
probability = float(labels[key])/entries
entropy -= probability * log(probability,2)
return entropy
def split(data, axis, val):
newData = []
for feat in data:
if feat[axis] == val:
reducedFeat = feat[:axis]
reducedFeat.extend(feat[axis+1:])
newData.append(reducedFeat)
return newData
def choose(data):
features = len(data[0]) - 1
baseEntropy = entropy(data)
bestInfoGain = 0.0;
bestFeat = -1
for i in range(features):
featList = [ex[i] for ex in data]
uniqueVals = set(featList)
newEntropy = 0.0
for value in uniqueVals:
newData = split(data, i, value)
probability = len(newData)/float(len(data))
newEntropy += probability * entropy(newData)
infoGain = baseEntropy - newEntropy
if (infoGain > bestInfoGain):
bestInfoGain = infoGain
bestFeat = i
return bestFeat
def majority(classList):
classCount={}
for vote in classList:
if vote not in classCount.keys(): classCount[vote] = 0
classCount[vote] += 1
sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)
return sortedClassCount[0][0]
def tree(data,labels):
classList = [ex[-1] for ex in data]
if classList.count(classList[0]) == len(classList):
return classList[0]
if len(data[0]) == 1:
return majority(classList)
bestFeat = choose(data)
bestFeatLabel = labels[bestFeat]
theTree = {bestFeatLabel:{}}
del(labels[bestFeat])
featValues = [ex[bestFeat] for ex in data]
uniqueVals = set(featValues)
for value in uniqueVals:
subLabels = labels[:]
theTree[bestFeatLabel][value] = tree(split/(data, bestFeat, value),subLabels)
return theTree
So what I did after this is the following:
infile=open("SData.csv","r")
data=infile.read()
tree(data)
The error which I got is "1 argument is missing" which is the label which I have to define and this is where I don't know what I have to put. I tried the variable for which I have to make the decision tree but it doesn't work:
tree(data,MinTemp)
Here I get an error "MinTemp is not defined".
Please help me out and let me know what I should do to have a look at the tree.
Following is the part of data and I want to generate a tree for MinTemp
MinTemp,Rainfall,Tempat9,RHat9,CAat9,WSat9
high,no,mild,normal,overcast,weak
high,no,mild,normal,cloudy,weak
high,no,mild,normal,cloudy,mild
high,yes,mild,high,cloudy,weak
high,yes,mild,high,cloudy,mild
medium,yes,mild,high,cloudy,mild
high,no,mild,high,overcast,weak
high,no,mild,normal,sunny,weak
high,no,hot,normal,sunny,weak
high,no,hot,normal,overcast,weak

How do I raise the memory limit in python?

I have written a python (2.7) script but it use a lot of memory so I get a out of memory error. Is it possible to use memory?
My code (or the github):
from itertools import combinations
import numpy
# Find the unused members and put this in a other group
def findMembers(listIn,listMembers):
lengthlist2 = (len(listMembers)-len(listIn[0]))
group2 = [0] * lengthlist2 #making the other groups based on the length of the first group
for i in listIn:
wichRow = 0
for x in listMembers:
if not (x in i) :
group2[wichRow] = x
wichRow += 1
listIn.append(group2)
return listIn
#you give a list of members and the numbers of groups
#you get back all the possibilities of combinations
def findCombinations(listMembers,numbersOfGroups):
groupTemp = [] #list needed to save correctly all the combinations
group = [] #list needed for keep it simple
newGroup = [] #list that will be returned
for listPossibilities in combinations(listMembers,(len(listMembers)/numbersOfGroups)):
groupTemp.append(list(listPossibilities))
group.append(groupTemp) #saving all the possibilities
groupTemp = []
for k in group:
# place the unused members in group2
k = (findMembers(k,listMembers))
if numbersOfGroups > 2:
groupTemp = []
groupTemp = findCombinations(k[1],numbersOfGroups-1)
for i in groupTemp:
listTemp = []
listTemp.append(k[0])
listTemp.extend(i)
newGroup.append(listTemp)
else:
newGroup = group
return newGroup
# Calculate the happiness of the group
def findHappiness(tabel,listIn):
happiness = 0
for i in listIn:
for j in i:
for k in i:
happiness += tabel[j][k]
return happiness
def buildTabel(members): #build a random survey
tabel = numpy.random.random((members,members))
return tabel
def calculateHappiness(group):
print "Finding all the happiness: "
maxhappiness = 0
i = 0
for x in group:
happiness = findHappiness(tabel,x)
if happiness > maxhappiness:
maxhappiness = happiness
y = x
progress = int(round((((i)*1.0/(len(group)))*100.0)))
update_progress(progress)
i += 1
print "\n Best solution: ", y, " with: ", maxhappiness, " happiness"
def update_progress(progress):
print '\r[{0}] {1}%'.format('#'*(progress/5), progress),
if __name__ == "__main__":
members = 24 # members of the group
numbersOfGroups = 3
tabel = buildTabel(members) #preferences will be stored here
listMembers = (range(members)) #members of the group that need to be divided
print "Searching all the combinations..."
group = findCombinations(listMembers,numbersOfGroups) #find all the combinations (recursive)
print len(group)," combinations"
calculateHappiness(group) #calculate the most happiest group and print
the error:
Searching all the combinations...
Traceback (most recent call last):
File "main.py", line 75, in <module>
calculateHappiness(group) #calculate the most happiest group and print
File "main.py", line 38, in findCombinations
newGroup = group
MemoryError
I'm using windows 10 64bit with 6gb ram. Is it possible to use virtual ram or disk space of mine hard drive disk?

Categories