Traceback KeyError [closed] - python

Closed. This question is not written in English. It is not currently accepting answers.
Stack Overflow is an English-only site. The author must be able to communicate in English to understand and engage with any comments and/or answers their question receives. Don't translate this post for the author; machine translations can be inaccurate, and even human translations can alter the intended meaning of the post.
Closed 4 days ago.
Improve this question
Estou tendo problemas com meu código ele me retorna sempre o mesmo erro sendo ele:
Traceback (most recent call last):
File "c:\codgos\IA\AAHAS.py", line 77, in <module>
caminho, distancia_percorrida = a_estrela(estacaoini, estacaofinal)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\codgos\IA\AAHAS.py", line 70, in a_estrela
nova_estimativa_custo = distancia_em_linha_reta(vizinho, destino)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\codgos\IA\AAHAS.py", line 39, in distancia_em_linha_reta
return distancias[origem][destino]
~~~~~~~~~~~~~~~~~~^^^^^^^^^
KeyError: 'N'
No inicio achei que poderia ser algo com como as distancias estão escritas em Caps, mas alteralas não mudou nada
Esse é o código atual:
# Definindo a tabela de distâncias entre as estações
distancias = {
'A' : {'B':11,'C':20,'D':27,'E':40,'F':43,'G':39,'H':28,'I':18,'J':10,'K':18,'L':30,'M':30,'N':32},
'B' : {'A':11,'C':9,'D':16,'E':29,'F':32,'G':28,'H':19,'I':11,'J':4,'K':17,'L':23,'M':21,'N':24},
'C' : {'A':20,'B':9,'D':7,'E':20,'F':22,'G':19,'H':15,'I':10,'J':11,'K':21,'L':21,'M':13,'N':18},
'D' : {'A':27,'B':16,'C':7,'E':13,'F':16,'G':12,'H':13,'I':13,'J':18,'K':26,'L':21,'M':11,'N':17},
'E' : {'A':40,'B':29,'C':20,'D':13,'F':3,'G':2,'H':21,'I':25,'J':31,'K':38,'L':27,'M':16,'N':20},
'F' : {'A':43,'B':32,'C':22,'D':16,'E':3,'G':4,'H':23,'I':28,'J':33,'K':41,'L':30,'M':17,'N':20},
'G' : {'A':39,'B':28,'C':19,'D':12,'E':2,'F':4,'H':22,'I':25,'J':29,'K':38,'L':28,'M':13,'N':17},
'H' : {'A':28,'B':19,'C':15,'D':13,'E':21,'F':23,'G':22,'I':9,'J':22,'K':18,'L':7,'M':25,'N':30},
'I' : {'A':18,'B':11,'C':10,'D':13,'E':25,'F':28,'G':25,'H':9,'J':13,'K':12,'L':12,'M':23,'N':28},
'J' : {'A':10,'B':4,'C':11,'D':18,'E':31,'F':33,'G':29,'H':22,'I':13,'K':20,'L':27,'M':20,'N':23},
'K' : {'A':18,'B':17,'C':21,'D':26,'E':38,'F':41,'G':38,'H':18,'I':12,'J':20,'L':15,'M':35,'N':39},
'L' : {'A':30,'B':23,'C':21,'D':21,'E':27,'F':30,'G':28,'H':7,'I':12,'J':27,'K':15,'M':31,'N':37},
'M' : {'A':30,'B':21,'C':13,'D':11,'E':16,'F':17,'G':13,'H':25,'I':23,'J':20,'K':35,'L':31,'N':5},
'N' : {'A':32,'B':24,'C':18,'D':17,'E':20,'F':20,'G':17,'H':30,'I':28,'J':23,'K':39,'L':37,'M':5}
}
# Definindo as conexões entre as estaçõesA
conexoes = {
'A': ['B'],
'B': ['A', 'J', 'I', 'C'],
'C' : ['B', 'D', 'I', 'N'],
'D' : ['C', 'H', 'E', 'M'],
'E' : ['D', 'F', 'G', 'H'],
'F' : ['E'],
'G' : ['E'],
'H' : ['D', 'E', 'I', 'L'],
'I' : ['B', 'C', 'K', 'H'],
'J' : ['B'],
'K' :['I'],
'L' :['H'],
'M' :['C','D', 'N'],
'N' :['M']
}
# Definindo a função heurística (distância em linha reta até o destino)
def distancia_em_linha_reta(origem, destino):
return distancias[origem][destino]
# Definindo a função de avaliação
def custo_total(caminho):
distancia_total = 0
tempo_total = 0
for i in range(len(caminho) - 1):
distancia_total += distancias[caminho[i]][caminho[i+1]]
if i > 0 and caminho[i] not in conexoes[caminho[i-1]]:
tempo_total += 4
tempo_total += distancia_total / 30
return distancia_total + tempo_total
# Implementando o algoritmo A*
def a_estrela(origem, destino):
fronteira = [(origem, [origem], 0, distancia_em_linha_reta(origem, destino))]
visitados = set()
while fronteira:
fronteira.sort(key=lambda x: x[2] + x[3])
proximo = fronteira.pop(0)
estacao = proximo[0]
caminho = proximo[1]
distancia_percorrida = proximo[2]
estimativa_custo = proximo[3]
if estacao == destino:
return caminho, distancia_percorrida
visitados.add(estacao)
for vizinho in conexoes[estacao]:
if vizinho not in visitados:
novo_caminho = caminho + [vizinho]
nova_distancia = distancia_percorrida + distancias[estacao][vizinho]
nova_estimativa_custo = distancia_em_linha_reta(vizinho, destino)
fronteira.append((vizinho, novo_caminho, nova_distancia, custo_total(novo_caminho) + nova_estimativa_custo))
return None
# Testando a função com os dados de exemplo
estacaoini = input("Qual sua estação de origem(A, B, C, D, E, F, G, H, I, J, K, L, M ou N):")
estacaofinal = input("Qual sua estação de destino(A, B, C, D, E, F, G, H, I, J, K, L, M ou N):")
caminho, distancia_percorrida = a_estrela(estacaoini, estacaofinal)
print('Melhor caminho:', caminho)
print('Distância percorrida:', distancia_percorrida)
Estou tentando descobrir qual seria o problema mas não consigo de maneira nenhuma, já vasculhei, até a famosa ia do momento não conseguiu me ajudar

Related

Tridiagonal matrix algorithm : math are correct but i don't get the right results

I'm trying to code tdma. I have a working program for most of the process, but I can't get the results asked.
I'm supposed to get [2,3,-1,4,-2] as a result, but I get : [-0.5120543981481481, -0.1787210648148148, 0.4824421296296296, 0.4879012345679012, -3.7802469135802474].
I've checked one by one the coefficients for each step, and they're good. I think the problem comes from resolve2, but it worked for gauss pivot. It still can be math, but I'm pretty sure it's not.
import numpy as np
A=np.array([[3.,-3.,0,0,0],[2.,8.,4.,0,0],[0,4.,-8.,3.,0],[0,0,-7.,5.,1.],[0,0,0,-1.,3.]])
B0=[-3.,3.,25.,13.,-10.]
B = np.array([B0]).reshape(len(B0),1)
def det(A):
return np.linalg.det(A)
def op_linescal(A,i,x):
n = len(A[0]) # nbre de colonnes de A
for k in range(n):
A[i,k] = x*A[i,k] # la ligne Li devient la ligne x*Li
return A #retourne la nouvelle matrice
def op_linecombi(A,i,j,x):
n = len(A[0]) # nbre de colonnes de A
for k in range(n):
A[i,k] = A[i,k] + x*A[j,k] # la ligne Li devient la ligne Li + x*Lj
return A #retourne la nouvelle matrice
def tdma1(a,b):
ne = A.shape[0] #donne le nombre d'équations donc de ligne
if ne < 3:
return print("On ne peut pas résoudre avec l'algorithme de Thomas")
if det(A) == 0: # vérifie condition pour appliquer Thomas (matrice inversible : det(A) != 0)
return print("On ne peut pas utiliser cet algorithme de Thomas, la matrice A est singulière.")
l = A.copy() #création de la matrice à modifier
b = B.copy()
for i in range(ne):
# print("{}\n{}".format(a,b))
if i == 0 :
x = 1/l[i,i]
op_linescal(l,i,x) # on divise L1 par b1
op_linescal(b,i,x) # reproduction en b
elif i != 0 and i < ne-1: # i = 1,...,N-1
x1 = -l[i,i-1]
op_linecombi(l,i,i-1,x1) # Li => Li - ai*Li-1
op_linecombi(b,i,i-1,x1)
x2 = 1/l[i,i] # où a[i,i] = bi - ai*ci-1
op_linescal(l,i,x2) # Li => Li / (bi')
op_linescal(b,i,x2)
else: # i = N
x1 = -l[i,i-1]
op_linecombi(l,i,i-1,x1) # Li => Li - ai*Li-1
op_linecombi(b,i,i-1,x1)
print('\n',np.round(a,3))
print('\n{}\n'.format(np.round(b,2)))
return a,b
def resolve2(a,b):
"Renvoie la solution du système Ax = b lorsque A est triangulaire supérieure inversible"
n =len(a[0])
x = [0 for i in range(n)]
x[n-1] = b[n-1,0]/a[n-1,n-1]
for i in range(n-2,-1,-1):
s = 0
for j in range(i+1, n):
s = s + a[i,j]*x[j]
x[i] = (b[i,0] - s)/ a[i,i]
return x
def thomas(a, b):
if det(a) == 0: # vérifie condition pour appliquer Gauss
return print("On ne peut pas utiliser Gauss, la matrice A n'est pas inversible.")
a1,b1 = tdma1(a, b)
x = resolve2(a1, b1)
return x
print(thomas(A, B))

What am i doing wrong there? error "index out of range" trying to fill a list with lists

I want to do a list with lists inside, with a for and i get index out of range
I tryed with empleados.append() but it doesnt work
def main():
empleados=[]
for i in range(1):
empleados[i][0](input("Ingrese el Nombre: "))
empleados[i][1](input("Ingrese el Apellido: "))
empleados[i][2](int(input("Ingrese el Sueldo Base: ")))
empleados[i][3](int(input("Ingrese el AFP 1 o 2: ")))
empleados[i][4](datetime(int(input("Ingrese la Fecha de Ingreso(pulsa intro cada vez 2000 12 31): ")),int(input("/")),int(input("/"))))
empleados[i][5](int(input("Ingrese la cantidad de hijos que tiene: ")))
welcome to SO!
There's no list at empleados[0] to insert new values into. I find something like this is a little easier to read:
def main():
empleados=[]
for i in range(1):
empleado_nueva = []
empleado_nueva.append(input("Ingrese el Nombre: "))
empleado_nueva.append(input("Ingrese el Apellido: "))
empleado_nueva.append(int(input("Ingrese el Sueldo Base: ")))
empleado_nueva.append(int(input("Ingrese el AFP 1 o 2: ")))
empleado_nueva.append(datetime(int(input("Ingrese la Fecha de Ingreso(pulsa intro cada vez 2000 12 31): ")),int(input("/")),int(input("/"))))
empleado_nueva.append(int(input("Ingrese la cantidad de hijos que tiene: ")))
empleados.append(empleado_nueva)
return empleados
It's worth mentioning that the index-access pattern you're attempting (empleados[i][0] = ...) only works if there's something already at that index, for instance:
>>> x = []
>>> x[0] = 1
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
IndexError: list assignment index out of range
>>> x = ['a', 'b', 'c']
>>> x[0] = 'd'
>>> x
['d', 'b', 'c']
So the append's are probably the best way to go.
The problem is you're trying use empleados[i] as a list with an existing index you can insert into, when at the moment, it's not.
You need to set up your variables a separate list and then append them. E.g.
def main():
empleados=[]
vars = [
input("Ingrese el Nombre: "),
input("Ingrese el Apellido: "),
int(input("Ingrese el Sueldo Base: ")),
int(input("Ingrese el AFP 1 o 2: ")),
datetime(int(input("Ingrese la Fecha de Ingreso(pulsa intro cada vez 2000 12 31): ")),int(input("/")),int(input("/"))),
int(input("Ingrese la cantidad de hijos que tiene: ")
empleados.append(vars)

How to sum up values of different keys if the key is find in a list of sublists?

First of all English is not my mother tongue.
I have txt file which is like this : It is a list of word with numerical score :
id;word;pos;neu;neg
0;***;21;127;0
1;vraiment;407;156;37
2;oiseau-à-berceau;102;259;0
3;Stoph;95;308;37
4;Conscience;372;144;35
5;rançonnement;0;635;433
6;provenir;304;227;47
7;esthétique;285;54;1
8;traversée;360;167;38
9;avant toute chose;241;108;34
10;sucrée;52;276;0
11;Lasker-Schüler;146;284;0
12;difficile;0;47;866
13;adjuration;300;44;40
14;Besnier-Boeck-Schaumann;0;39;315
15;sir Sacheverell Sitwell;38;277;0
16;Maria Callas;235;311;0
17;sorbre;118;302;225
I have a list of sublists like this where each list contains some words :
[['trouver', 'très', 'esthétique'], ['pêche', 'peu'], ['gros', 'prise', 'là'], ['prise', 'puis', 'borne', 'cela', 'voir'], ['derrière', 'télé', 'cela', 'aller'], ['voir', 'système', 'cela', 'aller', 'être', 'difficile', 'faire', 'plus', 'sorbre'], ['laudrel', 'être', 'mourir', 'hier', 'soir'], ['venir', 'lo']]
I store the txt file in my script in a dictionnairy : which looks like this :
{'vraiment': ['407', '156', '37\r\n'], 'oiseau-à-berceau': ['102', '259', '0\r\n'], 'Stoph': ['95', '308', '37\r\n'], 'Conscience': ['372', '144', '35\r\n'], 'rançonnement': ['0', '635', '433\r\n'], 'provenir': ['304', '227', '47\r\n'], 'esthétique': ['285', '54', '1\r\n'], 'traversée': ['360', '167', '38\r\n'], 'avant toute chose': ['241', '108', '34\r\n'], 'sucrée': ['52', '276', '0\r\n'], 'Lasker-Schüler': ['146', '284', '0\r\n'], 'difficile': ['0', '47', '866\r\n'], 'adjuration': ['300', '44', '40\r\n'], 'Besnier-Boeck-Schaumann': ['0', '39', '315\r\n'], 'sir Sacheverell Sitwell': ['38', '277', '0\r\n'], 'Maria Callas': ['235', '311', '0\r\n'], 'sorbre': ['118', '302', '225']}
for each sublist in my list , I check if any of its words is my dictionnary and if it finds a word in my sublist in the dictionnary , it retrieve the numerical value of each word in my list and sum the value of each word for each column.
For example :
[ 'voir', 'système', 'aller', 'être', 'difficile', 'faire', 'plus',
'sorbre'] : 'difficile' ['0', '47', '866\r\n']
'sorbre': ['118', '302', '225']
this sublist above has two word present in my dico, I would like to sum each together each index of all the words in my sublist and do it for each sublist in my list;
I am expecting this for each list of my sublist to get the sum of score in the first index(0) and then the sum of the score in the second index(1) and the sum of the score for the third index(2) for all the word present in my dico and my sublist:
[ 'voir', 'système', 'aller', 'être', 'difficile', 'faire', 'plus',
'sorbre'] : pos : 118, neu : 349, neg 1091
This is my script :
from itertools import islice
dico = {}
#lexique = open('lexique.txt', 'rb')JeuxdeMotsPolarise.txt'
with codecs.open('lexique_entrainement.txt', 'r', 'utf-8', errors = 'ignore') as lexique:
for ligne in islice(lexique, 31, None):
#print(ligne)
ligne = ligne.split(';')
#print(ligne)
#print(ligne[-4:])
dico.update({ligne[-4]:ligne[-3:]})
print(dico)
somme_V0 = []
somme_V1 = []
somme_V2 = []
for k, v in dico.items():
for subl in Lemme_filtre2:
for w in subl:
if k == w:
somme_V0.append(int(v[0]))
somme_V1.append(int(v[1]))
somme_V2.append(int(v[2]))
#print(subl)
print(str(subl) + " : " + " a un score pos de: " + str(sum(somme_V0)) + " un score de neu de : " + str(sum(somme_V1)) + " et un score de neg de : " + str(sum(somme_V2)))118 349 1091
the output is this :
['trouver', 'très', 'esthétique'] : a un score pos de: 285 un score de neu de : 54 et un score de neg de : 1
['voir', 'système', 'cela', 'aller', 'être', 'difficile', 'faire', 'plus', 'sorbre'] : a un score pos de: 285 un score de neu de : 101 et un score de neg de : 867
['voir', 'système', 'cela', 'aller', 'être','difficile', 'faire','plus', 'sorbre'] : a un score pos de: 403 un score de neu de : 403 et un score de neg de : 1092
The output is not the same as what i imagined, I think it is summing up all the value of the words in all the sublist , Do you have any idea how can I fix it , I should be like this :
['trouver', 'très', 'esthétique'] : a un score pos de: 285 un score de neu de : 54 et un score de neg de : 1
['voir', 'système', 'cela', 'aller', 'être', 'difficile', 'faire', 'plus', 'sorbre'] : a un score pos de: 118 un score de neu de : 349 et un score de neg de : 1091
I would like also if the one the third sum for each sublist is higher than the other two , I want to display the list and the higher score alone.
Hopefully this will help.
dico = {"a":["1","2","3"],"b":["2","2","3"],"c":["1","3","4"],"d":["1","3","5"]}
Lemme_filtre2 = [["a","b","l","p"],["l","f","l","p"],["a","g","d","p"],["p","c","o","p"]]
for subl in Lemme_filtre2:
somme_V0 = []
somme_V1 = []
somme_V2 = []
for w in subl:
if w in dico:
somme_V0.append(int(dico[w][0]))
somme_V1.append(int(dico[w][1]))
somme_V2.append(int(dico[w][2]))
if len(somme_V0)!=0:
print(str(subl) + " : " + " a un score pos de: " + str(sum(somme_V0)) + " un score de neu de : " + str(sum(somme_V1)) + " et un score de neg de : " + str(sum(somme_V2)))
output
['a', 'b', 'l', 'p'] : a un score pos de: 3 un score de neu de : 4 et un score de neg de : 6
['a', 'g', 'd', 'p'] : a un score pos de: 2 un score de neu de : 5 et un score de neg de : 8
['p', 'c', 'o', 'p'] : a un score pos de: 1 un score de neu de : 3 et un score de neg de : 4
Here is your final code:
# Consider below as your input text
text_file = """
id;word;pos;neu;neg
0;***;21;127;0
1;vraiment;407;156;37
2;oiseau-à-berceau;102;259;0
3;Stoph;95;308;37
4;Conscience;372;144;35
5;rançonnement;0;635;433
6;provenir;304;227;47
7;esthétique;285;54;1
8;traversée;360;167;38
9;avant toute chose;241;108;34
10;sucrée;52;276;0
11;Lasker-Schüler;146;284;0
12;difficile;0;47;866
13;adjuration;300;44;40
14;Besnier-Boeck-Schaumann;0;39;315
15;sir Sacheverell Sitwell;38;277;0
16;Maria Callas;235;311;0
17;sorbre;118;302;225
"""
lst = [['trouver', 'très', 'esthétique'], ['pêche', 'peu'], ['gros', 'prise', 'là'], ['prise', 'puis', 'borne', 'cela', 'voir'], ['derrière', 'télé', 'cela', 'aller'], ['voir', 'système', 'cela', 'aller', 'être', 'difficile', 'faire', 'plus', 'sorbre'], ['laudrel', 'être', 'mourir', 'hier', 'soir'], ['venir', 'lo']]
foo = ({i.split(";")[1]:i.split(";")[2:] for i in text_file.split("\n") if i}) # Spliting the text content and creating a dictionary
z = {}
for i in lst: # Loop through the lst
for j in i: # Loop through the element in inner list
if j in foo: # check if inner value is in dict foo
# Here we are creating our final result dictionary
if z.get(repr(i)): # check if the key i already exists in dictionary z, if yes then we will add the values of the dictionary. repr is used to use list as a key of dictionary.
t = [int(a) + int(b) for a,b in zip(z[repr(i)] ,foo[j])] # zip takes one element at a time of of both list. we are adding it and storing it in a list
z[repr(i)] = t # Adding the list t to dict z
else:
z[repr(i)] = foo[j] # If the value is not existing in dict z then we just keep the output of foo
print (z) # Printing our final dictonary
# Below I have looped through the result to print it as required
for k, v in z.items():
print (f"{k}: a un score pos de: {v[0]} un score de neu de : {v[1]} et un score de neg de : {v[2]}") # Printing our dict one by one
Result
# Dict output
{"['trouver', 'très', 'esthétique']": ['285', '54', '1'], "['voir', 'système', 'cela', 'aller', 'être', 'difficile', 'faire', 'plus', 'sorbre']": [
118, 349, 1091]}
# Final output
['trouver', 'très', 'esthétique']: a un score pos de: 285 un score de neu de : 54 et un score de neg de : 1
['voir', 'système', 'cela', 'aller', 'être', 'difficile', 'faire', 'plus', 'sorbre']: a un score pos de: 118 un score de neu de : 349 et un score de neg de : 1091

I don't get the right nodes with my networkx code

I am working on my networkx. The question is about :
Consider the network depicted in Figure 19.27; suppose that each node starts with the behavior B, and each node has a threshold of q = 1/2 for switching to behavior A.
1(a) : Now, let e and f form a two-node set S of initial adopters of behavior A. If other nodes follow the threshold rule for choosing behaviors, which nodes will eventually switch to A?
This is the Network where it is all about:
Now, I know the answer to the question is {'c', 'e', 'f', 'i', 'k'}
But from my code, I only get {'c', 'e', 'f', 'i'}, what is wrong?
# Network, dit is de figuur 19.27 van chapter 19 voor vraag 19.8
def create_network():
H = nx.Graph()
H.add_edge('e', 'i')
H.add_edge('e', 'c')
H.add_edge('c', 'f')
H.add_edge('i', 'f')
H.add_edge('f', 'g')
H.add_edge('i', 'k')
H.add_edge('k', 'j')
H.add_edge('g', 'j')
H.add_edge('g', 'd')
H.add_edge('j', 'h')
# Voegt de edges toe zoals die in figuur 19.27 van Chapter 19 afgebeeld is.
H.add_edge('d', 'h')
for n in {'e', 'f'}: # Initial adaptors
# True betekent dat de node the nieuwe Behavior heeft geadopt
H.node[n]['new'] = True
for n in set(H.nodes()) - {'e', 'f'}: # initial adaptors
# False betekent dat de node de nieuwe Behavior Reject, dus de node
# blijft bij de oorspronkelijke behavior
H.node[n]['new'] = False
nx.draw(H, with_labels=True) # De output
return H
H = create_network()
# Het netwerk dat na een ronde adaption testen een update uitvoert of de
# eventueel toegevoegde nodes na de adaption
def adapted_network(G, a=1, b=1):
ts = b / (float(b) + a) # threshold formule van b/(a+b)
# https://networkx.github.io/documentation/networkx-1.10/reference/generated/networkx.classes.function.get_node_attributes
getatt = nx.get_node_attributes(G, 'new')
nodes = [node for node in G.nodes(True) if node[1]['new'] == False]
for node, attribute in nodes:
readyforadopt = len([neighbor for neighbor in G.neighbors(
node) if getatt[neighbor] == True]) / float(len(G.neighbors(node)))
# Node zal een behavior adopten als de gegeven ts de ts van de node
# overstijgt of gelijk is
if readyforadopt >= ts:
G.node[node]['new'] = True
return [node for node in G.nodes(True) if node[1]['new'] == True]
H = create_network()
print [node for node in H.nodes(True) if node[1]['new'] == True]
print adapted_network(H)
You have to repeat calling adapted_network(H) until the spread of A stops.
adapted_network(H)
# [('f', {'new': True}),
# ('e', {'new': True}),
# ('c', {'new': True}),
# ('i', {'new': True})]
adapted_network(H)
# [('f', {'new': True}),
# ('e', {'new': True}),
# ('k', {'new': True}),
# ('c', {'new': True}),
# ('i', {'new': True})]

Error when I try to iterate more than once

I've got this program which calculate k-means for IA
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from random import sample
from itertools import repeat
from math import sqrt
# Parametros
k = 6
maxit = 2
def leeValoracionesFiltradas (nomFichero = "valoracionesFiltradas.data"):
lineas = [(l.strip()).split("\t") for l in (open(nomFichero).readlines())]
diccio = {}
for l in lineas:
diccio[int(l[0])] = {}
for l in lineas:
diccio[int(l[0])][int(l[1])] = (float(l[2]),float(l[3]))
return diccio
def distEuclidea(dic1, dic2):
# Se calcula la suma de cuadrados de los elementos comunes a los dos diccionarios
sum2 = sum([pow(dic1[elem]-dic2[elem], 2)
for elem in dic1 if elem in dic2])
return sqrt(sum2)
def similitudEuclidea(dic1, dic2):
return 1/(1+distEuclidea(dic1, dic2))
def coefPearson(dic1, dic2):
# Se consiguen los elementos comunes en ambos diccionarios
comunes = [x for x in dic1 if x in dic2]
nComunes = float(len(comunes))
# Si no hay comunes -> cero
if nComunes==0:
return 0
# Calculo de las medias de cada diccionario
media1 = sum([dic1[x][1] for x in comunes]) / nComunes
media2 = sum([dic2[x][1] for x in comunes]) / nComunes
# Numerador y denominador
num = sum([(dic1[x][1] - media1) * (dic2[x][1] - media2) for x in comunes])
den1 = sqrt(sum([pow(dic1[x][1] - media1, 2) for x in comunes]))
den2 = sqrt(sum([pow(dic2[x][1] - media2, 2) for x in comunes]))
den = den1 * den2
# Caculo del coeficiente
if den==0:
return 0
return num/den
# Dado un diccionario {key1 : {key2 : valor}} calcula el agrupamiento k-means
# con k clusters (grupo), ejecutando maxit iteraciones, con la funcion de similitud especificada
# Retorna una tupla
# -{key1:numero de clusters} con las asignaciones de clusters (a que clusters pertenece cada elemento)
# -[{key2:valores}] una lista con los k centroides (media de los valores para cada clusters)
def kmeans (diccionario, k, maxit, similitud = coefPearson):
# K puntos aleatorios son elegidos como centroides incialmente
# Cada centroide es {key2 : valor}
centroides = [diccionario[x] for x in sample(diccionario.keys(), k)]
# Se asigna cada key1 a un numero de cluster
previo = None
asignacion = {}
# En cada iteracion se asignan puntos a los centroides y se calculan nuevos centroides
for it in range(maxit):
# Se asignan puntos a los centroides mas cercanos
for key1 in diccionario:
similitudes = map(similitud,repeat(diccionario[key1],k), centroides)
asignacion[key1] = similitudes.index(max(similitudes))
# Si no hay cambios en la asignacion, se termina
if previo == asignacion: break
previo = asignacion
# Se recalculan los centroides (se anotan los valores de cada key a cada centroide)
valores = {x : {} for x in range(k)}
contadores = {x : {} for x in range(k)}
for key1 in diccionario:
grupo = asignacion[key1]
for key2 in diccionario[key1]:
if not valores[grupo].has_key(key2):
valores [grupo][key2] = 0
contadores [grupo][key2] = 0
valores [grupo][key2] += diccionario[key1][key2][1]
contadores[grupo][key2] += 1
# Se calculan las medias (nuevos centroides)
centroides = []
for grupo in valores:
centro = {}
for key2 in valores[grupo]:
centro[key2] = round((valores[grupo][key2] / contadores[grupo][key2]),2)
centroides.append(centro)
if None in centroides: break
return (asignacion, centroides)
# Se obtiene el diccionario de valoraciones (las valoraciones ya han sido filtradas)
diccionario = leeValoracionesFiltradas()
# Se obtienen las asignaciones y los centroides con la correlacion de Pearson
tupla = kmeans (diccionario, k, maxit)
asignaciones = tupla[0]
centroids = tupla[1]
print asignaciones
print centroids
And when I execute this for example for maxit = 2, it throws:
File "kmeans_dictio.py", line 46, in coefPearson
media2 = sum([dic2[x][1] for x in comunes]) / nComunes
TypeError: 'float' object has no attribute '__getitem__'
How can I fix this?
It looks like you have a dictionary (dic2) of floats and a dictionary of dictionaries of floats (dic1) that you are pulling an item out of with this line:
comunes = [x for x in dic1 if x in dic2]
Then you are trying to iterate over this float here:
media2 = sum([dic2[x][1] for x in comunes]) / nComunes
To fix this look at dic1 and dic2 and how they are defined.

Categories