I am trying to do topic modelling using LSA, with the following code:
and in the next line I have this:
# Ask for the 10 most frequent words of every LSA topic.
top_n_words_lsa = get_top_n_words(10,
                                  lsa_keys,
                                  small_document_term_matrix,
                                  small_count_vectorizer)

# Print one line per topic, numbering the topics from 1.
for topic_number, topic_words in enumerate(top_n_words_lsa, start=1):
    print("Topic {}: ".format(topic_number), topic_words)
But I am facing this error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_15164/599799419.py in <module>
----> 1 top_n_words_lsa = get_top_n_words(10,
2 lsa_keys,
3 small_document_term_matrix,
4 small_count_vectorizer)
5
~\AppData\Local\Temp/ipykernel_15164/2401631730.py in get_top_n_words(n, keys,
document_term_matrix, count_vectorizer)
11 if keys[i] == topic:
12 temp_vector_sum += document_term_matrix[i]
---> 13 temp_vector_sum = temp_vector_sum.toarray()
14 top_n_word_indices = np.flip(np.argsort(temp_vector_sum)[0][-n:],0)
15 top_word_indices.append(top_n_word_indices)
AttributeError: 'int' object has no attribute 'toarray'
The related helper functions are defined below:
# Define helper functions
def get_top_n_words(n, keys, document_term_matrix, count_vectorizer):
'''
returns a list of n_topic strings, where each string contains the n most common
words in a predicted category, in order
'''
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# the comments below describe the statements in the order they appear.
# `n_topics` and `np` are not parameters; they are read from the enclosing scope.
top_word_indices = []
for topic in range(n_topics):
# The accumulator starts as the plain int 0. It only stops being an int once
# a row of document_term_matrix has been added to it.
temp_vector_sum = 0
for i in range(len(keys)):
if keys[i] == topic:
temp_vector_sum += document_term_matrix[i]
# This is the line the traceback above points at: if no keys[i] equalled
# `topic`, temp_vector_sum is still the int 0, which has no .toarray().
temp_vector_sum = temp_vector_sum.toarray()
# np.argsort sorts ascending, so the last n entries of row 0 are the columns
# with the n largest sums; np.flip puts the largest first.
top_n_word_indices = np.flip(np.argsort(temp_vector_sum)[0][-n:],0)
top_word_indices.append(top_n_word_indices)
top_words = []
for topic in top_word_indices:
topic_words = []
for index in topic:
# Build a one-hot row for this column index and let the vectorizer map it
# back to the corresponding word through inverse_transform.
temp_word_vector = np.zeros((1,document_term_matrix.shape[1]))
temp_word_vector[:,index] = 1
the_word = count_vectorizer.inverse_transform(temp_word_vector)[0][0]
# NOTE(review): assuming the_word is a str, encode('ascii') raises
# UnicodeEncodeError for any word containing non-ASCII characters.
topic_words.append(the_word.encode('ascii').decode('utf-8'))
# Each topic ends up as one space-separated string.
top_words.append(" ".join(topic_words))
return top_words
Can you please tell me what I am missing here?
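You initialise temp_vector_sum to 0 and only add matrix rows to it when keys[i] == topic. If no document is assigned to a given topic, nothing is ever added, so temp_vector_sum is still a plain int when the next line runs, and the int class doesn't define a toarray method. You could do something like: np.array([temp_vector_sum]).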
I am running a sentiment analysis on a csv file and I am receiving this error message. I have tried a few things to resolve it and have not been successful. Any help would be greatly appreciated! Thank you!
Here is my code:
def sentimentAFINN(text):
# Splits the lower-cased text with pattern_split, then looks every word up in
# afinn, using 0 for words that are not present.
words = pattern_split.split(text.lower())
# NOTE(review): wrapping the list in len(...) makes `sentiments` an int (the
# number of words), not the list of per-word scores.
sentiments = len(list(map(lambda word: afinn.get(word, 0), words)))
if sentiments:
# sum() and len() are applied to that int here; this is the line the
# traceback below reports.
sentiment = float(sum(sentiments))/math.sqrt(len(sentiments))
else:
sentiment = 0
return sentiment
def sentimentDisplayValue(sentimentScore):
    """Map a numeric sentiment score to a display label.

    Returns "Positive" for scores strictly above 0.1, "Negative" for scores
    strictly below -0.1, and "Neutral" for everything else (including the
    boundary values 0.1 and -0.1 themselves).
    """
    if sentimentScore > 0.1:
        return "Positive"
    if sentimentScore < -0.1:
        return "Negative"
    return "Neutral"
# Count how many comments fall under each sentiment label, and collect one
# table row per comment in `pt`.
totals = defaultdict(int)
for (index, row) in data.iterrows():
    text = row['comment']
    text_munged = munger(text)
    sentimentScore = sentimentAFINN(text_munged)
    sentimentDisplay = sentimentDisplayValue(sentimentScore)
    totals[sentimentDisplay] += 1
    pt.add_row([text_munged, sentimentScore, sentimentDisplay])

# NOTE(review): the snippet's indentation was lost; the two prints are placed
# after the loop so the table and the totals are shown once — confirm.
print(pt)
print(totals)
This is my error message:
TypeError Traceback (most recent call last)
<ipython-input-73-b20887003b41> in <module>
4 text = row['LikelyToReferComment']
5 text_munged = munger(text)
----> 6 sentimentScore = sentimentAFINN(text_munged)
7 sentimentDisplay = sentimentDisplayValue(sentimentScore)
8 totals[sentimentDisplay] = totals[sentimentDisplay] + 1
<ipython-input-72-f95f79f94b60> in sentimentAFINN(text)
29 sentiments = len(list(map(lambda word: afinn.get(word, 0), words)))
30 if sentiments:
---> 31 sentiment = float(sum(sentiments))/math.sqrt(len(sentiments))
32
33 else:
TypeError: 'int' object is not iterable
Your sentiments variable is an int, since it's the value returned by len(). You then call sum() and len() on sentiments, but both sum() and len() expect an iterable (for example a list), not an int.
You can change your sentimentAFINN() like this
def sentimentAFINN(text):
    """Score *text* as the sum of its word scores divided by sqrt(word count).

    The text is lower-cased and split with ``pattern_split``; every resulting
    word is looked up in ``afinn``, and words that are not present count as 0.
    Returns 0 when there are no words to score.
    """
    words = pattern_split.split(text.lower())
    # Keep the per-word scores as a list: sum() and len() below both need the
    # list itself, not its length.
    sentiments = [afinn.get(word, 0) for word in words]
    if not sentiments:
        return 0
    return float(sum(sentiments)) / math.sqrt(len(sentiments))
def FindBestGamma(u,DLx,DLy,DLcon,x0,y0,t,sigmax0,sigmay0,varinterval):
    """Return the candidate weight vector with the largest sampled LV value.

    Candidates are all (u-1)-tuples drawn from ``np.linspace(0, 1, u)`` whose
    entries sum to 1 (within 1e-4). For candidate ``i``, ``LV`` is evaluated
    1000 times, each time with a value taken from a fresh
    ``sampler(varinterval)``, and the maximum is recorded. The candidate
    whose recorded maximum is largest is returned as a NumPy array.

    ``DLx``, ``DLy`` and ``DLcon`` are indexed with the candidate index ``i``,
    so each must have at least as many entries as there are candidates.
    """
    # NOTE(review): the caller-supplied u is overridden here, as in the
    # original. forwd() reads gamma[0]..gamma[3], i.e. exactly u - 1 == 4
    # weights, so honouring another u would break it — confirm before changing.
    u = 5
    g = np.linspace(0., 1., u)
    gammaPossible = np.array([seq for seq in itertools.product(g, repeat=u-1)
                              if abs(sum(seq) - 1.0) < 1.0e-4])

    lv = []
    for i in range(len(gammaPossible)):
        L = []
        for _ in range(1000):
            # next(...) rather than .next(): the method form only exists on
            # Python 2 iterators, while the builtin works on both 2 and 3.
            var = np.array(next(sampler(varinterval)))
            tmp = LV(DLx[i], DLy[i], DLcon[i], gammaPossible[i], var,
                     x0, y0, t, sigmax0, sigmay0)
            L.append(tmp)
        lv.append(max(L))

    gamma = gammaPossible[np.argmax(np.array(lv))]
    return gamma
def forwd(x,y,gamma,var,x0,y0,t,sigmax0,sigmay0):
    """Return the gamma-weighted sum of four ``ff(...)`` terms.

    ``var`` must unpack into exactly 20 values: (mux, muy, sigmax, sigmay,
    rho) for each of four components, in that order. ``gamma`` must support
    indexing at positions 0 to 3; component k is weighted by ``gamma[k-1]``.

    NOTE(review): the "'builtin_function_or_method' object has no attribute
    '__getitem__'" error reported for the return line means ``gamma`` was a
    builtin function, not a sequence, when this was called — presumably a
    library function named ``gamma`` that the caller never replaced with the
    FindBestGamma result; verify in the caller.
    """
    [mux1,muy1,sigmax1,sigmay1,rho1,
     mux2,muy2,sigmax2,sigmay2,rho2,
     mux3,muy3,sigmax3,sigmay3,rho3,
     mux4,muy4,sigmax4,sigmay4,rho4] = var
    return (gamma[0]*ff(x,y,mux1,muy1,sigmax1,sigmay1,rho1,x0,y0,t,sigmax0,sigmay0)
            + gamma[1]*ff(x,y,mux2,muy2,sigmax2,sigmay2,rho2,x0,y0,t,sigmax0,sigmay0)
            + gamma[2]*ff(x,y,mux3,muy3,sigmax3,sigmay3,rho3,x0,y0,t,sigmax0,sigmay0)
            + gamma[3]*ff(x,y,mux4,muy4,sigmax4,sigmay4,rho4,x0,y0,t,sigmax0,sigmay0))
This is a sample of a section from my code. Whenever I try to run the whole code, I get the error: 'builtin_function_or_method' object has no attribute '__getitem__' in my def forwd section. I think it has to do with the gamma calculation, because when I test the code by giving gamma a fixed array such as gamma=[0.0,0.0,0.25,0.75], the code works. How do I fix the code to prevent the error in the forwd section?
File "/Users/maryjacketti/Desktop/SOSim/sunkunoil/s4.py", line 71, in integ
return forwd(x,y,gamma,var,x0new,y0new,t,sigmax0,sigmay0)*LV(DLx,DLy,DLcon,gamma,var,x0,y0,tt,sigmax0,sigmay0)
File "/Users/maryjacketti/Desktop/SOSim/sunkunoil/s4.py", line 49, in forwd
return gamma[0]*ff(x,y,mux1,muy1,sigmax1,sigmay1,rho1,x0,y0,t,sigmax0,sigmay0) + gamma[1]*ff(x,y,mux2,muy2,sigmax2,sigmay2,rho2,x0,y0,t,sigmax0,sigmay0) + gamma[2]*ff(x,y,mux3,muy3,sigmax3,sigmay3,rho3,x0,y0,t,sigmax0,sigmay0) + gamma[3]*ff(x,y,mux4,muy4,sigmax4,sigmay4,rho4,x0,y0,t,sigmax0,sigmay0)
TypeError: 'builtin_function_or_method' object has no attribute '__getitem__'
This is the error I get.
Python: 3.4
So for some reason the following code throws an error in python console.
But the append is working, and I get the correct (replace process completed) values in the new list.
# Finds the column containing a cell equal to 'Mobile', collects the values of
# that column from row 2 onwards into `mobile`, then strips formatting
# characters from each value into `mob_f`.
# NOTE(review): the snippet's indentation was lost, so how the loops below
# nest inside one another is not visible here.
mobile = []
for col in ws.iter_cols():
for cell in col:
if cell.value == 'Mobile':
x=column_index_from_string(cell.column)
for row in ws.iter_rows(min_col = x, min_row = 2, max_col = x):
for cell in row:
# cell.value is appended as-is. NOTE(review): the AttributeError below shows
# that at least one appended value is None — presumably an empty cell in the
# sheet; confirm against the data.
mobile.append(cell.value)
mob_f = []
for i in mobile:
h = i
# .replace is called unconditionally, so a None entry in `mobile` fails on
# the next line.
h = h.replace(" ","")
# NOTE(review): this replacement inserts a comma, whereas the other three
# insert an empty string — confirm that is intended.
h = h.replace("+(91)-",",")
h = h.replace("+91","")
h = h.replace("-","")
mob_f.append(h)
Error:
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
AttributeError: 'NoneType' object has no attribute 'replace'
I get this error
Traceback (most recent call last):
File "C:\Users\User1\Desktop\cellh5_scripts\ewa_pnas_fate.py", line 90, in <module>
ec.combine_classifiers("Event labels combined")
File "C:\Users\User1\Desktop\cellh5_scripts\ewa_pnas_fate.py", line 53, in combine_classifiers
pnas_class[pnas_class==3] = 1
TypeError: 'numpy.int32' object does not support item assignment
by running the code:
def combine_classifiers(self, output_name):
    """Merge each track's event labels with its 'secondary__expanded' labels.

    For every row of ``self.mapping`` and every (track id, track labels) pair
    in that row, the track labels are copied and, at the positions where the
    copy equals 1, replaced by the relabelled secondary prediction (read from
    the cellh5 position as ``label_idx + 1``, then 3 -> 1 and 2 -> 4).

    The result, one list of combined label arrays per row, is stored in
    ``self.mapping[output_name]`` as a ``pandas.Series``. Nothing is returned.

    Raises:
        TypeError: if a prediction's ``label_idx`` is a scalar rather than an
            array, because the boolean-mask assignments below need an array.
    """
    columns = ['Plate',
               'Well',
               'Site',
               'Gene Symbol',
               'siRNA ID',
               'Event track ids',
               'Event track labels']

    all_combined_classes = []
    # t1 and t2 ('Gene Symbol', 'siRNA ID') are unpacked but not used below.
    for _, (plate_name, w, p, t1, t2, track_ids, track_labels) in self.mapping[columns].iterrows():
        combined_classes = []
        ch5_file_handle = self.cellh5_handles[plate_name]
        ch5_pos = ch5_file_handle.get_position(w, str(p))
        for track_id, track_label in zip(track_ids, track_labels):
            # Work on a copy so the labels stored in self.mapping are not
            # modified by the in-place assignment further down.
            h2b_class = track_label.copy()
            print(track_id)
            pnas_class = ch5_pos.get_class_prediction('secondary__expanded')[track_id]['label_idx'] + 1
            print(pnas_class)

            # Positions where the event label is 1: only these receive the
            # secondary classifier's label.
            inter_idx = h2b_class == 1
            # Relabel the secondary classes: 3 becomes 1 first, then 2
            # becomes 4, so the freshly written 1s are left untouched.
            pnas_class[pnas_class==3] = 1
            pnas_class[pnas_class==2]+=2
            # combined_class is the same object as h2b_class (no new copy).
            combined_class = h2b_class
            combined_class[inter_idx] = pnas_class[inter_idx]
            combined_classes.append(combined_class)
        all_combined_classes.append(combined_classes)
    self.mapping[output_name] = pandas.Series(all_combined_classes)
I printed pnas_class, which is 1, and track_id, which is 50708. I'm wondering what the designer of the code wanted to do in this part:
# NOTE(review): the comments below assume h2b_class and pnas_class are numpy
# arrays of equal length — confirm.
# Boolean mask of the positions where h2b_class equals 1.
inter_idx = h2b_class == 1
# Relabel pnas_class in place: every 3 becomes 1 ...
pnas_class[pnas_class==3] = 1
# ... and then every 2 becomes 4 (the 1s written above are not affected).
pnas_class[pnas_class==2]+=2
# combined_class is the same object as h2b_class, not a new copy.
combined_class = h2b_class
# At the masked positions only, overwrite the h2b label with the relabelled
# pnas label; every other position keeps its h2b label.
combined_class[inter_idx] = pnas_class[inter_idx]
How can I change that to have the same meaning?
pnas_class is an integer, so you can't index into it and assign with pnas_class[pnas_class==3] = 1.
Maybe you are trying to assign 1 to pnas_class when it is equal to 3. In that case, try this:
# The two comparisons act as 0/1 multipliers: when pnas_class equals 3 the
# first term contributes 1 and the second 0; otherwise the first term is 0
# and the second is pnas_class itself, so the value is left unchanged.
pnas_class= 1*(pnas_class == 3) + pnas_class*(pnas_class != 3 )
OK, I found the mistake. You are right: pnas_class should not be an integer, and I now know why it is an integer instead of an array.