I am running a sentiment analysis on a csv file and I am receiving this error message. I have tried a few things to resolve it and have not been successful. Any help would be greatly appreciated! Thank you!
Here is my code:
def sentimentAFINN(text):
words = pattern_split.split(text.lower())
sentiments = len(list(map(lambda word: afinn.get(word, 0), words)))
if sentiments:
sentiment = float(sum(sentiments))/math.sqrt(len(sentiments))
else:
sentiment = 0
return sentiment
def sentimentDisplayValue(sentimentScore):
if sentimentScore > 0.1:
return "Positive"
elif sentimentScore < -0.1:
return "Negative"
else:
return "Neutral"
totals = defaultdict(int)
for (index, row) in data.iterrows():
text = row['comment']
text_munged = munger(text)
sentimentScore = sentimentAFINN(text_munged)
sentimentDisplay = sentimentDisplayValue(sentimentScore)
totals[sentimentDisplay] = totals[sentimentDisplay] + 1
pt.add_row([text_munged, sentimentScore, sentimentDisplay])
print (pt)
print (totals)
This is my error message:
TypeError Traceback (most recent call last)
<ipython-input-73-b20887003b41> in <module>
4 text = row['LikelyToReferComment']
5 text_munged = munger(text)
----> 6 sentimentScore = sentimentAFINN(text_munged)
7 sentimentDisplay = sentimentDisplayValue(sentimentScore)
8 totals[sentimentDisplay] = totals[sentimentDisplay] + 1
<ipython-input-72-f95f79f94b60> in sentimentAFINN(text)
29 sentiments = len(list(map(lambda word: afinn.get(word, 0), words)))
30 if sentiments:
---> 31 sentiment = float(sum(sentiments))/math.sqrt(len(sentiments))
32
33 else:
TypeError: 'int' object is not iterable
Your sentiments variable is an int since its the value returned by len(). You are trying to call sum() and len() on sentiments. Both sum() and len() expect an iterable datatype.
You can change your sentimentAFINN() like this
def sentimentAFINN(text):
words = pattern_split.split(text.lower())
# save your list in sentiments
sentiments = list(map(lambda word: afinn.get(word, 0), words))
# now you check length of sentiments and return accordingly
return float(sum(sentiments))/math.sqrt(len(sentiments)) if len(sentiments) > 0 else 0
Related
I am try to do topic modelling using LSA, with the following code:
and in the next line I have this:
top_n_words_lsa = get_top_n_words(10,
lsa_keys,
small_document_term_matrix,
small_count_vectorizer)
for i in range(len(top_n_words_lsa)):
print("Topic {}: ".format(i+1), top_n_words_lsa[i])
But I am facing this error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_15164/599799419.py in <module>
----> 1 top_n_words_lsa = get_top_n_words(10,
2 lsa_keys,
3 small_document_term_matrix,
4 small_count_vectorizer)
5
~\AppData\Local\Temp/ipykernel_15164/2401631730.py in get_top_n_words(n, keys,
document_term_matrix, count_vectorizer)
11 if keys[i] == topic:
12 temp_vector_sum += document_term_matrix[i]
---> 13 temp_vector_sum = temp_vector_sum.toarray()
14 top_n_word_indices = np.flip(np.argsort(temp_vector_sum)[0][-n:],0)
15 top_word_indices.append(top_n_word_indices)
AttributeError: 'int' object has no attribute 'toarray'
The related helper functions are defined below:
# Define helper functions
def get_top_n_words(n, keys, document_term_matrix, count_vectorizer):
'''
returns a list of n_topic strings, where each string contains the n most common
words in a predicted category, in order
'''
top_word_indices = []
for topic in range(n_topics):
temp_vector_sum = 0
for i in range(len(keys)):
if keys[i] == topic:
temp_vector_sum += document_term_matrix[i]
temp_vector_sum = temp_vector_sum.toarray()
top_n_word_indices = np.flip(np.argsort(temp_vector_sum)[0][-n:],0)
top_word_indices.append(top_n_word_indices)
top_words = []
for topic in top_word_indices:
topic_words = []
for index in topic:
temp_word_vector = np.zeros((1,document_term_matrix.shape[1]))
temp_word_vector[:,index] = 1
the_word = count_vectorizer.inverse_transform(temp_word_vector)[0][0]
topic_words.append(the_word.encode('ascii').decode('utf-8'))
top_words.append(" ".join(topic_words))
return top_words
Can you please tell me what I am missing here?
You define temp_vector_sum as 0 and then add to it. So it's an object of type int. That class doesn't define a function toarray. You could do something like: np.array([temp_vector_sum]).
My 1st time here!
I think I got a trouble in my code, could you help me?
The point is, I created a function with nltk module and I don't know where I'm going wrong when it reports me that errors:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-94-7fe386d50c39> in <module>
----> 1 frasescomstemming = fazstemmer(base)
<ipython-input-93-e5451b452b6b> in fazstemmer(texto)
7 for (palavras, emocao) in texto:
8 comstemming = [str(stemmer.stem(p))
----> 9 for p in palavras.split() if p not in stopwords]
10 frasessstemming.append((comstemming, emocao))
11 return frasessstemming
AttributeError: 'int' object has no attribute 'split'
Here is my code:
def fazstemmer(texto):
stemmer = nltk.stem.RSLPStemmer()
frasessstemming = []
for (palavras, emocao) in texto:
comstemming = [str(stemmer.stem(p))
for p in palavras.split() if p not in stopwords]
frasessstemming.append((comstemming, emocao))
return frasessstemming
As for AttributeError: 'int' object has no attribute 'split' This should be caused by for p in palavras.split() if p not in stopwords] You can change the type of palavras in the last loop.
def fazstemmer(texto):
stemmer = nltk.stem.RSLPStemmer()
frasessstemming = []
for (palavras, emocao) in texto:
comstemming = str(stemmer.stem(p))
palavras = str(palavras)
for p in palavras.split():
if p not in stopwords:
frasessstemming.append((comstemming, emocao))
return frasessstemming
Without an example input texto and desired output, it's hard to guess what's the data structure / type of the variables, so here's a guess.
Try this:
stemmer = nltk.stem.RSLPStemmer()
def fazstemmer(text):
for words, emotion in text:
words, emotion = str(words), str(emotion)
for word in words.split():
if word not in stopwords:
yield stemmer.stem(word), emotion
stemmed_text = list(fazstemmer(text))
Recently i starded learning python, this would be my first "useful" script (simple word learning script). I got a "TypeError: 'dict' object is not callable", but it's unrelevant in this case(alteast for me).
def discounted_price(total,discount,minprice=100):
fruits = ['Apple','Avocado','Banana','Blackberries','Blueberries','Cherries','Date Fruit','Grapes','Guava','Jackfruit','Kiwifruit']
prices = [6,5,3,10,12,7,14,15,8,7,9]
d1 = dict(zip(fruits,prices))
chart = ['Blueberries','Blueberries','Grapes','Apple','Apple','Apple','Blueberries','Guava','Jackfruit','Blueberries','Jackfruit']
fruit_price = None
counter_item = {}
for i in chart:
if i in counter_item:
counter_item[i] +=1
else:
counter_item[i] =1
counter_item
fruit_price = {}
for i, j in d1.items():
for x, y in counter_item.items():
if i == x:
fruit_price[i]=(j*y)
fruit_price
total_price = 0
for i in fruit_price.values():
total_price += i
total_price
minprice=100
discount=0
if total_price<minprice:
discount=total_price
else:
discount=total_price*(10/100)
discounted_price=total_price-discount
return discounted_price
print(discounted_price(total_price(counter_item(chart),fruit_price),10,minprice=100))
Type error
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-38-7dbc173506d2> in <module>
30 discounted_price=total_price-discount
31 return discounted_price
---> 32 print(discounted_price(total_price(counter_item(chart),fruit_price),10,minprice=100))
TypeError: 'dict' object is not callable
Thanks!
I'm trying to get PMI count from work part file which is opened in NX software in Python, but getting error:
TypeError: object of type 'NXOpen.Annotations.PmiCollection' has no len()
Code:
lw = theSession.ListingWindow
lw.Open()
theSession = NXOpen.Session.GetSession()
theParts = theSession.Parts
theWorkPart = theParts.Work
allPMIObjects = theWorkPart.PmiManager.Pmis
count1 = len(allPMIObjects)
lw.WriteLine(count1)
lw.Close()
Documentation link: https://docs.plm.automation.siemens.com/data_services/resources/nx/11/nx_api/custom/en_US/nxopen_python_ref/NXOpen.Annotations.PmiCollection.html
Equivalent vb code: http://nxjournaling.com/content/find-out-if-part-has-any-pmi
Below is the code that gives count of PMIs from active part and assert if count is changed (here 3):
lw = theSession.ListingWindow
lw.Open()
# Custom code starts to get PMI count in part and check
theSession = NXOpen.Session.GetSession()
theParts = theSession.Parts
theWorkPart = theParts.Work
allPMIObjects = theWorkPart.PmiManager.Pmis
i = 0
for p in allPMIObjects:
i = i + 1
lw.WriteLine(str(i))
lw.Close()
#if PMI count is changed from 3, raise AssertionError:
assert i == 3
I am trying to run sentiment analysis on a selection of a data set, but every time I do I get this error: KeyError: 0
For reference, this is the code I am working with:
OC = df[df["text"].str.contains("Obamacare")]
from textblob import TextBlob
import re
def clean_tweet(tweet):
return " ".join(re.sub("(#[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())
def analize_sentiment(tweet):
analysis = TextBlob(clean_tweet(tweet))
if analysis.sentiment.polarity > 0:
return 1
elif analysis.sentiment.polarity == 0:
return 0
else:
return -1
df["sentiment"] = np.array([ analize_sentiment(tweet) for tweet in df["text"]])
pos_tweets = [tweet for index, tweet in enumerate(OC['text']) if OC['sentiment'][index] > 0]
neu_tweets = [ tweet for index, tweet in enumerate(OC['text']) if OC['sentiment'][index] == 0]
neg_tweets = [ tweet for index, tweet in enumerate(OC['text']) if OC['sentiment'][index] < 0]
It's after I try to run the pos_tweets, neu_tweets, neg_tweets that I keep getting Key Error: 0
I'm not sure what you're enumerating or why that is part of a sentiment analysis. This is how I did it...
def clean(tweet):
return " ".join(re.sub("(#[A-Za-z0-9]+) | ([^0-9A-Za-z\t]) | (w+:\/\/\s+)", " ", tweet).split())
def sentiment_analysis(tweet):
analysis = TextBlob(clean(tweet))
if analysis.sentiment.polarity > 0:
return 1
elif analysis.sentiment.polarity == 0:
return 0
else:
return -1
df["sentiment"] = np.array([sentiment_analysis(tweet) for tweet in
df["text"]])
df["OC"] = df.text.str.contains("obamacare", case = False)
df2 = df.loc[df["OC"] == True]
df2.sentiment.value_counts()
I basically just ran your list comprehension against the entire df, and then parsed.