In my problem, I'm trying to compare the perplexity values of different N-gram models, say till N=4.
However, I'm confused with the other results obtained using other methods.
Here is my first implementation: -
import nltk
nltk.download('punkt')
nltk.download('webtext')
from nltk.corpus import webtext
nltk.download('stopwords')
from nltk.lm.preprocessing import padded_everygram_pipeline
from nltk.lm import Laplace
from nltk.lm import MLE
from sklearn.model_selection import train_test_split
from decimal import Decimal
import numpy as np
from nltk.util import ngrams
corpus = []
for fileid in webtext.fileids():
corpus += [list(i) for i in webtext.sents(fileid)]
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
import re
stopwords = set(nltk.corpus.stopwords.words('english'))
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
def preprocess_sentence(text):
text = text.lower()
text = re.sub(r'[^\w\s]', '', text)
text = re.sub(r'[0-9]', '', text)
#remove unwanted spaces
text = re.sub(' +', ' ', text)
#remove trailing spaces
text = text.strip()
#remove stop words
f = []
for w in text.split(" "):
if w not in stopwords:
f.append(w)
text = " ".join(f)
# text = text.split(" ")
# text = [stemmer.stem(word) for word in text]
# text = [lemmatizer.lemmatize(word) for word in text]
return text
data = []
for sent in corpus:
newstr = ' '.join(sent)
newstr = preprocess_sentence(newstr)
newlist = newstr.split(' ')
data.append(newlist)
**Bigram Model**
n = 2
train_data, padded_vocab = padded_everygram_pipeline(n, train)
model_bi = Laplace(n)
model_bi.fit(train_data, padded_vocab)
generated_2grams = []
for sent in test:
generated_2grams.append(list(ngrams(sent, 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>')))
PP_bi = []
sum_sent_pp = 0
for sent in generated_2grams:
totalWords = len(sent)
sent_pp = 1
for bigram in sent:
s1, s2 = bigram[0], bigram[1].split()
score = model_bi.score(s1, s2) # score for each bigram
if score != 0:
sent_pp *= (1 / Decimal(score))
sent_pp = pow(sent_pp, Decimal(1 / totalWords))
PP_bi.append(sent_pp)
print(f"Perplexity of a Bigram Model is {sum(PP_bi) / len(PP_bi)}")
**Trigram Model**
n = 3
train_data, padded_vocab = padded_everygram_pipeline(n, train)
model_tri = Laplace(n)
model_tri.fit(train_data, padded_vocab)
generated_3grams = []
for sent in test:
generated_3grams.append(list(ngrams(sent, 3, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>')))
PP_tri = []
for sent in generated_3grams:
totalWords = len(sent)
sent_pp = 1
for trigram in sent:
s1, s2, s3 = trigram[0], trigram[1], trigram[2]
jointStr = s1 + " " + s2
l = jointStr.split()
score = model_tri.score(s3, l)
if score != 0:
sent_pp *= Decimal(1 / score)
sent_pp = pow(sent_pp, Decimal(1 / totalWords))
PP_tri.append(sent_pp)
print(f"Perplexity of a Trigram Model is {sum(PP_tri) / len(PP_tri)}")
**Quadgram Model**
n = 4
train_data, padded_vocab = padded_everygram_pipeline(n, train)
model_quad = Laplace(n)
generated_4grams = []
for sent in test:
generated_4grams.append(list(ngrams(sent, 4, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>')))
PP_quad = []
for sent in generated_4grams:
totalWords = len(sent)
sent_pp = 1
for quadgram in sent:
s1, s2, s3, s4 = quadgram[0], quadgram[1], quadgram[2], quadgram[3]
jointStr = s1 + " " + s2 + " " + s3
l = jointStr.split()
score = model_quad.score(s4, l)
if score != 0:
sent_pp *= Decimal(1 / score)
sent_pp = pow(sent_pp, Decimal(1 / totalWords))
PP_quad.append(sent_pp)
print(f"Perplexity of a Quadgram Model is {sum(PP_quad) / len(PP_quad)}")
Results:
1) Perplexity of Bigram : 12900.02
2) Perplexity of Trigram: 6241.26
3) Perplexity of Quadgram: 6804.64
The Perplexity should decrease in Quadgram model, however, it's more than the trigram one.
Moreover, I'm not so sure that the perplexity values I'm getting, and the method I have implemented is correct.
I also tried finding perplexites usin nltk.perplexity(), however, I'm getting different results. The input to nltk.perplexity is bigrams of a sentence in a corpus.
With both the approaches giving different results, I'm quite confused.
Related
from app import getPhonemes
import pandas as pd
import sys
triphones = []
def phonemize(sentence):
tokens = sentence.split(' ')
phonemes = getPhonemes(tokens)
return '$'.join(phonemes)
def generateTriphones(phonemes):
triphones = []
for i in range(len(phonemes)):
for j in range(len(phonemes)):
for k in range(len(phonemes)):
triphones.append(phonemes[i] + ' ' + phonemes[j] + ' ' + phonemes[k])
return triphones
def scoreSentence(sentence,phonemes):
flag = 0
global triphones
score = 0
tokens = sentence.split('$')
uniqueTokens = set(tokens)
triphoneticTokens = [token for token in uniqueTokens if token.count(' ') > 1]
for token in triphoneticTokens:
for triphone in triphones:
if token.find(triphone) != -1:
score += 1
triphones.remove(triphone)
if triphones == []:
flag = -1
return score, flag
def Process(fil):
global triphones
file = open('itudict/vocab.phoneme', 'r',encoding='utf-8')
data = []
for line in file:
data.append(line.strip())
file.close()
phonemes = data[4:]
triphones = generateTriphones(phonemes)
data = pd.read_csv(fil+'.csv')
data = data.drop(['score','covered_vocab'],axis=1)
i = 1
while len(data) > 0:
print('Processing File: '+str(i))
sentencee = data[:10000]
data = data[10000:]
sentences = sentencee['sentence'].tolist()
phonemes = []
scores = []
for j in range(len(sentences)):
if j%1000 == 0:
print('Processing Sentence: '+str(j))
print(len(triphones))
phones = phonemize(sentences[j])
score, flag = scoreSentence(phones,phonemes)
if flag == -1:
data = []
phonemes.append(phones)
scores.append(score)
data['Phonemes'] = phonemes
data['score'] = scores
data.to_csv(fil+'phonemized'+str(i)+'.csv', index=False)
i += 1
if __name__ == '__main__':
Process(sys.argv[1])
I am trying to generate the phonemes for 800000 sentences. The model which am using is G2P which phonemizes the sentence. after phonemization i am calculating the scores. the phoneme array which i am using for calculating scores is of size 2620000.
The length of sentences are 800000 and the code is taking days, can somebody parallelize this code or suggest some solution
I want to parallelize this code to execute faster.
i have some problems on my code that show some error when i run it. i'm using python
so, here's my code
import collections
import nltk.classify.util, nltk.metrics
from nltk.classify import SklearnClassifier
import csv
from sklearn import cross_validation
from sklearn.svm import LinearSVC, SVC
import random
from nltk.corpus import stopwords
import itertools
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
posdata = []
with open('positive-data.csv', 'rb') as myfile:
reader = csv.reader(myfile, delimiter=',')
for val in reader:
posdata.append(val[0])
negdata = []
with open('negative-data.csv', 'rb') as myfile:
reader = csv.reader(myfile, delimiter=',')
for val in reader:
negdata.append(val[0])
def word_split(data):
data_new = []
for word in data:
word_filter = [i.lower() for i in word.split()]
data_new.append(word_filter)
return data_new
def word_split_sentiment(data):
data_new = []
for (word, sentiment) in data:
word_filter = [i.lower() for i in word.split()]
data_new.append((word_filter, sentiment))
return data_new
def word_feats(words):
return dict([(word, True) for word in words])
stopset = set(stopwords.words('english')) - set(('over', 'under', 'below', 'more', 'most', 'no', 'not', 'only', 'such', 'few', 'so', 'too', 'very', 'just', 'any', 'once'))
def stopword_filtered_word_feats(words):
return dict([(word, True) for word in words if word not in stopset])
def bigram_word_feats(words, score_fn=BigramAssocMeasures.chi_sq, n=200):
bigram_finder = BigramCollocationFinder.from_words(words)
bigrams = bigram_finder.nbest(score_fn, n)
"""
print words
for ngram in itertools.chain(words, bigrams):
if ngram not in stopset:
print ngram
exit()
"""
return dict([(ngram, True) for ngram in itertools.chain(words, bigrams)])
def bigram_word_feats_stopwords(words, score_fn=BigramAssocMeasures.chi_sq, n=200):
bigram_finder = BigramCollocationFinder.from_words(words)
bigrams = bigram_finder.nbest(score_fn, n)
"""
print words
for ngram in itertools.chain(words, bigrams):
if ngram not in stopset:
print ngram
exit()
"""
return dict([(ngram, True) for ngram in itertools.chain(words, bigrams) if ngram not in stopset])
# Calculating Precision, Recall & F-measure
def evaluate_classifier(featx):
negfeats = [(featx(f), 'neg') for f in word_split(negdata)]
posfeats = [(featx(f), 'pos') for f in word_split(posdata)]
negcutoff = len(negfeats)*3/4
poscutoff = len(posfeats)*3/4
trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
classifier = 'svm'
for cl in classifier:
if cl == 'svm':
classifierName = 'SVM'
classifier = SklearnClassifier(LinearSVC(), sparse=False)
classifier.train(trainfeats)
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
for i, (feats, label) in enumerate(testfeats):
refsets[label].add(i)
observed = classifier.classify(feats)
testsets[observed].add(i)
accuracy = nltk.classify.util.accuracy(classifier, testfeats)
pos_precision = nltk.metrics.precision(refsets['pos'], testsets['pos'])
pos_recall = nltk.metrics.recall(refsets['pos'], testsets['pos'])
pos_fmeasure = nltk.metrics.f_measure(refsets['pos'], testsets['pos'])
neg_precision = nltk.metrics.precision(refsets['neg'], testsets['neg'])
neg_recall = nltk.metrics.recall(refsets['neg'], testsets['neg'])
neg_fmeasure = nltk.metrics.f_measure(refsets['neg'], testsets['neg'])
print ''
print '---------------------------------------'
print 'SINGLE FOLD RESULT ' + '(' + classifierName + ')'
print '---------------------------------------'
print 'accuracy:', accuracy
print 'precision', (pos_precision + neg_precision) / 2
print 'recall', (pos_recall + neg_recall) / 2
print 'f-measure', (pos_fmeasure + neg_fmeasure) / 2
print ''
## CROSS VALIDATION
trainfeats = negfeats + posfeats
# SHUFFLE TRAIN SET
random.shuffle(trainfeats)
n = 5
for cl in classifier_list:
subset_size = len(trainfeats) / n
accuracy = []
pos_precision = []
pos_recall = []
neg_precision = []
neg_recall = []
pos_fmeasure = []
neg_fmeasure = []
cv_count = 1
for i in range(n):
testing_this_round = trainfeats[i*subset_size:][:subset_size]
training_this_round = trainfeats[:i*subset_size] + trainfeats[(i+1)*subset_size:]
if cl == 'svm':
classifierName = 'SVM'
classifier = SklearnClassifier(LinearSVC(), sparse=False)
classifier.train(training_this_round)
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
for i, (feats, label) in enumerate(testing_this_round):
refsets[label].add(i)
observed = classifier.classify(feats)
testsets[observed].add(i)
cv_accuracy = nltk.classify.util.accuracy(classifier, testing_this_round)
cv_pos_precision = nltk.metrics.precision(refsets['pos'], testsets['pos'])
cv_pos_recall = nltk.metrics.recall(refsets['pos'], testsets['pos'])
cv_pos_fmeasure = nltk.metrics.f_measure(refsets['pos'], testsets['pos'])
cv_neg_precision = nltk.metrics.precision(refsets['neg'], testsets['neg'])
cv_neg_recall = nltk.metrics.recall(refsets['neg'], testsets['neg'])
cv_neg_fmeasure = nltk.metrics.f_measure(refsets['neg'], testsets['neg'])
accuracy.append(cv_accuracy)
pos_precision.append(cv_pos_precision)
pos_recall.append(cv_pos_recall)
neg_precision.append(cv_neg_precision)
neg_recall.append(cv_neg_recall)
pos_fmeasure.append(cv_pos_fmeasure)
neg_fmeasure.append(cv_neg_fmeasure)
cv_count += 1
print '---------------------------------------'
print 'N-FOLD CROSS VALIDATION RESULT ' + '(' + classifierName + ')'
print '---------------------------------------'
print 'accuracy:', sum(accuracy) / n
print 'precision', (sum(pos_precision)/n + sum(neg_precision)/n) / 2
print 'recall', (sum(pos_recall)/n + sum(neg_recall)/n) / 2
print 'f-measure', (sum(pos_fmeasure)/n + sum(neg_fmeasure)/n) / 2
print ''
evaluate_classifier(word_feats)`
its suppose to analysis sentiment from csv document using SVM, but when i run the code i got this error anyone have an idea to fix it??
really need your help guys
I am trying to use the maxent classifier using the NLTK library. I have a list of positive and negative words and I have trained the classifier on the same. The problem is when I test the classifier against a sentence I always get the same probability of classification for the two classes. Here is the code -
import nltk, nltk.classify.util, nltk.metrics
from nltk.classify import MaxentClassifier
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist
from sklearn import cross_validation
nltk.data.path.append("/home/daksh/Documents/Softwares/nltk_data")
import csv
import operator
from nltk.classify import MaxentClassifier
from nltk.corpus import movie_reviews
def getBestWords(posWords,negWords):
word_fd = FreqDist()
label_word_fd = ConditionalFreqDist()
for word in posWords:
word_fd[word.lower()] += 1
label_word_fd['pos'][word.lower()] += 1
for word in negWords:
word_fd[word.lower()] += 1
label_word_fd['neg'][word.lower()] += 1
pos_word_count = label_word_fd['pos'].N()
neg_word_count = label_word_fd['neg'].N()
total_word_count = pos_word_count + neg_word_count
word_scores = {}
for word, freq in word_fd.items():
pos_score = BigramAssocMeasures.chi_sq(label_word_fd['pos'][word],
(freq, pos_word_count), total_word_count)
neg_score = BigramAssocMeasures.chi_sq(label_word_fd['neg'][word],
(freq, neg_word_count), total_word_count)
word_scores[word] = pos_score + neg_score
sorted_x = sorted(word_scores.items(), key=operator.itemgetter(1),reverse=True)[:2500]
bestwords = set([w for w,s in sorted_x])
return bestwords
def best_word_feats(words,bestwords):
return dict([(word, True) for word in words if word in bestwords])
def word_feats(words):
return dict([(word, True) for word in words])
def best_bigram_word_feats(words,posWords,negWords, score_fn=BigramAssocMeasures.chi_sq, n=200):
bigram_finder = BigramCollocationFinder.from_words(words)
bigrams = bigram_finder.nbest(score_fn, n)
d = dict([(bigram, True) for bigram in bigrams])
bestwords = getBestWords(posWords,negWords)
d.update(best_word_feats(words,bestwords))
return d
posWords = list()
negWords = list()
with open('../data/finalSentiPosWords.csv','r') as csvfile:
spamreader = csv.reader(csvfile)
posWords = list(spamreader)
with open('../data/finalSentiNegWords.csv','r') as csvfile:
spamreader = csv.reader(csvfile)
negWords = list(spamreader)
posWords = [word[0] for word in posWords]
negWords = [word[0] for word in negWords]
bestwords = getBestWords(posWords,negWords)
posfeats = [(best_bigram_word_feats(posWords,posWords,negWords),'pos')]
negfeats = [(best_bigram_word_feats(negWords,posWords,negWords),'neg')]
trainfeats = negfeats + posfeats
algorithm = nltk.classify.MaxentClassifier.ALGORITHMS[0]
classifier = nltk.MaxentClassifier.train(trainfeats, algorithm,max_iter=5)
# classifier = nltk.NaiveBayesClassifier.train(trainfeats)
classifier.show_most_informative_features(10)
sentence = "Dosa had a tangy taste but it was fun eating it. On the other hand the other dosa was soggy"
l = sentence.split(' ')
print(l)
print(word_feats(l))
print(classifier.prob_classify(word_feats(l)).prob('pos'))
print(classifier.prob_classify(word_feats(l)).prob('neg'))
The output of this is this -
0.500074231063
0.499925768937
The overall classification seems to be working fine but I can't figure out how the probabilities are calculated and why are they always same even if I change the test sentence.
Any quick help appreciated.
Thanks.
That's a lot of code! I'm not going to debug it for you, but I notice that bestwords is the set of all words in your training corpus. If that's not outright wrong, it's certainly misleadingly named.
I have a huge block of code, I didn't want to bother you with in the first place. I tried figuring out what's going wrong for over a week now and I contacted several external sources (without any response), and at the moment I'm just wondering: maybe the problem is my training set?
For my thesis I need to classify a whole bunch of tweets as pos/neg/neutral. The code I wrote works OK on test datasets I make up myself (e.g. consisting out of 15 training sentences: 5 pos, 5 neg and 5 neutral; 6 test sentences: 2 pos, 2 neg, 2 neutral - only 1 test sentence gets misclassified).
Once I start running the code on the manually classified training set (1629 pos, 1411 neutral tweets and only 690 neg) and 900 test tweets, things start going wrong. Of the 900 test tweets, the HUGE majority gets classified as pos (between 700 and 800), while there's only a minority of neg and neutral tweets.
Would somebody please be so kind as to check my code and help me figure out what I'm doing wrong? I'd be really grateful. If you need any more information, I'd be happy to provide it.
import re, math, collections, itertools
import nltk
import nltk.classify.util, nltk.metrics
import csv
from nltk.classify import NaiveBayesClassifier
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist
from nltk.util import ngrams
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.porter import *
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english", ignore_stopwords = True)
pos = []
neg = []
neutral = []
with open('C:\\...pos.csv', 'r', encoding = "utf8") as f: #open positive training set
reader = csv.reader(f)
for row in reader:
pos.extend(row)
with open('C:\\ ...neg.csv', 'r', encoding = "utf8") as f: #open negative training set
reader = csv.reader(f)
for row in reader:
neg.extend(row)
with open('C:\\...neutral.csv', 'r', encoding = "utf8") as f: #open neutral training set
reader = csv.reader(f)
for row in reader:
neutral.extend(row)
def uni(doc):
x = []
y = []
for tweet in doc:
x.append(word_tokenize(tweet))
for element in x:
for word in element:
if len(word)>2:
word = word.lower()
word = stemmer.stem(word)
y.append(word)
return y
def word_feats_uni(doc):
return dict([(word, True) for word in uni(doc)])
def tokenizer_ngrams(document):
all_tokens = []
filtered_tokens = []
for (sentence) in document:
all_tokens.append(word_tokenize(sentence))
return all_tokens
def get_bi (document):
x = tokenizer_ngrams(document)
c = []
for sentence in x:
c.extend([bigram for bigram in nltk.bigrams(sentence)])
return c
def get_tri(document):
x = tokenizer_ngrams(document)
c = []
for sentence in x:
c.extend([bigram for bigram in nltk.bigrams(sentence)])
return c
def word_feats_bi(doc):
return dict([(word, True) for word in get_bi(doc)])
def word_feats_tri(doc):
return dict([(word, True) for word in get_tri(doc)])
def word_feats_test(doc):
feats_test = {}
feats_test.update(word_feats_uni(doc))
feats_test.update(word_feats_bi(doc))
feats_test.update(word_feats_tri(doc))
return feats_test
pos_feats = [(word_feats_uni(pos),'1')] + [(word_feats_bi(pos),'1')] + [(word_feats_tri(pos),'1')]
neg_feats = [(word_feats_uni(neg),'-1')] + [(word_feats_bi(neg),'-1')] + [(word_feats_tri(neg),'-1')]
neutral_feats = [(word_feats_uni(neutral),'0')] + [(word_feats_bi(neutral),'0')] + [(word_feats_tri(neutral),'0')]
trainfeats = pos_feats + neg_feats + neutral_feats
random.shuffle(trainfeats)
classifier = NaiveBayesClassifier.train(trainfeats)
testtweets = []
with open('C:\\ ... testtweets.csv', 'r', encoding = "utf8") as f: #open testset
reader = csv.reader(f, delimiter = ';')
for row in reader:
testtweets.extend([row])
date = []
word = []
y = []
def classification(date,sentence): #doc = sentencelist
i = 0
for tweet in sentence:
sent = classifier.classify(word_feats_test([tweet]))
y.extend([(date[i],tweet,sent)])
i = i + 1
def result(doc):
i = 0
while i in range(0,len(doc) -1):
date.append(doc[i][0])
word.append(doc[i][1])
i = i + 1
classification(date,word)
result(testtweets)
with open('C:\\...write.csv', 'w') as fp: #write classified test set to file
a = csv.writer(fp, delimiter=',')
a.writerows(y)
Sorry to dump a whole block of code (below) here. I've been trying to figure out what I'm doing wrong, but unfortunately I have no idea.
For my thesis I have to classify tweets as neutral (0), negative (-1) or positive (1). I'm trying this by using NLTK. Goal is that the code returns a dictionary in the form 'tweetA,0','tweetB,-1'... At the moment, if I enter more than one tweet as an input, I only get the result (i.e. -1/0/1) for the first tweet back.
For example, if I put 'I love oranges','I hate tomatoes' as in input, I only get '1' as a return and not '1','-1'.
If anyone would be able to help me out, I'd be really grateful!
The code I have up until now:
import re, math, collections, itertools
import nltk
import nltk.classify.util, nltk.metrics
from nltk.classify import NaiveBayesClassifier
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist
from nltk.util import ngrams
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.porter import *
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english", ignore_stopwords = True)
pos_tweets = ['I love bananas','I like pears','I eat oranges']
neg_tweets = ['I hate lettuce','I do not like tomatoes','I hate apples']
neutral_tweets = ['I buy chicken','I am boiling eggs','I am chopping vegetables']
def uni(doc):
x = []
y = []
for tweet in doc:
x.append(word_tokenize(tweet))
for element in x:
for word in element:
if len(word)>2:
word = word.lower()
word = stemmer.stem(word)
y.append(word)
return y
def word_feats_uni(doc):
return dict([(word, True) for word in uni(doc)])
def tokenizer_ngrams(document):
all_tokens = []
filtered_tokens = []
for (sentence) in document:
all_tokens.append(word_tokenize(sentence))
return all_tokens
def get_bi (document):
x = tokenizer_ngrams(document)
c = []
for sentence in x:
c.extend([bigram for bigram in nltk.bigrams(sentence)])
return c
def get_tri(document):
x = tokenizer_ngrams(document)
c = []
for sentence in x:
c.extend([bigram for bigram in nltk.bigrams(sentence)])
return c
def word_feats_bi(doc):
return dict([(word, True) for word in get_bi(doc)])
def word_feats_tri(doc):
return dict([(word, True) for word in get_tri(doc)])
def word_feats_test(doc):
feats_test = {}
i = 0
for tweet in doc:
feats_test.update(word_feats_uni(tweet))
feats_test.update(word_feats_bi(tweet))
feats_test.update(word_feats_tri(tweet))
return feats_test
pos_feats = [(word_feats_uni(pos_tweets),'1')] + [(word_feats_bi(pos_tweets),'1')] + [(word_feats_tri(pos_tweets),'1')]
neg_feats = [(word_feats_uni(neg_tweets),'-1')] + [(word_feats_bi(neg_tweets),'-1')] + [(word_feats_tri(neg_tweets),'-1')]
neutral_feats = [(word_feats_uni(neutral_tweets),'0')] + [(word_feats_bi(neutral_tweets),'0')] + [(word_feats_tri(neutral_tweets),'0')]
trainfeats = pos_feats + neg_feats + neutral_feats
classifier = NaiveBayesClassifier.train(trainfeats)
print (classifier.classify(word_feats_test(['I love oranges'])))