SentiWordNet

In []:
from nltk.corpus import sentiwordnet as swn
In []:
# Look up a single SentiWordNet entry and show its three scores
# (positivity, negativity, objectivity; they sum to 1.0 per synset).
breakdown = swn.senti_synset('breakdown.n.03')
print(breakdown)
# Use the print() function (Python 3 compatible) and a consistent format:
# the original string omitted the ':' and precision on the pos score.
print('pos:%.3f neg:%.3f obj:%.3f' % (breakdown.pos_score(), breakdown.neg_score(), breakdown.obj_score()))

Looking up synsets

In []:
list(swn.senti_synsets('slow')) 
In []:
happy = swn.senti_synsets('happy', 'a')
In []:
all = swn.all_senti_synsets()
In []:
print happy[0]

Labeled Data for Sentiment Analysis

In []:
from nltk.corpus import movie_reviews
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from random import shuffle
In []:
# File IDs of the positive and negative review documents in the corpus.
pos_files, neg_files = (movie_reviews.fileids(label) for label in ('pos', 'neg'))
In []:
words_for_one_file = movie_reviews.words(fileids=[neg_files[0]])
In []:
def sentiment_feats(words):
    """Bag-of-words presence features for an NLTK classifier.

    Replaces the original placeholder ({'empty_feature': 0}), which gave
    the classifier nothing to learn from.

    Parameters
    ----------
    words : iterable of str
        The tokens of one document.

    Returns
    -------
    dict
        Maps 'contains(<word>)' -> True for each distinct token; absent
        words simply have no key, which NaiveBayesClassifier handles.
    """
    return {'contains(%s)' % word: True for word in set(words)}
In []:
# Build (feature-dict, label) pairs, one per document, for each class.
negfeats = []
for name in neg_files:
    negfeats.append((sentiment_feats(movie_reviews.words(fileids=[name])), 'neg'))

posfeats = []
for name in pos_files:
    posfeats.append((sentiment_feats(movie_reviews.words(fileids=[name])), 'pos'))
In []:
# 75/25 train/test split. Use floor division: on Python 3, `len(...)*3/4`
# is a float and is not a valid slice index.
negcutoff = len(negfeats) * 3 // 4
poscutoff = len(posfeats) * 3 // 4

# Shuffle so the split is not biased by the corpus file ordering.
shuffle(negfeats)
shuffle(posfeats)

trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
# print() function form (Python 3 compatible; identical output on Python 2
# since each call takes a single %-formatted string).
print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

classifier = NaiveBayesClassifier.train(trainfeats)
print('accuracy: %.4f' % nltk.classify.util.accuracy(classifier, testfeats))
classifier.show_most_informative_features()

Now you:

* Fill in sentiment_feats(...)
* Use a different classifier. (scikit-learn is fair game.)
In []: