diff --git a/nltkvid18.py b/nltkvid18.py index f5b0ef2..56dabf4 100644 --- a/nltkvid18.py +++ b/nltkvid18.py @@ -13,6 +13,8 @@ from nltk.tokenize import word_tokenize +import io + class VoteClassifier(ClassifierI): def __init__(self, *classifiers): @@ -35,8 +37,8 @@ def confidence(self, features): conf = choice_votes / len(votes) return conf -short_pos = open("short_reviews/positive.txt","r").read() -short_neg = open("short_reviews/negative.txt","r").read() +short_pos = io.open("short_reviews/positive.txt", encoding="latin-1").read() +short_neg = io.open("short_reviews/negative.txt", encoding="latin-1").read() documents = []