From cf7ea7a8a740bbfae8d25aa3f783b871507ae813 Mon Sep 17 00:00:00 2001 From: Jatin Mandav Date: Sun, 10 Jun 2018 11:47:53 +0530 Subject: [PATCH] Solved Issue UnicodeDecodeError UnicodeDecodeError: 'utf-8' codec can't decode byte 0x97 in position 3118: invalid start byte --- nltkvid18.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nltkvid18.py b/nltkvid18.py index f5b0ef2..56dabf4 100644 --- a/nltkvid18.py +++ b/nltkvid18.py @@ -13,6 +13,8 @@ from nltk.tokenize import word_tokenize +import io + class VoteClassifier(ClassifierI): def __init__(self, *classifiers): @@ -35,8 +37,8 @@ def confidence(self, features): conf = choice_votes / len(votes) return conf -short_pos = open("short_reviews/positive.txt","r").read() -short_neg = open("short_reviews/negative.txt","r").read() +short_pos = io.open("short_reviews/positive.txt", encoding="latin-1").read() +short_neg = io.open("short_reviews/negative.txt", encoding="latin-1").read() documents = []