From cf7ea7a8a740bbfae8d25aa3f783b871507ae813 Mon Sep 17 00:00:00 2001
From: Jatin Mandav <jatinmandav3@gmail.com>
Date: Sun, 10 Jun 2018 11:47:53 +0530
Subject: [PATCH] Fix UnicodeDecodeError when reading short_reviews files

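Reading short_reviews/positive.txt and short_reviews/negative.txt without
an explicit encoding failed with:

    UnicodeDecodeError: 'utf-8' codec can't decode byte 0x97 in position 3118: invalid start byte

Open both files through io.open() with encoding="latin-1" instead.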
---
 nltkvid18.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/nltkvid18.py b/nltkvid18.py
index f5b0ef2..56dabf4 100644
--- a/nltkvid18.py
+++ b/nltkvid18.py
@@ -13,6 +13,8 @@
 
 from nltk.tokenize import word_tokenize
 
+import io
+
 
 class VoteClassifier(ClassifierI):
     def __init__(self, *classifiers):
@@ -35,8 +37,8 @@ def confidence(self, features):
         conf = choice_votes / len(votes)
         return conf
         
-short_pos = open("short_reviews/positive.txt","r").read()
-short_neg = open("short_reviews/negative.txt","r").read()
+short_pos = io.open("short_reviews/positive.txt", encoding="latin-1").read()
+short_neg = io.open("short_reviews/negative.txt", encoding="latin-1").read()
 
 documents = []