-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_preprocessor.py
More file actions
104 lines (92 loc) · 3.04 KB
/
data_preprocessor.py
File metadata and controls
104 lines (92 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import json
import Constants
import os
from pprint import pprint
import description_rater as dr
import progressbar, glob
# Build the training CSV: one row of features per app, read from
# /clean_dataset/<id>/meta.json, written to Constants.training_data in the
# current working directory.

# Column order of the generated CSV; every data row follows this order.
_COLUMNS = (
    'id', 'price', 'isFree', 'version', 'offersIAP', 'adSupported',
    'androidVersion', 'contentRating', 'textScore', 'hasVideo', 'score',
)

# Integer codes for the content-rating labels this script recognises.
# Any other label is written to the CSV unchanged.
_CONTENT_RATING_CODES = {
    'Unrated': 0,
    'Everyone 10+': 1,
    'Mature 17+': 2,
    'Teen': 3,
    'Everyone': 4,
}


def _flag(data, key):
    """Return 1 when data[key] equals True, otherwise 0.

    A missing key yields 0, so a row never inherits the flag computed for
    the previous app (and the first row cannot hit an unbound name).
    """
    # Equality rather than identity, so a JSON 1 still counts as true.
    return 1 if data.get(key) == True else 0


def _major_version(data):
    """Return the leading digit of data['version'] as an int.

    Returns 0 when the key is absent and 1 when the value is empty or does
    not start with an ASCII digit.
    NOTE(review): the previous check `type(version) != int` was always true
    for a string element, so this column used to be a constant 1 whenever
    the key existed; confirm downstream consumers expect the real digit.
    """
    if 'version' not in data:
        return 0
    leading = data['version'][:1]
    if leading and leading in '0123456789':
        return int(leading)
    return 1


def _android_version(raw):
    """Shorten values longer than 3 characters to 2; map 'VA' to 1."""
    if len(raw) > 3:
        raw = raw[:2]
    return 1 if raw == 'VA' else raw


def _build_row(app_id):
    """Return the CSV fields for the app stored under /clean_dataset/<app_id>/.

    Raises KeyError when meta.json lacks one of the required keys ("price",
    "androidVersion", "contentRating", "description", "score").
    """
    app_dir = '/clean_dataset/' + str(app_id) + '/'
    # Close the metadata file as soon as it has been parsed.
    with open(app_dir + 'meta.json') as meta_file:
        data = json.load(meta_file)

    price = data["price"]
    # startswith() is also safe for an empty price string.
    if price.startswith('$'):
        price = price[1:]

    android_version = _android_version(data["androidVersion"])

    rating = data["contentRating"]
    rating = _CONTENT_RATING_CODES.get(rating, rating)

    # The app counts as having a video when any .mp4 sits in its directory.
    has_video = 1 if glob.glob(app_dir + '*.mp4') else 0

    description_score = dr.rate_description(data["description"])
    score = data["score"]

    return [
        str(app_id),
        price,
        str(_flag(data, 'free')),
        str(_major_version(data)),
        str(_flag(data, 'offersIAP')),
        str(_flag(data, 'adSupported')),
        str(android_version),
        str(rating),
        str(description_score),
        str(has_video),
        str(score),
    ]


path = os.getcwd()
Train_data_path = os.path.join(path, Constants.training_data)

# The loop increments before reading, so the first app processed is
# count + 1 (i.e. 1096).
count = 1095

# The context manager guarantees the CSV is flushed and closed, even when a
# malformed meta.json aborts the run part-way through.
with open(Train_data_path, 'w') as f:
    f.write(','.join(_COLUMNS) + '\n')
    with progressbar.ProgressBar(max_value=Constants.Max_Number_Of_Apps) as progress:
        while count < Constants.Max_Number_Of_Apps:
            count = count + 1
            f.write(','.join(_build_row(count)) + '\n')
            progress.update(count)