-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathquiz_3.R
104 lines (72 loc) · 3.06 KB
/
quiz_3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Setup -------------------------------------------------------------------
library(AppliedPredictiveModeling)
library(caret)
library(rattle)
# Question 1 --------------------------------------------------------------
# Fit a CART model to the cell segmentation data, plot the tree, and read
# class predictions for four hypothetical samples described in the quiz.
rm(list = ls())  # NOTE(review): per-question workspace reset; discouraged outside throwaway scripts
data(segmentationOriginal)

training <- subset(segmentationOriginal, Case == "Train")
testing  <- subset(segmentationOriginal, Case == "Test")

set.seed(125)
model <- train(Class ~ ., method = "rpart", data = training)
print(model)
fancyRpartPlot(model$finalModel)

# Build four synthetic rows that contain ONLY the covariates stated in the
# question; every other predictor is NA. (The original version blanked the
# predictors of row 1 only, so rows 2-4 kept real training values, and it
# also built a 4-row frame via rbind that was immediately discarded.)
keep <- c("Case", "Class", "Cell")
sampleData <- training[1:4, ]
sampleData[, !(names(sampleData) %in% keep)] <- NA
sampleData[1, c("TotalIntenCh2", "FiberWidthCh1", "PerimStatusCh1")] <- c(23000, 10, 2)
sampleData[2, c("TotalIntenCh2", "FiberWidthCh1", "VarIntenCh4")]    <- c(50000, 10, 100)
sampleData[3, c("TotalIntenCh2", "FiberWidthCh1", "VarIntenCh4")]    <- c(57000, 8, 100)
sampleData[4, c("FiberWidthCh1", "VarIntenCh4", "PerimStatusCh1")]   <- c(8, 100, 2)

# predict() on the caret/rpart fit cannot handle rows whose predictors are
# NA (model.frame drops them), which is why this call "did not work" — the
# intended answers are read directly off the plotted tree instead.
predict(model, sampleData[2, ], verbose = TRUE)
# Question 2 --------------------------------------------------------------
# For k-fold cross-validation:
# Larger k  = less bias, more variance in the error estimate
# Smaller k = more bias, less variance in the error estimate
# Leave-one-out cross-validation is the special case of k-fold where k = N
# Question 3 --------------------------------------------------------------
# Fit a regression tree to the olive-oil data and predict at the predictor
# column means.
rm(list = ls())
library(pgmm)
data(olive)
olive <- olive[, -1]  # drop the first (region) column

fit <- train(Area ~ ., method = "rpart", data = olive)
fancyRpartPlot(fit$finalModel)

# One-row data frame holding the mean of every column.
newdata <- as.data.frame(t(colMeans(olive)))
predict(fit, newdata)
# The prediction looks strange: Area should be a categorical label, but it
# is stored as numeric, so the tree returns a continuous value rather than
# a class.
# Question 4 --------------------------------------------------------------
# Logistic regression on the South African heart-disease data; report the
# misclassification rate on the train and test halves.
rm(list = ls())
library(ElemStatLearn)
data(SAheart)

# Random 50/50 train/test split.
set.seed(8484)
train <- sample(seq_len(nrow(SAheart)), size = nrow(SAheart) / 2, replace = FALSE)
trainSA <- SAheart[train, ]
testSA  <- SAheart[-train, ]

set.seed(13234)
model <- train(chd ~ age + alcohol + obesity + tobacco + typea + ldl,
               method = "glm", family = "binomial", data = trainSA)
trainPred <- predict(model, trainSA)
testPred  <- predict(model, testSA)

# Fraction of observations whose 0.5-thresholded prediction disagrees with
# the observed 0/1 outcome.
missClass <- function(values, prediction) {
  mean(((prediction > 0.5) * 1) != values)
}
trainMissClass <- missClass(trainSA$chd, trainPred)
testMissClass  <- missClass(testSA$chd, testPred)
# trainMissClass = 0.2727
# testMissClass  = 0.3117
# Question 5 --------------------------------------------------------------
# Random forest on the vowel data; rank the predictors by variable
# importance.
rm(list = ls())
library(ElemStatLearn)
data(vowel.train)
data(vowel.test)

# The outcome is coded 1..11; convert to a factor so caret fits a
# classifier rather than a regression forest.
vowel.train$y <- as.factor(vowel.train$y)
vowel.test$y  <- as.factor(vowel.test$y)

set.seed(33833)
model <- train(y ~ ., method = "rf", data = vowel.train, prox = TRUE) # This takes some time...
print(model)

# Rank variables from MOST to least important. (The original sorted
# ascending, which printed the least important variable first and made the
# requested importance order awkward to read off.)
vi <- varImp(model$finalModel)
vi <- data.frame(var = seq_len(nrow(vi)), imp = vi$Overall)
vi[order(vi$imp, decreasing = TRUE), ]