Skip to content

Commit afbb513

Browse files
authored
backport VIF to 1.x release (#549)
* [G]VIF (#548) * [G]VIF * add reference value source * more tests * glm tests (cherry picked from commit b1ba4c5) * fix formula implementation * version bump
1 parent d32f361 commit afbb513

File tree

4 files changed

+51
-6
lines changed

4 files changed

+51
-6
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "GLM"
22
uuid = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
3-
version = "1.8.3"
3+
version = "1.9.0"
44

55
[deps]
66
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -26,7 +26,7 @@ SpecialFunctions = "0.6, 0.7, 0.8, 0.9, 0.10, 1, 2.0"
2626
StatsAPI = "1.4"
2727
StatsBase = "0.33.5, 0.34"
2828
StatsFuns = "0.6, 0.7, 0.8, 0.9, 1.0"
29-
StatsModels = "0.6.23, 0.7"
29+
StatsModels = "0.7.3"
3030
julia = "1.6"
3131

3232
[extras]

src/GLM.jl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,18 @@ module GLM
1010
import Base: (\), convert, show, size
1111
import LinearAlgebra: cholesky, cholesky!
1212
import Statistics: cor
13-
import StatsBase: coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
14-
loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
13+
using StatsAPI
14+
import StatsBase: coef, coeftable, coefnames, confint, deviance, nulldeviance, dof, dof_residual,
15+
loglikelihood, nullloglikelihood, nobs, stderror, vcov,
16+
residuals, predict, predict!,
1517
fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue
1618
import StatsFuns: xlogy
1719
import SpecialFunctions: erfc, erfcinv, digamma, trigamma
1820
import StatsModels: hasintercept
1921
export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
2022
loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
2123
fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr²,
22-
cooksdistance, hasintercept, dispersion
24+
cooksdistance, hasintercept, dispersion, vif, gvif, termnames
2325

2426
export
2527
# types

src/linpred.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ response(obj::LinPredModel) = obj.rr.y
263263

264264
fitted(m::LinPredModel) = m.rr.mu
265265
predict(mm::LinPredModel) = fitted(mm)
266-
StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula
266+
StatsModels.formula(::LinPredModel) = throw(ArgumentError("model was fitted without a formula"))
267267
residuals(obj::LinPredModel) = residuals(obj.rr)
268268

269269
"""

test/runtests.jl

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,3 +1602,46 @@ end
16021602
# 3. 44 / wt == y
16031603
@test GLM.loglik_obs(Binomial(), y, μ, wt, ϕ) ≈ GLM.logpdf(Binomial(Int(wt), μ), 44)
16041604
end
1605+
1606+
@testset "[G]VIF" begin
1607+
# Reference values from car::vif in R:
1608+
# > library(car)
1609+
# > data(Duncan)
1610+
# > lm1 = lm(prestige ~ 1 + income + education, Duncan)
1611+
# > vif(lm1)
1612+
# income education
1613+
# 2.1049 2.1049
1614+
# > lm2 = lm(prestige ~ 1 + income + education + type, Duncan)
1615+
# > vif(lm2)
1616+
# GVIF Df GVIF^(1/(2*Df))
1617+
# income 2.209178 1 1.486330
1618+
# education 5.297584 1 2.301648
1619+
# type 5.098592 2 1.502666
1620+
duncan = RDatasets.dataset("car", "Duncan")
1621+
lm1 = lm(@formula(Prestige ~ 1 + Income + Education), duncan)
1622+
@test termnames(lm1)[2] == coefnames(lm1)
1623+
@test vif(lm1) ≈ gvif(lm1)
1624+
1625+
lm1_noform = lm(modelmatrix(lm1), response(lm1))
1626+
@test vif(lm1) ≈ vif(lm1_noform)
1627+
@test_throws ArgumentError("model was fitted without a formula") gvif(lm1_noform)
1628+
1629+
lm1log = lm(@formula(Prestige ~ 1 + exp(log(Income)) + exp(log(Education))), duncan)
1630+
@test termnames(lm1log)[2] == coefnames(lm1log) == ["(Intercept)", "exp(log(Income))", "exp(log(Education))"]
1631+
@test vif(lm1) ≈ vif(lm1log)
1632+
1633+
gm1 = glm(modelmatrix(lm1), response(lm1), Normal())
1634+
@test vif(lm1) ≈ vif(gm1)
1635+
1636+
lm2 = lm(@formula(Prestige ~ 1 + Income + Education + Type), duncan)
1637+
@test termnames(lm2)[2] != coefnames(lm2)
1638+
@test gvif(lm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4
1639+
1640+
gm2 = glm(@formula(Prestige ~ 1 + Income + Education + Type), duncan, Normal())
1641+
@test termnames(gm2)[2] != coefnames(gm2)
1642+
@test gvif(gm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4
1643+
1644+
# the VIF definition depends on modelmatrix, vcov and stderror returning valid
1645+
# values. It doesn't care about links, offsets, etc. as long as the model matrix,
1646+
# vcov matrix and stderrors are well defined.
1647+
end

0 commit comments

Comments
 (0)