# Lecture B

# Packages needed
# install.packages("leaps")
# install.packages("lars")
# install.packages("glmnet")
# install.packages("bestglm")
# install.packages("GGally")
# install.packages("mplot")
# install.packages("hierNet")

## B1. Cross-validation for model selection

library("glmnet")
data("diabetes", package = "lars")
x <- diabetes$x2  # includes squares and interactions
y <- diabetes$y

# Fit the full lasso path
lasso.fit <- glmnet(x, y)
n.lambda <- length(lasso.fit$lambda)  # number of lambda values on the path
n.lambda
lasso.predict <- predict(lasso.fit, newx = x)  # fitted values at each lambda
dim(lasso.predict)

# AIC along the lasso path, using the residual standard error of the full
# linear model as the estimate of sigma
lasso.aic <- rep(0, n.lambda)
sigma.hat <- summary(lm(y ~ x))$sigma
for (m in 1:n.lambda) {
  yhat <- lasso.predict[, m]
  k <- lasso.fit$df[m]  # number of non-zero coefficients at this lambda
  lasso.aic[m] <- sum((y - yhat)^2)/(sigma.hat^2) + 2*k
}
which.min(lasso.aic)
lambda.aic <- lasso.fit$lambda[which.min(lasso.aic)]
lambda.aic

# 10-fold cross-validation over the lasso path
set.seed(2020)
lasso.cv <- cv.glmnet(x, y)
plot(lasso.cv)
abline(h = min(lasso.cv$cvm) + lasso.cv$cvsd[which.min(lasso.cv$cvm)], col = "blue")
lasso.cv$lambda.1se  # largest lambda within one SE of the minimum CV error

# Non-zero coefficients at the AIC- and CV-selected lambdas
aic_betas <- glmnet(x, y, lambda = lambda.aic)$beta
as.matrix(aic_betas)[as.matrix(aic_betas) != 0, ]
cv_betas <- glmnet(x, y, lambda = lasso.cv$lambda.1se)$beta
as.matrix(cv_betas)[as.matrix(cv_betas) != 0, ]

# Compare the two selected models by 10-fold cross-validation with caret
library(caret)
control <- trainControl(method = "cv", number = 10)
dat <- data.frame(cbind(y, x), check.names = FALSE)  # keep names like `age^2`
# terms selected at the AIC-chosen lambda
aic_cv <- train(y ~ bmi + map + ltg + glu + `age^2` + `bmi^2` + `glu^2` +
                  `age:sex` + `age:map` + `age:ltg` + `age:glu` +
                  `sex:map` + `bmi:map`,
                method = "lm", data = dat, trControl = control)
# terms selected at the CV-chosen lambda
cv_cv <- train(y ~ bmi + map + ltg + `bmi^2` + `glu^2` + `age:sex` +
                 `age:map` + `age:glu` + `bmi:map`,
               method = "lm", data = dat, trControl = control)
results <- resamples(list(AIC = aic_cv, CV = cv_cv))
ggplot(results) + theme_classic(base_size = 18) + labs(x = "MAE")

## B2. The mplot package

# install.packages("mplot")
# or the development version:
# install.packages("devtools")
# devtools::install_github("garthtarr/mplot")
library(mplot)

lm.art <- lm(y ~ ., data = artificialeg)  # full model
summary(lm.art)
lm.fin <- lm(y ~ . - x8, data = artificialeg)  # drop x8
summary(lm.fin)
art.true <- lm(y ~ x8, data = artificialeg)  # the data-generating model
summary(art.true)

# Model stability plots
vis.art <- vis(lm.art)
plot(vis.art, which = "vip", interactive = TRUE)  # variable inclusion plot
plot(vis.art, which = "lvk", interactive = TRUE)  # loss versus model size
plot(vis.art, which = "lvk", highlight = "x6", interactive = TRUE)
plot(vis.art, which = "boot", interactive = FALSE)  # model stability plot
plot(vis.art, which = "boot", interactive = TRUE)

# Lasso path for the same data with lars
library(lars)
x <- as.matrix(subset(artificialeg, select = -y))
y <- as.matrix(subset(artificialeg, select = y))
art.lars <- lars(x, y)
plot(art.lars, xvar = "step", breaks = FALSE, lwd = 2, cex.lab = 1.4)
plot(art.lars, xvar = "step", plottype = "Cp", lwd = 2, cex.lab = 1.4)

# Bootstrapping glmnet over a grid of lambda values
bgn.art <- bglmnet(lm.art, lambda = seq(0.05, 2.75, 0.1))
plot(bgn.art, which = "boot", interactive = TRUE)
bgn.art <- bglmnet(lm.art)
plot(bgn.art, which = "vip", interactive = TRUE)
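# An illustrative addition (not part of the original lecture code): the Cp
# plot above is read off by eye, but the Cp-minimising step and its
# coefficients can also be extracted directly from the lars fit.
cp.step <- which.min(art.lars$Cp)    # path step with the smallest Mallows' Cp
cp.step
round(coef(art.lars)[cp.step, ], 3)  # coefficients of the Cp-optimal model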
## B3. Subtractive stability measures

# Refit the model B times with independent exponential weights on the
# observations (a weighted bootstrap) and store the coefficient estimates.
sstab <- function(mf, B = 100) {
  full_coeff <- coefficients(mf)
  kf <- length(full_coeff)
  coef.res <- matrix(ncol = kf, nrow = B)
  colnames(coef.res) <- names(full_coeff)
  formula <- stats::formula(mf)
  data <- model.frame(mf)
  for (i in 1:B) {
    wts <- stats::rexp(n = length(resid(mf)), rate = 1)
    data$wts <- wts
    mod <- stats::lm(formula = formula, data = data, weights = wts)
    coef.res[i, ] <- coefficients(mod)
  }
  return(coef.res)
}

data("diabetes", package = "lars")
x <- diabetes$x
y <- diabetes$y
df <- data.frame(scale(cbind(y, x)))  # standardise so coefficients are comparable
lm1 <- lm(y ~ ., data = df)
sj <- sstab(lm1)
round(head(sj[, -1], n = 10), 2)  # drop the intercept; first 10 bootstrap fits

# Rank the absolute coefficients within each bootstrap fit
# (rank 10 = largest |coefficient|)
sj <- abs(sj[, -1])
sj_ranks <- apply(sj, 1, rank)  # one column per bootstrap fit
head(t(sj_ranks), n = 10)
sj_rank_mean <- sort(apply(sj_ranks, 1, mean), decreasing = TRUE)
sj_rank_mean

# Distribution of ranks across the B = 100 fits for two predictors
table(sj_ranks[5, ])/100  # tc
table(sj_ranks[9, ])/100  # ltg
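# An illustrative addition (not part of the original lecture code): the rank
# tables above are easier to digest graphically. Each box below shows the
# distribution, over the B = 100 weighted-bootstrap fits, of a predictor's
# |coefficient| rank (10 = largest), with variables ordered by mean rank.
boxplot(t(sj_ranks)[, names(sj_rank_mean)], las = 2,
        ylab = "rank of |coefficient|",
        main = "Stability of diabetes predictors")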