setwd("c:/monographregression/golf") set.seed(123) # load packages library("penalized") library("foreign") # read data and rename golf <- read.dta(file="daten/golf.dta") names(golf)[c(1,7,8,9)] <- c("price", "kilometerop1", "kilometerop2", "kilometerop3") # modified function for path plots myplotpath <- function (object, labelsize = 0.6, standardize = FALSE, ...) { if (length(object) > 0) { betas <- sapply(object, coefficients, "p") } if (standardize) { weights <- weights(object[[1]]) if (length(weights) > nrow(betas)) weights <- weights[-seq_len(length(weights) - nrow(betas))] betas <- betas * matrix(weights, nrow = nrow(betas), ncol = ncol(betas)) } remove <- apply(betas, 1, function(bet) all(bet == 0)) if (all(remove)) stop("all coefficients are zero for all values of lambda in this object") lambda <- sapply(object, function(object) object@lambda1) label <- expression("smoothing parameter "*lambda) if (all(lambda == lambda[1])) { lambda <- sapply(object, function(object) object@lambda2) label <- expression("smoothing parameter "*lambda) } labwidth <- ifelse(labelsize > 0, max(strwidth(rownames(betas[!remove, ]), "inches", labelsize)), 0) margins <- par("mai") par(mai = c(margins[1:3], max(margins[4], labwidth * 1.4))) matplot(lambda, t(betas[!remove, , drop = FALSE]), type = "l", ylab = "estimated coefficient", xlab = label, col = "black", xlim = rev(range(lambda)), lty=1, ...) if (labelsize > 0 && !is.null(rownames(betas))) { take <- which(!remove) for (i in 1:sum(!remove)) { j <- take[i] axis(4, at = betas[j, ncol(betas)], labels = rownames(betas)[j], las = 1, cex.axis = labelsize, col.axis = "black", lty = 1, col = "black") } } par(mai = margins) return(invisible(NULL)) } # ridge regularized estimation ol2 <- optL2(price, ~ ageop1 + ageop2 + ageop3 + kilometerop1 + kilometerop2 + kilometerop3 + TIA + extras1 + extras2, standardize=TRUE, data=golf, model="linear", fold=10) cf2 <- coefficients(ol2$fullfit) prof2 <- profL2(price, ~ ageop1 + ageop2 + ageop3 + kilometerop1 + kilometerop2 + kilometerop3 + TIA + extras1 + extras2, fold = 10, steps=20, standardize=TRUE, data=golf, minlambda=.000001, maxlambda=1000, model="linear") #path plot par(mai=c(0.8,0.8,0.1,0.8)) myplotpath(prof2$fullfit, standardize=TRUE, labelsize=0.9) abline(v=ol2$lambda, lty=2) cf3 <- coefficients(prof2$fullfit[[20]]) cf4 <- coefficients(prof2$fullfit[[4]]) # LASSO regularized estimation ol1 <- optL1(price, ~ ageop1 + ageop2 + ageop3 + kilometerop1 + kilometerop2 + kilometerop3 + TIA + extras1 + extras2, standardize=TRUE, data=golf, model="linear", fold=10) cf1 <- coefficients(ol1$fullfit) # path plot par(mai=c(0.8,0.8,0.1,0.8)) l1 <- penalized(price, ~ ageop1 + ageop2 + ageop3 + kilometerop1 + kilometerop2 + kilometerop3 + TIA + extras1 + extras2, standardize=TRUE, data=golf, steps="Park") myplotpath(l1, standardize=TRUE, labelsize=0.9) abline(v=ol1$lambda, lty=2) # table with estimated effects id <- match(names(cf2), names(cf1)) round(cbind(cf3, cf2, cf4, cf1[id]), 3)