########################################
# Code for model selection via scoring #
########################################

rm(list=ls())

# load standard libraries
library(mgcv)
library(gtools)
library(MASS, quietly = T)
library(xtable)
library(acid)

# load expectile specific libraries
library(expectreg, quietly=T)
library(expectregSelection)
library(ExtraFunctions)

# load data, maps and additional functions
peru12s <- read.table('peru12_de_new.csv',header=T,sep=";",dec=",")
source("renameTables.R")
mapPeru2 <- read.bnd("Peru_borders.bnd")

# build neighbourhood structure
neighbours <- bnd2gra(mapPeru2)

# build factor variables
peru12s$meduC            <- as.factor(peru12s$meduC)
peru12s$edupartnerC      <- as.factor(peru12s$edupartnerC)
peru12s$deadchildrenD    <- as.factor(peru12s$deadchildrenD)
peru12s$householdmembers <- as.factor(peru12s$householdmembers)
peru12s$csex             <- as.factor(peru12s$csex)
peru12s$cbirthorder      <- as.factor(peru12s$cbirthorder)
peru12s$householdhead    <- as.factor(peru12s$householdhead)
peru12s$electricity      <- as.factor(peru12s$electricity)
peru12s$radio            <- as.factor(peru12s$radio)
peru12s$television       <- as.factor(peru12s$television)
peru12s$refrigerator     <- as.factor(peru12s$refrigerator)
peru12s$bicycle          <- as.factor(peru12s$bicycle)
peru12s$motorcycle       <- as.factor(peru12s$motorcycle)
peru12s$telephone        <- as.factor(peru12s$telephone)
peru12s$caesarian        <- as.factor(peru12s$caesarian)

levels(peru12s$csex) <- c("male","female")
levels(peru12s$meduC) <- c("low","medium","high")
levels(peru12s$edupartnerC) <- c("low","medium","high")

# look at the data
str(peru12s)
summary(peru12s)





set.seed(123456)

Sys.time()
cat(" \r\n   \r\n")

# Build intercept model as initial model for the selection
InterceptModel <- expectreg.ls(stunting~1, data=peru12s, expectiles=
                               c(0.02,0.05,0.1,0.2,0.5,0.8,0.9,0.95,0.98),
                               smooth="schall", lambda=1, estimate="laws", ci=F)

# Define maximal model
nameSim <- "Bootstrap"
nameSim2 <- "Paper_c"

NameImg  <- paste("WS",      nameSim, nameSim2, ".RData", sep="_")
NameTable  <- paste("Table", nameSim, nameSim2, ".csv", sep="_")
NamePDF2 <- paste("Effects", nameSim, nameSim2, ".pdf"  , sep="_")

scopeSelection <- stunting ~ rb(cage, type="penalizedpart_pspline") + 
    rb(breastfeeding, type="penalizedpart_pspline") +
    rb(mbmi, type="penalizedpart_pspline") + 
    rb(mheight, type="penalizedpart_pspline") + 
    rb(mage, type="penalizedpart_pspline") + 
    rb(mregion,type="markov",P=neighbours,bnd=mapPeru2 ) +
    cage + 
    breastfeeding +
    mbmi + 
    mheight + 
    mage +csex + caesarian + cbirthorder +  
    meduC + edupartnerC + 
    householdmembers + householdhead + deadchildrenD +
    electricity + radio +television + refrigerator + bicycle + 
    motorcycle +  telephone

# Run model selection
Sys.time()
x <- expectreg.ls(scopeSelection, data=peru12s, expectiles=
                      c(0.02,0.05,0.1,0.2,0.5,0.8,0.9,0.95,0.98),
                  smooth="schall", lambda=1, estimate="laws", ci=F)

nbb <- 2000 # number bootstrap iterations
npred_ob <- 100 # size for grid used to predict covariate effects

# Determine side objects for predictions
k_vec <- which(grepl(x=names(x$coefficients), pattern="_prb"))
ndat_list <- list()
for(k in k_vec) {
    ndat_list[[k]] = data.frame(seq(min(x$covariates[[k]],na.rm=T),max(x$covariates[[k]],na.rm=T),length=npred_ob))
    names(ndat_list)[k] = names(x$covariates)[k]
    names(ndat_list[[k]]) = names(x$covariates)[k]
}
kr <- which(names(x$coefficients) == "mregion")
pp = x$asymmetries
np <- length(pp)
bnd = x$helper[[kr]][[1]]
regions <- data.frame(x = rownames(x$bases[[kr]]$P_orig))
colnames(regions) <- x$bases[[kr]]$xname_orig

coef_list <- predict_prb <- predict_re <- list()
# sample indices for bootstraping
bb_numbers <- NULL
for(bb_iter in 1:nbb) {
    bb_numbers <- rbind(bb_numbers,sample(nrow(peru12s),size=nrow(peru12s),replace=T))
}
Sys.time()
# apply bootstrapping on saturated model and save predicted values of GMRF and Splines
for(bb_iter in 1:nbb) {
    cat(" ", bb_iter )
    data_bb <- peru12s[bb_numbers[bb_iter,],]
    x <- expectreg.ls(scopeSelection, data=data_bb, 
                      expectiles=c(0.02,0.05,0.1,0.2,0.5,0.8,0.9,0.95,0.98),
                      smooth="schall", lambda=1, estimate="laws", ci=F)
    
    coef_list[[bb_iter]] <- x$coefficients
    ZZZ <- list()
    for(k in k_vec) {
        ndat = ndat_list[[k]]
        Bpred = predict(x$bases[[k]],ndat)
        ZZZ[[k]] = Bpred %*% x$coefficients[[k]]
    }
    predict_prb[[bb_iter]] <- ZZZ
    
    z = NULL
    Zspathelp = x$helper[[kr]][[2]]
    z = matrix(NA,nrow=nrow(Zspathelp),ncol=np)
    #rownames(x$bases[[k]]$P_orig)
    
    coefficients_markov <- expectreg:::predict.regbase(x$bases[[kr]],regions)
    z <- coefficients_markov %*% x$coefficients[[kr]]
        re = data.frame(attr(bnd,"regions"),z)
        colnames(re) <- c("regions",x$asymmetries)
    predict_re[[bb_iter]] <- re
    if(bb_iter %% 250 == 0) save.image(NameImg)

}    


Sys.time()

save.image(NameImg)
cat(" \r\n   \r\n")
Sys.time()
cat(" \r\n   \r\n")


if(!exists("npred_ob")) npred_ob <- nrow(predict_prb[[1]][[1]])

bootstrap_perc_KI <- function(x,probs = c(0.025,0.975), ...) {
    x2 <- sort(x)
    c(x2[floor(length(x)*probs[1])],x2[ceiling(length(x)*probs[2])])
}

### GMRF

# initialize tables of predicted GMRF value per region
coef_per_region_asymmetry <- list()
CI_per_region_asymmetry <- list()
decision_per_region_asymmetry <- matrix(NA, nrow=nrow(regions), ncol=np)
for(i in 1:nrow(regions)) {
    coef_per_region_asymmetry[[i]] <- matrix(NA, nrow=nbb, ncol=np)
    CI_per_region_asymmetry[[i]] <- matrix(NA, nrow=2, ncol=np)
    colnames(coef_per_region_asymmetry[[i]]) <- pp
    colnames(CI_per_region_asymmetry[[i]]) <- pp
    
}
names(coef_per_region_asymmetry) <- regions[,1]
names(CI_per_region_asymmetry) <- regions[,1]
rownames(decision_per_region_asymmetry) <- regions[,1]
colnames(decision_per_region_asymmetry) <- pp
for(bb_iter in 1:nbb){
    for(i in 1:nrow(regions)) {
        coef_per_region_asymmetry[[regions[i,1]]][bb_iter,] <- as.numeric(predict_re[[bb_iter]][(predict_re[[bb_iter]][,1] == regions[i,1] ),2:(np+1)])
    }
}

for(i in 1:nrow(regions)) {
    for(j in 1:np) {
        CI_per_region_asymmetry[[i]][,j] <- quantile(coef_per_region_asymmetry[[i]][,j],probs=c(0.025,0.975),names=FALSE)
    }
}

for(i in 1:nrow(regions)) {
    for(j in 1:np) {
        decision_per_region_asymmetry[i,j] <- (prod(CI_per_region_asymmetry[[i]][,j]) > 0)*1
    }
}

decision_GMRF <- matrix(colSums(decision_per_region_asymmetry),nrow=1)
decision_GMRF[,decision_GMRF > 0] <- 1
rownames(decision_GMRF) <- "mregion"


### List per covariate included linearly

# Each List contains a list with length = levels(covariate)
# Each List of List contains a matrix with nbb rows and np columns

list_list_coef <- list()
for(ccc in 1:length(coef_list[[1]])) {
    list_list_coef[[ccc]] <- list()
    for(lll in 1:nrow(coef_list[[1]][[ccc]])) {
        list_list_coef[[ccc]][[lll]] <- matrix(NA, nrow=nbb, ncol=np)
    }
}
list_list_CI <- list()
for(ccc in 1:length(coef_list[[1]])) {
    list_list_CI[[ccc]] <- list()
    for(lll in 1:nrow(coef_list[[1]][[ccc]])) {
        list_list_CI[[ccc]][[lll]] <- matrix(NA, nrow=2, ncol=np)
    }
}
list_decision <- list()
for(ccc in 1:length(list_list_coef)) {
    list_decision[[ccc]] <- matrix(NA, nrow=length(list_list_coef[[ccc]]), ncol=np)
}

names(list_list_coef) <- names(coef_list[[1]])
names(list_list_CI)   <- names(coef_list[[1]])
names(list_decision)   <- names(coef_list[[1]])

for(bb_iter in 1:nbb) {
    for(ccc in 1:length(coef_list[[1]])) {
        for(lll in 1:nrow(coef_list[[1]][[ccc]])) {
            list_list_coef[[ccc]][[lll]][bb_iter,] <- coef_list[[bb_iter]][[ccc]][lll,]
        }
    }
}

for(ccc in 1:length(list_list_coef)) {
    for(lll in 1:length(list_list_coef[[ccc]])) {
        for(j in 1:np){
            list_list_CI[[ccc]][[lll]][,j] <- quantile(list_list_coef[[ccc]][[lll]][,j],probs=c(0.025,0.975),names=FALSE)
        }
    }
}


for(ccc in 1:length(list_list_coef)) {
    for(lll in 1:length(list_list_coef[[ccc]])) {
        for( j in 1:np) {
            list_decision[[ccc]][lll,j] <- (prod(list_list_CI[[ccc]][[lll]][,j]) > 0)*1
        }
    }

}

matrix_decision <- NULL
for(ccc in 1:length(list_list_coef)) {
    temp_matrix <- list_decision[[ccc]] 
    if(nrow(temp_matrix) == 1) {
        rownames(temp_matrix) <- names(list_list_coef)[ccc]
    } else {
        rownames(temp_matrix) <- paste(names(list_list_coef)[ccc], 1:nrow(temp_matrix), sep="")
    }
    if(!grepl(names(list_list_coef)[ccc], pattern="_prb") && !grepl(names(list_list_coef)[ccc], pattern="mregion")) {
        matrix_decision <- rbind(matrix_decision,temp_matrix)
    }
}

matrix_decision_agg <- NULL
for(ccc in 1:length(list_list_coef)) {
    temp_matrix <- list_decision[[ccc]] 
    temp_matrix2 <- matrix(colSums(temp_matrix),nrow=1)
    temp_matrix2[,temp_matrix2 > 0] <- 1
    rownames(temp_matrix2) <- names(list_list_coef)[ccc]
    if(!grepl(names(list_list_coef)[ccc], pattern="_prb") && !grepl(names(list_list_coef)[ccc], pattern="mregion")) {
        matrix_decision_agg <- rbind(matrix_decision_agg,temp_matrix2)
    }
}

matrix_decision_agg <- rbind(matrix_decision_agg,decision_GMRF)

order_matrix1 <- c("cbirthorder","caesarian","deadchildrenD","householdhead","householdmembers","meduC","edupartnerC","csex",
                   "bicycle","electricity","motorcycle","radio","refrigerator","telephone","television",
                   "breastfeeding","cage","mage","mbmi","mheight","mregion")

matrix_decision_agg2 <- matrix_decision_agg[order_matrix1,]






### nonlinear deviation _prb

# list of list of covariates of list of asymmetry, matrix of forecasting points and values
list_list_prb <- list()
for(kkk in k_vec) {
    list_list_prb[[kkk]] <- list()
    for(ttt in 1:np) {
        list_list_prb[[kkk]][[ttt]] <- matrix(NA,ncol=npred_ob,nrow=nbb)    
    }
}
list_list_prb_CI <- list()
for(kkk in k_vec) {
    list_list_prb_CI[[kkk]] <- list()
    for(ttt in 1:np) {
        list_list_prb_CI[[kkk]][[ttt]] <- matrix(NA,ncol=npred_ob,nrow=2)    
    }
}
list_list_prb_CI_scb <- list()
for(kkk in k_vec) {
    list_list_prb_CI_scb[[kkk]] <- list()
    for(ttt in 1:np) {
        list_list_prb_CI_scb[[kkk]][[ttt]] <- matrix(NA,ncol=npred_ob,nrow=2)    
    }
}
list_prb_decision <- list()
for(kkk in k_vec) {
    list_prb_decision[[kkk]] <- matrix(NA,ncol=np,nrow=npred_ob)    
}
list_prb_decision_scb <- list()
for(kkk in k_vec) {
    list_prb_decision_scb[[kkk]] <- matrix(NA,ncol=np,nrow=npred_ob)    
}

names(list_list_prb)     <- names(ndat_list)
names(list_list_prb_CI)  <- names(ndat_list)
names(list_list_prb_CI_scb) <- names(ndat_list)
names(list_prb_decision) <- names(ndat_list)
names(list_prb_decision_scb) <- names(ndat_list)

for(kkk in k_vec) {
    for(ttt in 1:np) {
        for(bb_iter in 1:nbb) {
            list_list_prb[[kkk]][[ttt]][bb_iter,] <-  predict_prb[[bb_iter]][[kkk]][,ttt]
        }
    }
}

for(kkk in k_vec) {
    for(ttt in 1:np) {
        #for(j in 1:npred_ob) {
        list_list_prb_CI[[kkk]][[ttt]][1,] <-  acid::confband.pw(list_list_prb[[kkk]][[ttt]])$lower
        list_list_prb_CI[[kkk]][[ttt]][2,] <-  acid::confband.pw(list_list_prb[[kkk]][[ttt]])$upper
        #}
    }
}


for(kkk in k_vec) {
    for(ttt in 1:np) {
        #for(j in 1:npred_ob) {
        list_list_prb_CI_scb[[kkk]][[ttt]][1,] <-  acid::confband.kneib(list_list_prb[[kkk]][[ttt]])$lower
        list_list_prb_CI_scb[[kkk]][[ttt]][2,] <-  acid::confband.kneib(list_list_prb[[kkk]][[ttt]])$upper
        #}
    }
}


for(kkk in k_vec) {
    for(ttt in 1:np) {
        for(j in 1:npred_ob) {
            list_prb_decision[[kkk]][j,ttt] <- (prod(list_list_prb_CI[[kkk]][[ttt]][,j]) > 0)*1  
        }
    }
}

for(kkk in k_vec) {
    for(ttt in 1:np) {
        for(j in 1:npred_ob) {
            list_prb_decision_scb[[kkk]][j,ttt] <- (prod(list_list_prb_CI_scb[[kkk]][[ttt]][,j]) > 0)*1  
        }
    }
}

matrix_prb_decision <- matrix(NA, nrow=length(k_vec), ncol=np)

for(kkk in k_vec) {
    matrix_prb_decision[kkk,] <- colSums(list_prb_decision[[kkk]])
}
matrix_prb_decision2 <- matrix_prb_decision
matrix_prb_decision2[matrix_prb_decision2 > 0] <- 1

rownames(matrix_prb_decision2) <- paste(names(list_prb_decision),"_prb",sep="")

matrix_prb_decision_scb <- matrix(NA, nrow=length(k_vec), ncol=np)

for(kkk in k_vec) {
    matrix_prb_decision_scb[kkk,] <- colSums(list_prb_decision_scb[[kkk]])
}
matrix_prb_decision_scb2 <- matrix_prb_decision_scb
matrix_prb_decision_scb2[matrix_prb_decision_scb2 > 0] <- 1

matrix_prb_decision_scb3 <- matrix_prb_decision_scb2

rownames(matrix_prb_decision_scb2) <- paste(names(list_prb_decision_scb),"_prb_scb",sep="")
rownames(matrix_prb_decision_scb3) <- paste(names(list_prb_decision_scb),"_prb",sep="")

matrix_decision_agg_all <- rbind(matrix_decision_agg,matrix_prb_decision2,matrix_prb_decision_scb2)
matrix_decision_agg_s <- rbind(matrix_decision_agg,matrix_prb_decision_scb3)

order_matrix1 <- c("cbirthorder","caesarian","deadchildrenD","householdhead","householdmembers","meduC","edupartnerC","csex",
                   "bicycle","electricity","motorcycle","radio","refrigerator","telephone","television",
                   "breastfeeding","breastfeeding_prb","breastfeeding_prb_scb",
                   "cage","cage_prb","cage_prb_scb",
                   "mage","mage_prb","mage_prb_scb",
                   "mbmi","mbmi_prb","mbmi_prb_scb",
                   "mheight","mheight_prb","mheight_prb_scb",
                   "mregion")
order_matrix1_s <- c("cbirthorder","caesarian","deadchildrenD","householdhead","householdmembers","meduC","edupartnerC","csex",
                   "bicycle","electricity","motorcycle","radio","refrigerator","telephone","television",
                   "breastfeeding","breastfeeding_prb",
                   "cage","cage_prb",
                   "mage","mage_prb",
                   "mbmi","mbmi_prb",
                   "mheight","mheight_prb",
                   "mregion")

matrix_decision_agg2 <- matrix_decision_agg_all[order_matrix1,]

matrix_decision_agg2_s <- matrix_decision_agg_s[order_matrix1_s,]

# matplot(ndat_list[[1]][,1],t(list_list_prb[[1]][[1]]),type="l",col="grey",lty=1)
# abline(h=0)

table3 <- renameTable(matrix_decision_agg2_s)
colnames(table3) <- pp
table4 <- renameTable(matrix_decision_agg2)
colnames(table4) <- pp
rownames(table4) <- gsub(rownames(table4),pattern = "nonlinear" , replacement="nonlinear pointwise")
rownames(table4) <- gsub(rownames(table4),pattern = "_prb_scb" , replacement=" & nonlinear SCB")
rownames(table4) <- gsub(rownames(table4),pattern = "cage" , replacement="child's age")
rownames(table4) <- gsub(rownames(table4),pattern = "mage" , replacement="mother's age")
rownames(table4) <- gsub(rownames(table4),pattern = "mbmi" , replacement="mother's bmi")
rownames(table4) <- gsub(rownames(table4),pattern = "mheight" , replacement="mother's height")

xtable(table4)
xtable(table3)

save.image(NameImg)

