
# Main function to apply expectile regression based on the mgcv package
expectreg_bam_smooth <- function(formula, data, expectiles, sm_par_vec, fixed = FALSE, 
                                 delta = 0.0001, step_max = 10, reltol = 1e-8,
                                 initial_w = TRUE, 
                                 opt_type=c("gcv","schall"), dev_schall = c("dev","gcv","non"),
                                 quietly=FALSE, bounce_off=FALSE){
    # formula    is the formula of the model, similarly as in the mgcv package
    # data       are the data to be analysed
    # expectiles is the current asymmetry level. Just a scalar possible, no vector!
    # sm_par_vec are the initial smoothing parameters
    # fixed      specifies whether the smoothing parameters are fixed or should be estimated
    # delta      is the convergence level of the schall algorithm
    # step_max   is the maximal number of step halving iterations in the smoothing parameter estimation via the Schall algorithm
    # reltol     is the relative tolerance in the optimization of the smoothing parameter via optim (Nelder Mead)
    # initial_w  is a binary variable to decide whether initial weights should be used in the estimation procedure. This will increase the speed.
    # opt_type   Whether the schall algorithm, or the gcv  optimization should be applied.
    # dev_schall Which type of criterion should be applied to check, whether the new smoothing parameter results in better estimates. Either MWSE -> dev, gcv, or none
    # quietly    Should the procedure run in silence or trace should be shown
    # bounce off If in the Nelder-Mead optimization of the gcv a large fake gcv should be returned when the proposed smoothing parameter is not in the interval (1e-5,1e5). This is done since the original Melder-Nead does not consider limits.
    
    
    
    
        
    opt_type   <- match.arg(opt_type)
    dev_schall <- match.arg(dev_schall)
    
    # Check whether the initial smoothing parameters are in the proposed range of c(1e-5,1e5)
    if(!is.null(sm_par_vec) & !fixed) {
        sm_par_vec[sm_par_vec > 0.99999e+5] <- 0.99999e+5
        sm_par_vec[sm_par_vec < 1.00001e-5] <- 1.00001e-5
    }
    
    # Apply an initial expectile regression with the initial smoothing parameters to get the initial weights.
    res <- expectreg_bam(formula=formula, data=data,expectiles=expectiles,sm_par_vec=sm_par_vec,quietly=quietly, initial_weights = rep(0.5,times=nrow(data)))
    initial_weights <- rep(0.5,times=nrow(data))
    if(initial_w) initial_weights <- res$w1
    
    # When the smoothing parameters should be optimized, apply the optimization
    if(!fixed) {
        if(!quietly) cat("\n Initial weights estimated \n")
        # If the Schall Algorithm should be applied
        if(opt_type == "schall") {
            model_optim <- expectreg_bam_schall(formula=formula, data=data, expectiles=expectiles, sm_par_vec=sm_par_vec,
                                                delta=delta, step_max=step_max, dev_schall=dev_schall, 
                                                quietly=quietly, initial_weights = initial_weights)
            sm_par_vec <- model_optim$sm_par_vec
        }
        # If the smoothing parameters should be optimzied via gcv and Nelder-Mead
        if(opt_type == "gcv") {
            # Opimization via gcv is applied on the log scale for the steps (internally reconverted to normal scale)
            sm_par_log <- log(sm_par_vec)
            trace1 <- 1
            if(quietly) trace1 <- 0
            # expectreg_bam_temp is the function that reconverts the smoothing parameters back to the normal scale,
            # checks whether the proposed parameters are in the correct range and gives the gcv as the only result.
            model_optim <- optim(par=sm_par_log, fn=expectreg_bam_temp, 
                                 formula=formula, data=data, expectiles=expectiles, 
                                 quietly = quietly, initial_weights = initial_weights, 
                                 bounce_off=bounce_off,
                                 control=list("reltol"=reltol,trace=trace1))
            # Convert the optimal smoothing parameter back to the normal scale
            sm_par_log <- model_optim$par
            sm_par_vec <- exp(sm_par_log)
        }
        
        if(!quietly) cat("\n Final Smoothing Parameter: \n")
        if(!quietly) print(sm_par_vec)
        # Estimate the model based on the optimal smoothing parameters, or the fixed smoothing parameters
        res <- expectreg_bam(formula=formula, data=data,expectiles=expectiles,sm_par_vec=sm_par_vec,quietly=quietly, initial_weights = initial_weights)
        res$model_optim <- model_optim
    }
    
    
    res   
}

# Function that runs the expectile regression for fixed smoothing parameters.
# Internally used in optimization via optim-gcv, schall-algorithm or for fixed smoothing parameters.
# For internal use only! Apply expectreg_bam_smooth instead!
expectreg_bam <- function(formula,data,expectiles,sm_par_vec,quietly, initial_weights) {
    # formula         is the formula of the model, similarly as in the mgcv package
    # data            are the data to be analysed
    # expectiles      is the current asymmetry level. Just a scalar possible, no vector!
    # sm_par_vec      are the fixed smoothing parameters
    # quietly         Should the procedure run in silence or trace should be shown
    # initial_weights Initial weights of previous expectile runs, or a vector of 0.5 and length = nrow(data). To speed up the estimation
    
    dw1 <- 1               # delta weights, so are there any differences in the weights to the previous run?
    it <- 1                # Number of iterations
    w1 <- initial_weights  
    data$w1 <- w1
    
    for(it in 1:50) {
        if(!quietly) cat(it)
        w1 <- data$w1
        w2 <- w1
        if(!quietly) cat("i")
        # Estimation of a weighted linear regression for the estimation of the new coefficients
        model <- bam(formula=formula, data=data, family=gaussian(),weights=w1,fit=T,method="GCV.Cp",sp=sm_par_vec)
        if(!quietly) cat("I")
        
        # Calculation of new weights
        z1 <- model$fitted
        w1[] <- expectiles
        w1[!(model$y > z1)] <- 1-expectiles
        data$w1 <- w1
        
        # Compare old and new weights
        dw1 <- sum(w1 != w2,na.rm=TRUE) 
        if(dw1 == 0) break
        gc()
    }
    
    gcv <- model$gcv.ubre
    if(!quietly) cat("\n")
    list("model"=model, "w1"=w1, "gcv"=gcv, "expectiles"=expectiles, "it"=it, 
         "fitted"=model$fitted, "coefficients" = model$coefficients, "sm_par_vec"=sm_par_vec)
}


# Function that is applied in optim
# It converts the log-smoothing parameters back to the normal scale,
# Checks whether the proposed smoothing_parameters are in the correct range
# Applies the expectile regression with fixed smoothing parameters and
# returns the estimated gcv, or 1e10 if the smoothing parameters are "wrong" -> bounce off.
# For internal use only! Apply expectreg_bam_smooth instead!
expectreg_bam_temp <- function(sm_par_log,formula,data,expectiles,quietly=FALSE, initial_weights, bounce_off) {
    sm_par_vec <- exp(sm_par_log)
    res <- 0
    if(any(sm_par_vec < 1e-5) | any(sm_par_vec > 1e+5)) {
        res <- 1e10 
    }
    if(!bounce_off | res < 0.9e10) { 
        model_eval <- expectreg_bam(formula=formula,data=data,expectiles=expectiles,sm_par_vec=sm_par_vec,quietly=quietly, initial_weights = initial_weights)
        res <- model_eval$gcv
    }
    res
}





# Function that runs the schall algorithm
# For internal use only! Apply expectreg_bam_smooth instead!
expectreg_bam_schall <- function(formula, data, expectiles, sm_par_vec, delta=0.01,
                                 step_max=10, dev_schall, quietly=FALSE, initial_weights) {
    # formula         is the formula of the model, similarly as in the mgcv package
    # data            are the data to be analysed
    # expectiles      is the current asymmetry level. Just a scalar possible, no vector!
    # sm_par_vec      are the initial smoothing parameters
    # delta           is the convergence level of the schall algorithm
    # step_max        is the maximal number of step halving iterations in the smoothing parameter estimation via the Schall algorithm
    # dev_schall      Which type of criterion should be applied to check, whether the new smoothing parameter results in better estimates. Either MWSE -> dev, gcv, or none
    # quietly         Should the procedure run in silence or trace should be shown
    # initial_weights Initial weights of previous expectile runs, or a vector of 0.5 and length = nrow(data). To speed up the estimation
    
    
    # Estimation of an initial model specification to get the penalty matrix
    w1 <- rep(0.5,times=nrow(data))
    data$w1 <- w1
    model_temp <- bam(formula=formula, data=data, family=gaussian(),weights=w1,method="GCV.Cp",sp=sm_par_vec, fit=F)
    gc()
    if(!quietly) cat("0")
    
    # Calculation of the penalty matrix
    S_temp <- matrix(0,nrow=ncol(model_temp$X),ncol=ncol(model_temp$X))
    S1 <- list()
    jj2 <- 1
    for(jj1 in 1:length(model_temp$smooth)){
        for(jj3 in 1:length(model_temp$smooth[[jj1]]$S)) {
            S1[[jj2]] <- S_temp
            S1[[jj2]][model_temp$smooth[[jj1]]$first.para : model_temp$smooth[[jj1]]$last.para, 
                      model_temp$smooth[[jj1]]$first.para : model_temp$smooth[[jj1]]$last.para] <-
                model_temp$smooth[[jj1]]$S[[jj3]]
            jj2 <- jj2+1
        }
    }
    if(!quietly) cat("0")
    smooth_list <- model_temp$smooth
    rm("model_temp")
    gc()
    
    # Estimation of an initial model, with the initial smoothing parameters.
    sm_par_vec_new <- sm_par_vec
    sm_par_vec_new[] <- 0
    sm_par_vec_old <- sm_par_vec_star <- sm_par_vec_new
    deviance_model <- Inf
    iter_step <- 0
    if(!quietly) cat("\n Initial smoothing parameter: \n")
    if(!quietly) print((sm_par_vec))
    
    model2 <- expectreg_bam(formula=formula,data=data,expectiles=expectiles,sm_par_vec=sm_par_vec,quietly=quietly, initial_weights = initial_weights)
    model1 <- model2$model
    
    
    iter <- 1         # Number of recalculations of the smoothing parameters 
    iter_step <- 0    # Number of step-halving steps during one iter-step
    conv1 <- FALSE    # Covergence criterion
    conv2 <- FALSE    # Covergence criterion
    
    # Loop for the recalculation of the smoothing parameters
    while(iter < 100 & !conv1 & !conv2){
        if(!quietly) cat("\n iter: ", iter, "\n")
        
        # Recalculate the penalty matrix, based on the currect smoothing parameter
        S_lambda <- S_temp
        for(j in 1:length(S1)) {
            S_lambda <- S_lambda + sm_par_vec[j] * S1[[j]]
        }
        
        beta1c <- matrix(model1$coef,ncol=1)
        
        penalty_model1 <- as.numeric(t(beta1c) %*% S_lambda %*% beta1c)
        
        # Calculate the penalized MWSE (here we call MWSE deviance)
        # Criterion for step halving
        deviance_model1 <- model1$deviance + penalty_model1
        
        if(dev_schall == "gcv") {
            deviance_model1 <- model1$gcv
        }
        
        # Recalculation of the smoothing parameters based on the formula of Wood and Fasiolo (2017)
        XWXS1 <- model1$Vp/model1$sig2
        sm_par_vec_star[] <- 0
        Sinv <- build_S_lambda_inv(smooth_list=smooth_list, lambda_vec=sm_par_vec, S_temp2=S_temp)
            
        for(j in 1:length(sm_par_vec)) {
            aa <- sum(diag(Sinv%*%S1[[j]]))
            if(aa < 1e-15) {
                stop("ginv is 0!")
            }
            sm_par_vec_star[j] <- (aa - sum(diag(XWXS1 %*% S1[[j]])))/(t(beta1c)%*%S1[[j]]%*%beta1c)*model1$sig2 * sm_par_vec[j]
        }
        if(any(sm_par_vec_star < 0 )) stop("negative sm_par_vec")
        
        # Estimate the new coefficients based on the new smoothing parameters and 
        # apply step halving on the smoothing parameters if the new estimates are worse than before
        dev_crit <- FALSE
        conv3 <- FALSE
        while(iter_step <= step_max & !dev_crit & !conv3){
            if(!quietly) cat("\n iter: ", iter, " iter_step: ", iter_step, "\n")
            
            # Apply step halving (Doesn't do anything for iter_step = 0)
            sm_par_vec_new <- (sm_par_vec_star - sm_par_vec) / (2^iter_step) + sm_par_vec
            
            # Check the correctness of the new smoothing parameters
            if(any(sm_par_vec_new <0 )) stop("negative sm_par_vec")
            sm_par_vec_new[sm_par_vec_new > 1e+5] <- 1e+5
            sm_par_vec_new[sm_par_vec_new < 1e-5] <- 1e-5
            
            # Apply expectile regression for the estimation of the coefficients
            model2_new <- expectreg_bam(formula=formula, data=data, expectiles=expectiles, sm_par_vec=sm_par_vec_new, quietly=quietly, initial_weights = initial_weights)
            model1_new <- model2_new$model
            
            # Calculate the penalty matrix to get the stop criteria for step halving
            S_lambda_new <- S_temp
            for(j in 1:length(S1)) {
                S_lambda_new <- S_lambda_new + sm_par_vec_new[j] * S1[[j]]
            }
            
            beta1c_new <- matrix(model1_new$coef,ncol=1)
            
            penalty_model1_new <- as.numeric(t(beta1c_new) %*% S_lambda_new %*% beta1c_new)
            
            deviance_model1_new <- model1_new$deviance + penalty_model1_new
            
            if(dev_schall == "gcv") {
                deviance_model1_new <- model1_new$gcv
            }
            
            if(!quietly) {
                cat("\n New smoothing parameter: \n")
                print((sm_par_vec_new))
                cat("Old smoothing parameter: \n")
                print((sm_par_vec))
                cat("Relative Difference: \n")
                print(abs(sm_par_vec_new - sm_par_vec)/sm_par_vec)
                cat("Absolute Difference: \n")
                print(abs(sm_par_vec_new - sm_par_vec))
                cat("Difference Deviance \n")
                print(deviance_model1_new - deviance_model1)
            }
            
            # Check convergence of smoothing parameters
            if(dev_schall == "non" || deviance_model1_new < deviance_model1) {
                dev_crit <- TRUE
                iter_step <- 0
            } else {
                iter_step <- iter_step + 1
            }
            if(all((abs(sm_par_vec_new - sm_par_vec)/sm_par_vec < delta) | (abs(sm_par_vec_new - sm_par_vec) < delta))) {
                conv3 <- TRUE
            }  else {
                conv3 <- FALSE
            }
        } # End of loop for coefficient calculation and step halving
        
        if(iter_step > step_max) {
            conv2 <- TRUE
        }
        
        # Check convergence of smoothing parameters
        if(!conv2){
            if(all((abs(sm_par_vec_new - sm_par_vec)/sm_par_vec < delta) | (abs(sm_par_vec_new - sm_par_vec) < delta))) {
                conv1 <- TRUE
            }  else {
                model1 <- model1_new
                sm_par_vec <- sm_par_vec_new
            }
        }
        iter <- iter + 1
    } # End of recalculation of smoothing parameters
    
    if(iter==100) warning("Schall did not converge!")
    if(!conv3 & conv2) warning("iter_step > step_max & rel. Difference > delta")
    
    list("sm_par_vec"=sm_par_vec, "iter"=iter, "iter_step"=iter_step, 
         "conv1"=conv1, "conv2"=conv2, "conv3"=conv3)
}






# Function to build the inverse of the penalty matrix.
# Therefore each element of the blockdiagonal matrix is inverted
build_S_lambda_inv <- function(smooth_list, lambda_vec, S_temp2){
    jj2 <- 1
    for(jj1 in 1:length(smooth_list)){
        S_lambda_temp <- 0
        for(jj3 in 1:length(smooth_list[[jj1]]$S)) {
            S_lambda_temp <- S_lambda_temp + smooth_list[[jj1]]$S[[jj3]] * lambda_vec[jj2]
            jj2 <- jj2+1
        }
        S_temp2[smooth_list[[jj1]]$first.para : smooth_list[[jj1]]$last.para, 
                smooth_list[[jj1]]$first.para : smooth_list[[jj1]]$last.para] <-
            ginv(S_lambda_temp)
    }
    S_temp2
}
