# Load the station description table that accompanies the daily-value files.
#
# Files were downloaded on 2017-04-23, then stored and unzipped in the directory
# "C:/Users/Elmar/Documents/ClimateData".
#
# Note: the DWD changed their data structure on 2017-06-01, so files fetched
# after that date may not match the layout assumed here (TODO confirm).
#
# Deliberately no `rm(list = ls())` here: every object used in this script is
# assigned before it is read, so there is no need to wipe the workspace of
# whoever sources the file.

# Field widths are the lengths of the header labels, including the padding
# blanks that belong to each fixed-width field. Any text beyond these six
# fields is not read.
Meta_Data1 <- read.fwf(
    "C:/Users/Elmar/Documents/ClimateData/KL_Tageswerte_Beschreibung_Stationen.txt",
    widths = nchar(c("Stations_id",
                     " von_datum",
                     " bis_datum",
                     " Stationshoehe",
                     " geoBreite ",
                     "geoLaenge ")),
    header = FALSE, stringsAsFactors = FALSE
)

# Row 1 carries the column labels; row 2 is dropped as well (presumably a
# separator line -- confirm against the file).
Meta_Data2 <- Meta_Data1[-c(1, 2), ]
colnames(Meta_Data2) <- gsub(pattern = " ", replacement = "",
                             x = unlist(Meta_Data1[1, ], use.names = FALSE))

# All six fields were read as text because the header shares their columns.
numeric_columns <- c("Stations_id", "von_datum", "bis_datum",
                     "Stationshoehe", "geoBreite", "geoLaenge")
missing_columns <- setdiff(numeric_columns, colnames(Meta_Data2))
if (length(missing_columns) > 0) {
    stop("Station description file lacks expected column(s): ",
         paste(missing_columns, collapse = ", "), call. = FALSE)
}

Meta_Data3 <- Meta_Data2
Meta_Data3[numeric_columns] <- lapply(Meta_Data2[numeric_columns], as.numeric)

# Only the id, elevation and coordinates are needed further on.
Meta_Data <- Meta_Data3[, c("Stations_id", "Stationshoehe", "geoBreite", "geoLaenge")]



# Sub-directories directly below the data directory, once as bare names and
# once as full paths.
list_directories <- list.dirs(path = "C:/Users/Elmar/Documents/ClimateData", full.names = FALSE, recursive = FALSE)
list_directories_full <- list.dirs(path = "C:/Users/Elmar/Documents/ClimateData", full.names = TRUE, recursive = FALSE)

List_str_split <- strsplit(list_directories, split = "_")

# The second "_"-separated token of each directory name is taken as the
# station id (NA when a name has no second token). vapply() also copes with an
# empty directory listing, where a 1:length() loop would fail.
Stations_id_vec <- vapply(List_str_split, function(parts) parts[2], character(1))

# Tokens that are not numbers become NA (with a coercion warning) and are
# dropped explicitly: `NA %in% x` is TRUE whenever x itself contains an NA.
Stations_id_vec <- as.numeric(Stations_id_vec)
Stations_id_vec <- Stations_id_vec[!is.na(Stations_id_vec) &
                                   Stations_id_vec %in% Meta_Data$Stations_id]

# Accumulator for the per-station tables selected later in the script.
Huge_Matrix <- NULL

# List every calendar day between two dates (both ends included).
#
# Args:
#   startDate: first day, as a string written in `format`.
#   endDate:   last day, as a string written in `format`.
#   format:    strptime-style format used both to parse the two inputs and to
#              render the result.
#
# Returns:
#   Character vector with one element per day, formatted with `format`.
#
# The default dates are written in the default format; the previous defaults
# ("2012-12-31", "2013-05-01") could not be parsed with "%Y%m%d", so calling
# the function without arguments always failed.
itemizeDates <- function(startDate="20121231", endDate="20130501",
                         format="%Y%m%d") {
    from <- as.Date(startDate, format = format)
    to <- as.Date(endDate, format = format)
    if (anyNA(c(from, to))) {
        stop("startDate and endDate must be dates written in the format '",
             format, "'", call. = FALSE)
    }
    out <- seq(from, to, by = "days")
    # `format` the argument shadows format() the function by name only;
    # base:: makes the call unambiguous for the reader.
    base::format(out, format = format)
}

# Period under study, written as compact YYYYMMDD strings, and the full list
# of calendar days it spans.
startDate <- "19800101"
endDate <- "20141231"
vec_Date <- itemizeDates(startDate = startDate, endDate = endDate)

# Number of candidate stations.
print(length(Stations_id_vec))

# One bookkeeping row per candidate station; the cells are filled in by the
# station loop.
properties1 <- matrix(
    NA,
    nrow = length(Stations_id_vec),
    ncol = 5,
    dimnames = list(NULL, c("STATIONS_ID", "nrow", "n999", "elevation", "criterion"))
)

# Visit every candidate station: read its "produkt" file, keep the temperature
# rows inside the study period, record bookkeeping figures in properties1, and
# collect the table of each station that passes the filter into Huge_Matrix.

# Filter thresholds (values unchanged, named for readability).
max_criterion  <- 4024        # upper bound (exclusive) on days without a usable value
high_elevation <- 900         # same unit as Stationshoehe (presumably metres)
min_rows_high  <- 3650 + 365  # rows required of a station above high_elevation
max_n999_high  <- 365         # placeholder values tolerated for such a station

# Loop-invariant bounds of the study period as numbers comparable to MESS_DATUM.
period_start <- as.numeric(startDate)
period_end   <- as.numeric(endDate)

# Selected tables are gathered in a list and bound once after the loop;
# growing a data frame with rbind() per station copies it every time.
kept_tables <- vector("list", length(Stations_id_vec))

for (i in seq_along(Stations_id_vec)) {

    cat(i, " ")  # progress indicator
    Stations_id_1 <- Stations_id_vec[i]

    # Station directories are matched on "tageswerte_" plus the id padded with
    # zeros to five digits.
    direct_name <- paste("tageswerte", formatC(Stations_id_1, width = 5, format = "d", flag = "0"), sep = "_")

    direct <- list_directories_full[grepl(x = list_directories_full, pattern = direct_name)]
    list_files_direct <- list.files(direct, full.names = TRUE)
    product_file <- list_files_direct[grepl(x = list_files_direct, pattern = "produkt")]
    if (length(product_file) != 1L) {
        stop("Expected exactly one 'produkt' file for station ", Stations_id_1,
             ", found ", length(product_file), call. = FALSE)
    }
    table_ID <- read.table(product_file, sep = ";", dec = ".", header = TRUE)

    # which() drops rows whose date is NA instead of turning them into all-NA rows.
    in_period <- which(table_ID$MESS_DATUM >= period_start & table_ID$MESS_DATUM <= period_end)
    table_ID2 <- table_ID[in_period, c("STATIONS_ID", "MESS_DATUM", "LUFTTEMPERATUR")]

    # -999 is counted as "no value" (presumably the missing-value marker of the
    # data provider); an NA temperature is counted the same way so that it
    # cannot turn the filter condition below into NA.
    n_rows <- nrow(table_ID2)
    n_placeholder <- sum(is.na(table_ID2$LUFTTEMPERATUR) | table_ID2$LUFTTEMPERATUR == -999)

    properties1[i, "STATIONS_ID"] <- Stations_id_1
    properties1[i, "nrow"]        <- n_rows
    properties1[i, "n999"]        <- n_placeholder
    # match() picks the first metadata row for this id and is not upset by NA ids.
    properties1[i, "elevation"]   <- Meta_Data$Stationshoehe[match(Stations_id_1, Meta_Data$Stations_id)]
    # Days of the study period without a usable value: days with no row at all
    # plus rows that only hold the placeholder.
    properties1[i, "criterion"]   <- length(vec_Date) - n_rows + n_placeholder

    few_gaps <- properties1[i, "criterion"] < max_criterion
    high_station_ok <- properties1[i, "elevation"] > high_elevation &&
        properties1[i, "nrow"] > min_rows_high &&
        properties1[i, "n999"] < max_n999_high

    if (few_gaps || high_station_ok) {
        kept_tables[[i]] <- table_ID2
    }

}

# Stations that failed the filter left a NULL slot; with nothing kept,
# Huge_Matrix stays NULL.
kept_tables <- Filter(Negate(is.null), kept_tables)
Huge_Matrix <- do.call(rbind, kept_tables)



# Combine the selected station series with the station metadata, derive
# calendar columns, and write the result to disk.

# Every station present in Huge_Matrix needs a metadata record.
if (any(!(unique(Huge_Matrix$STATIONS_ID) %in% Meta_Data$Stations_id))) {
    stop("Missing in Meta_Data")
}

# Give the metadata key the same name as in the measurement tables.
names(Meta_Data)[names(Meta_Data) == "Stations_id"] <- "STATIONS_ID"

Huge2 <- merge(x = Huge_Matrix, y = Meta_Data, by = "STATIONS_ID")

# MESS_DATUM is read as YYYYMMDD: the first four characters give the year,
# and "%j" gives the day of the year.
Huge2[["year"]] <- as.numeric(substring(as.character(Huge2[["MESS_DATUM"]]), 1, 4))
Huge2[["day"]] <- as.numeric(format(as.Date(as.character(Huge2[["MESS_DATUM"]]), format = "%Y%m%d"), "%j"))

# Shorter column names for the output table.
new_names <- c(LUFTTEMPERATUR = "raw", geoBreite = "lat", geoLaenge = "lon", Stationshoehe = "elevation")
for (old_name in names(new_names)) {
    colnames(Huge2)[colnames(Huge2) == old_name] <- new_names[[old_name]]
}

summary(Huge2)

# Per-station bookkeeping joined with the station metadata.
Huge3 <- merge(x = properties1, y = Meta_Data, by = "STATIONS_ID")

write.table(x = Huge2, file = "Table_Temperatur_new3b.txt", sep = ";", dec = ".", row.names = FALSE)
