# Load the raw survey data from a Stata file.
# NOTE(review): the file name and the variable codes used below look like the
# DHS Peru children's recode -- confirm against the data documentation.
#
# The workspace is deliberately NOT cleared here: rm(list = ls()) destroys
# whatever the caller has in the global environment, and nothing in this
# script depends on starting from an empty workspace.
#
# read.dta() lives in the 'foreign' package, which this script never attaches,
# so it is called through its namespace. convert.factors = FALSE keeps the
# numeric codes, which the sentinel-value recodes below rely on.
peru12raw <- foreign::read.dta("PEKR6IFL.dta", convert.factors = FALSE)


# Placeholders kept from the original script: `vars` is not used by the code
# that follows and `nams` is overwritten before it is first read.
vars <- character(0)
nams <- character(0)

# Show the dimensions (rows, columns) of the raw data as a quick sanity check.
dim(peru12raw)

# Response-related variables:
# HW70     "Ht/A Standard deviations (according to WHO)" = Stunting

# Codes above 9000 are treated as missing; the remaining values are divided
# by 100.
peru12raw[["hw70"]] <- replace(
  peru12raw[["hw70"]],
  which(peru12raw[["hw70"]] > 9000),
  NA
) / 100


# Child-related variables:
# HW1       Age of child in months
# V008     "Date of interview (CMC)"
# B3       "Date of birth (CMC)"
# M5       "Months of breastfeeding"
# B4       "Sex of child"
# BORD     "Birth order number", possibly categorical (likewise for similar
#          variables)
# B0       "Child is twin"

# Sex of child: recode 1 -> 0 and 2 -> 1, then store the result as a factor.
peru12raw[["b4"]] <- local({
  sex_code <- peru12raw[["b4"]]
  sex_code[which(sex_code == 1)] <- 0
  sex_code[which(sex_code == 2)] <- 1
  as.factor(sex_code)
})

# Breastfeeding duration: code 94 (never breastfed) becomes 0 months; this has
# to happen first, because every remaining code above 90 is then set to
# missing.
peru12raw[["m5"]] <- local({
  months_breastfed <- peru12raw[["m5"]]
  months_breastfed[which(months_breastfed == 94)] <- 0
  is.na(months_breastfed) <- which(months_breastfed > 90)
  months_breastfed
})

# HW1 contains many missing values, so the child's age is computed instead as
# interview date minus date of birth (both given as CMC).
peru12raw[["cage"]] <- peru12raw[["v008"]] - peru12raw[["b3"]]



# Mother- and household-related variables:
# V445     "Body mass index for respondent"
# V438     "Respondent's height (cms-1d)"
# V012     "Current Age Respondent"
# V133     "Education in single years"
# V715     "Partner's education-single years"
# V024     "Region"
# V151     "Sex of householdhead"
# V206     "Sons who have died"
# V207     "Daughters who have died"

# Body mass index: codes above 9000 are missing; remaining values are divided
# by 100.
peru12raw[["v445"]] <- replace(
  peru12raw[["v445"]],
  which(peru12raw[["v445"]] > 9000),
  NA
) / 100

# Height: codes above 9000 are missing; remaining values are divided by 10.
peru12raw[["v438"]] <- replace(
  peru12raw[["v438"]],
  which(peru12raw[["v438"]] > 9000),
  NA
) / 10

# Years of education (respondent and partner): codes above 95 are missing.
peru12raw[["v133"]] <- replace(
  peru12raw[["v133"]],
  which(peru12raw[["v133"]] > 95),
  NA
)
peru12raw[["v715"]] <- replace(
  peru12raw[["v715"]],
  which(peru12raw[["v715"]] > 95),
  NA
)

# Total number of children who have died = sons + daughters.
peru12raw[["deadchildren"]] <- peru12raw[["v206"]] + peru12raw[["v207"]]

# Region: turn the numeric codes 1..25 into a labelled factor.
peru12raw[["v024"]] <- factor(
  peru12raw[["v024"]],
  levels = 1:25,
  labels = c(
    "Amazonas", "Ancash", "Apurimac", "Arequipa", "Ayacucho", "Cajamarca",
    "Callao", "Cusco", "Huancavelica", "Huanuco", "Ica", "Junin",
    "La_Libertad", "Lambayeque", "LimaRegion", "Loreto", "Madre_de_Dios",
    "Moquegua", "Pasco", "Piura", "Puno", "San_Martin", "Tacna", "Tumbes",
    "Ucayali"
  )
)

# Sex of household head: codes above 95 are missing, then 1 -> 0 and 2 -> 1.
peru12raw[["v151"]] <- local({
  head_sex <- peru12raw[["v151"]]
  is.na(head_sex) <- which(head_sex > 95)
  head_sex[which(head_sex == 1)] <- 0
  head_sex[which(head_sex == 2)] <- 1
  head_sex
})



# Household asset indicators and delivery mode:
# V190     "Wealth index"
# V191     "Wealth index factor score", maybe all other wealth indicators
#          already included
# V119     "Has electricity"
# V120     "Has radio"
# V121     "Has television"
# V122     "Has refrigerator"
# V123     "Has bicycle"
# V124     "Has motorcycle/scooter"
# V153     "Has telephone"
# M17      "caesarian section"
# V125     not described in the original notes and not selected further down;
#          NOTE(review): presumably another asset indicator -- confirm.

# For every indicator, codes above 5 are treated as missing. The original
# script applied this recode twice to several columns; the second pass could
# not change anything, so each column is now handled exactly once.
indicator_vars <- c(
  "v119", "v120", "v121", "v122", "v123", "v124", "v125", "v153", "m17"
)
peru12raw[indicator_vars] <- lapply(
  peru12raw[indicator_vars],
  function(codes) {
    # which() drops NA comparisons, so values that are already missing stay
    # untouched.
    is.na(codes) <- which(codes > 5)
    codes
  }
)



# Columns of the analysis data set, in output order. "cage" and "deadchildren"
# must already exist under these names in peru12raw; every other entry is
# produced by the renaming step below.
nams <- c(
  "stunting", "householdmembers", "cage", "breastfeeding", "csex",
  "cbirthorder", "mbmi", "mheight", "mage", "medu", "edupartner", "mregion",
  "householdhead", "deadchildren", "electricity", "radio", "television",
  "refrigerator", "bicycle", "motorcycle", "telephone", "caesarian"
)

# Rename raw variable codes to readable names. Columns whose code is not in
# the lookup table keep their name; codes that are absent from the data are
# silently skipped.
colnames(peru12raw) <- local({
  readable <- c(
    hw70 = "stunting",
    v136 = "householdmembers",
    m5   = "breastfeeding",
    b4   = "csex",
    bord = "cbirthorder",
    v445 = "mbmi",
    v438 = "mheight",
    v012 = "mage",
    v133 = "medu",
    v715 = "edupartner",
    v024 = "mregion",
    v151 = "householdhead",
    v119 = "electricity",
    v120 = "radio",
    v121 = "television",
    v122 = "refrigerator",
    v123 = "bicycle",
    v124 = "motorcycle",
    v153 = "telephone",
    m17  = "caesarian"
  )
  current <- colnames(peru12raw)
  hit <- match(current, names(readable))
  known <- !is.na(hit)
  current[known] <- unname(readable[hit[known]])
  current
})

# Keep only the analysis columns.
peru12 <- peru12raw[, nams]






# Child sex as a factor (as.factor() leaves an existing factor unchanged).
peru12$csex <- as.factor(peru12$csex)

# Birth order: pool everything above 5 into category 5, then treat the
# variable as categorical.
peru12$cbirthorder <- factor(pmin(peru12$cbirthorder, 5))

# Household size: clamp to the range 3..8 (values <= 3 become 3, values > 8
# become 8). Missing values stay missing.
peru12$householdmembers <- pmin(pmax(peru12$householdmembers, 3), 8)

peru12$householdhead <- as.factor(peru12$householdhead)

# Dead children: cap the count at 3.
peru12$deadchildren[which(peru12$deadchildren > 3)] <- 3

# 0/1 indicator "at least one child has died". It is derived from the numeric
# count BEFORE the factor conversion so that a missing count yields NA.
# The previous version started from a vector of zeros and only overwrote the
# entries where the comparison was TRUE, which silently coded missing counts
# as 0 and let those rows pass the complete-case filter below.
deadchildrenD <- as.numeric(peru12$deadchildren != 0)

peru12$deadchildren <- factor(peru12$deadchildren)

# Education in years -> three bands: 1 = up to 6 years, 2 = more than 6 and up
# to 11 years, 3 = more than 11 years. cut() uses right-closed intervals, so
# the band edges match the <= 6 / <= 11 rules; NA stays NA.
edu_breaks <- c(-Inf, 6, 11, Inf)
meduC <- as.numeric(cut(peru12$medu, breaks = edu_breaks, labels = FALSE))
edupartnerC <- as.numeric(
  cut(peru12$edupartner, breaks = edu_breaks, labels = FALSE)
)

peru12$meduC <- meduC
peru12$edupartnerC <- edupartnerC
peru12$deadchildrenD <- deadchildrenD

# Response and covariates that go into the final data set; the banded
# education variables and the dead-children indicator replace their raw
# counterparts.
list_cov <- c(
  "stunting", "householdmembers",
  "cage", "breastfeeding", "csex", "cbirthorder",
  "mbmi", "mheight", "mage", "meduC", "edupartnerC", "mregion",
  "householdhead", "deadchildrenD",
  "electricity", "radio", "television", "refrigerator", "bicycle",
  "motorcycle", "telephone",
  "caesarian"
)

peru12a <- peru12[, list_cov]
summary(peru12a)

# Complete-case data set: drop every row with at least one missing value.
peru12s <- peru12a[complete.cases(peru12a), ]
summary(peru12s)

# 
# peru12s$meduC            <- as.factor(peru12s$meduC)
# peru12s$edupartnerC      <- as.factor(peru12s$edupartnerC)
# peru12s$deadchildrenD    <- as.factor(peru12s$deadchildrenD)
# peru12s$householdmembers <- as.factor(peru12s$householdmembers)
# peru12s$electricity      <- as.factor(peru12s$electricity)
# peru12s$radio            <- as.factor(peru12s$radio)
# peru12s$television       <- as.factor(peru12s$television)
# peru12s$refrigerator     <- as.factor(peru12s$refrigerator)
# peru12s$bicycle          <- as.factor(peru12s$bicycle)
# peru12s$motorcycle       <- as.factor(peru12s$motorcycle)
# peru12s$telephone        <- as.factor(peru12s$telephone)
# peru12s$caesarian        <- as.factor(peru12s$caesarian)
# 
# 
# 
# summary(peru12s)
# 
# 

# Export the complete-case data set: semicolon-separated, decimal comma,
# header row, no row names, no quoting. The file name is a relative path, so
# the file is written to the current working directory.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
write.table(
  peru12s,
  "peru12_de_new.csv",
  col.names = TRUE,
  quote = FALSE,
  row.names = FALSE,
  sep = ";",
  dec = ","
)

