## Creates a single file containing household presence-of-children variables
## for tracts in the DC Area, based on the 2010 Census and the 2010-2014
## through 2013-2017 American Community Surveys
##
## tothh = Total households
## chpr  = Households with children present
## ncpr  = Households with no children present

## Set up environment
library(tidyverse)

## Identify variable abbreviations
## Geographic identifier columns carried alongside the variables
geovars <- c('GISJOIN', 'STATE', 'COUNTY')

## Base abbreviations, then their p-prefixed counterparts for every variable
## except the total (the first element)
presence <- c('tothh', 'chpr', 'ncpr')
ppresence <- paste0('p', tail(presence, -1))
presvars <- c(presence, ppresence)

## Set Functions
source('dcarea_functions.R')

## Create dataset that contains only DC-area tracts and constructed
## children present variables
select.presvars <- function(dta) {
    ## select.dcarea() is defined outside this block (presumably in
    ## dcarea_functions.R) and is expected to return the DC-area rows of dta
    dcarea <- select.dcarea(dta)

    ## Store the tract identifier as plain character
    dcarea$GISJOIN <- as.character(dcarea$GISJOIN)

    ## Keep the geography columns followed by the children present variables
    keepcols <- c(geovars, presvars)
    dcarea[, keepcols]
}

## Load datasets containing children present variables from files based on
## 2010 tracts
load('../US/tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.Rdata')

## Collect the per-year data frames (presumably the objects restored by the
## load() call above) in year order, then cut each one down to the DC-area
## tracts and children present variables
dtas <- list(trt10c_cp, trt14_cp, trt15_cp, trt16_cp, trt17_cp)
presdtas <- purrr::map(dtas, select.presvars)

## Create single wide data frame with year appended to variable name.
## Each frame keeps the geography columns under their original names, so
## GISJOIN stays a common join key; only the children present variables get a
## suffix ('10c', 14, ..., 17), in the same order as the frames in presdtas.
namelist <- lapply(c('10c', 14:17),
                   function(x) c(geovars, paste0(presvars, x)))
presdta <- mapply(setNames, presdtas, namelist, SIMPLIFY = FALSE) %>%
    reduce(left_join, by = 'GISJOIN')

## STATE and COUNTY occur in every joined frame, so left_join() renames the
## clashing copies with suffixes (STATE.x, STATE.y, ...). The .x copies come
## from the first (left-most) frame, whose rows the left joins all preserve,
## so they are the ones kept as the final STATE and COUNTY.
presdta$STATE <- presdta$STATE.x
presdta$COUNTY <- presdta$COUNTY.x

## Discard the suffixed duplicates with a logical mask rather than -grep():
## a negative index built from an empty match set selects zero columns and
## would silently empty the data frame.
presdta <- presdta[, !grepl('^STATE.+|^COUNTY.+', names(presdta), perl = TRUE)]

## Write the wide dataset out as CSV. write.csv() is left at its defaults, so
## the row names are written as an unnamed leading column.
outfile <- 'tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.csv'
write.csv(presdta, file = outfile)