## Creates a single file containing marital status variables from the
## 2010-2014 through 2013-2017 American Community Surveys for tracts
## in the DC area

## Set up environment
library(tidyverse)

## Marital status variable names: the four base names, their 'p'-prefixed
## counterparts, and the combined vector (base names first)
statuses <- c('mar', 'nvm', 'wid', 'div')
pstatuses <- sprintf('p%s', statuses)
statusvars <- append(statuses, pstatuses)

## Geographic identifier columns kept alongside the status variables
geovars <- c('GISJOIN', 'STATE', 'COUNTY')

## Set Functions
source('dcarea_functions.R')

## Reduce a tract-level data frame to the columns used in this script.
##
## dta: data frame passed to select.dcarea() (defined outside this
##      script); the result must contain every column named in `geovars`
##      and `statusvars`.
##
## Returns the output of select.dcarea(dta) with GISJOIN converted to
## character, restricted to the columns c(geovars, statusvars) in that
## order.
select.statusvars <- function(dta) {
    dcarea <- select.dcarea(dta)
    ## Store the identifier as character rather than whatever type it
    ## arrived as (e.g. factor)
    dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
    keep <- c(geovars, statusvars)
    dcarea[, keep]
}

## Load datasets containing marital status variables using data from
## files based on 2010 tracts. The trtYY_ms objects used below are not
## created anywhere in this script, so they are expected to come from
## this file.
load('../US/tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.Rdata')

## Create list of data frames for each year and restrict to created
## marital status variables. `years` must line up one-to-one with `dtas`;
## the check fails fast if one is edited without the other.
years <- 14:17
dtas <- list(trt14_ms, trt15_ms, trt16_ms, trt17_ms)
stopifnot(length(dtas) == length(years))
statusdtas <- lapply(dtas, select.statusvars)

## Append the two-digit year to each status variable name; the geographic
## identifiers keep their original names
namelist <- lapply(years, function(x) c(geovars, paste0(statusvars, x)))
yeardtas <- mapply(setNames, statusdtas, namelist, SIMPLIFY = FALSE)

## Keep the non-key geographic identifiers (STATE, COUNTY) from the first
## year only. Dropping them from the later years before joining means the
## join never produces suffixed duplicates (STATE.x, STATE.y, ...), so the
## result does not depend on how the join disambiguates repeated names and
## no pattern-based cleanup of column names is needed.
extrageo <- setdiff(geovars, 'GISJOIN')
yeardtas[-1] <- lapply(yeardtas[-1], function(dta) {
    dta[, !(names(dta) %in% extrageo), drop = FALSE]
})

## Create single wide data frame with year appended to variable name,
## keeping every row of the first year's data
statusdta <- reduce(yeardtas, left_join, by = 'GISJOIN')

## Put STATE and COUNTY last so the column layout is GISJOIN, the status
## variables by year, then STATE and COUNTY
statusdta <- statusdta[, c(setdiff(names(statusdta), extrageo), extrageo)]

## Export the wide dataset as CSV. Row names are written as the first,
## unnamed column (write.csv's default).
write.csv(
    x = statusdta,
    file = 'tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.csv'
)