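## Creates a single file containing race & ethnicity variables at the
## tract level for the DC area from the 2010 Census and the 2007-2011 through
## 2011-2015 American Community Surveys.
## NOTE(review): the script also reads trt16_re and trt17_re (presumably the
## 2012-2016 and 2013-2017 ACS) -- confirm and update the range above.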
##
## Also includes variables indicating whether a neighborhood qualifies as a
## "quadrivial" neighborhood (Asians, blacks, Latinxs, and whites each make
## up at least 10% of the tract and no group is a majority) in each year.


## Set up environment
library(tidyverse)

## Race variable names: counts first, then the matching 'p'-prefixed
## variables for every entry except the total population
races <- c('totpop', 'nhw', 'nhb', 'api', 'hsp', 'oth', 'two')
praces <- paste0('p', races[races != 'totpop'])
racevars <- append(races, praces)

## Variables to keep that are not race variables
nonracevars <- c('GISJOIN', 'STATE', 'COUNTY')

## Set Functions
source('dcarea_functions.R')

## Restrict a data frame to DC-area tracts (via select.dcarea(), defined in
## the sourced functions file) and to the identifier and race columns.
##
## dta: data frame holding GISJOIN plus every column named in `nonracevars`
##      and `racevars`
## Returns the filtered data frame with GISJOIN converted to character and
## columns ordered as c(nonracevars, racevars).
select.racevars <- function(dta) {
    keep <- c(nonracevars, racevars)
    dcarea <- select.dcarea(dta)
    dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
    return(dcarea[, keep])
}

## Identify quadrivial neighborhoods
##
## Adds a logical `quad` column that is TRUE for rows where every column
## named in `groups` is at least `lower` and strictly below `upper`.
##
## dta:    data frame containing the columns named in `groups`
## groups: names of the columns to test; defaults to the first four entries
##         of the global `praces`
## lower:  inclusive lower bound applied to each column (default .10)
## upper:  exclusive upper bound applied to each column (default .5)
##
## Returns `dta` with `quad` added (or overwritten). Missing values combine
## as with `&`: a row is FALSE if any group fails, otherwise NA if any group
## is NA.
id.quads <- function(dta, groups = praces[1:4], lower = .10, upper = .5) {
    ## One logical vector per group. Keeping these in a list (instead of
    ## sapply() followed by apply()) avoids relying on simplification to a
    ## matrix, which breaks for one-row and zero-row inputs.
    eligible <- lapply(groups,
                       function(g) dta[[g]] >= lower & dta[[g]] < upper)
    ## AND the per-group results together, starting from all-TRUE so the
    ## result always has one entry per row
    dta[['quad']] <- Reduce(`&`, eligible, rep(TRUE, nrow(dta)))
    return(dta)
}

# ## Sets variable names for a specific dataset in the `racedtas` list
# set.varnames <- function(x,y) setNames(racedtas[[x]], y)

## Load the US tract-level race-ethnicity datasets (2010 TIGER/Line
## geography); the trtNN_re objects used below are presumably created by
## this load() call
load('../US/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.Rdata')

## Gather the yearly data frames in order, then for each one keep only the
## constructed race variables and flag quadrivial tracts
dtas <- list(trt10_re, trt11_re, trt12_re, trt13_re,
             trt14_re, trt15_re, trt16_re, trt17_re)
racedtas <- lapply(dtas, function(yeardta) id.quads(select.racevars(yeardta)))

## Create single wide data frame with year appended to variable name
## (identifier columns keep their names so GISJOIN can serve as the join key)
namelist <- lapply(10:17,
                   function(x) c(nonracevars, paste0(c(racevars, 'quad'), x)))
## The year suffixes are hard-coded, so fail loudly if they fall out of step
## with the number of yearly data frames instead of letting mapply() recycle
stopifnot(length(namelist) == length(racedtas))
racedta <- mapply(setNames, racedtas, namelist, SIMPLIFY = FALSE) %>%
    reduce(left_join, by='GISJOIN')

## STATE and COUNTY occur in every yearly data frame, so the joins leave
## suffixed copies behind. Keep the `.x` copy (the left-hand side of the
## first join) under the plain name.
stopifnot(all(c('STATE.x', 'COUNTY.x') %in% names(racedta)))
racedta$STATE <- racedta[['STATE.x']]
racedta$COUNTY <- racedta[['COUNTY.x']]

## Drop the suffixed copies with a logical mask: unlike a negated grep()
## index, this keeps every column when nothing matches
suffixed <- grepl('^STATE.+|^COUNTY.+', names(racedta), perl=TRUE)
racedta <- racedta[, !suffixed, drop = FALSE]

## Save the wide dataset to disk in CSV format
write.csv(
    racedta,
    file = 'tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv'
)