## data-nhgis-dcarea/tracts/2010/tabular/race-ethnicity/construction/construct-create-tracts-201...


								## File creates single file containing race & ethnicity variables at the

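								## tract level for DC Area from the 2010 Census and the 2007-2011 through
								## 2011-2015 American Community Surveys. NOTE: the code below merges eight
								## datasets (suffixes 10-17), so later ACS releases appear to be included too.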

								##

								## Also includes variables indicating whether a neighborhood qualifies as a

								## "quadrivial" neighborhood (Asians, blacks, Latinxs, and whites each make

								## up at least 10% of the tract and no group is a majority) in each year.


								## Set up environment

								library(tidyverse)


								## Names of the count variables: total population followed by the
								## race/ethnicity groups (presumably non-Hispanic white, non-Hispanic black,
								## Asian/Pacific Islander, Hispanic, other, two or more -- confirm against
								## the codebook)
								races <- c('totpop', 'nhw', 'nhb', 'api', 'hsp', 'oth', 'two')

								## Share variables carry a 'p' prefix; total population has no share variable
								praces <- paste0('p', setdiff(races, 'totpop'))

								## All race variables: counts first, then shares
								racevars <- append(races, praces)

								## Identifier and geography columns kept alongside the race variables
								nonracevars <- c('GISJOIN', 'STATE', 'COUNTY')


								## Set Functions

								source('dcarea_functions.R')


								## Reduce a tract-level data frame to the rows returned by select.dcarea()
								## (defined outside this file section; presumably the DC-area tracts) and to
								## the identifier columns plus the constructed race variables.
								##
								## dta: data frame containing every column named in `nonracevars` and
								##      `racevars`.
								## Returns the subsetted data frame with GISJOIN stored as character.
								select.racevars <- function(dta) {
								    dcarea <- select.dcarea(dta)
								    ## Coerce the join key to character (e.g. in case it was read as a factor)
								    dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
								    keep <- c(nonracevars, racevars)
								    dcarea[, keep]
								}


								## Identify quadrivial neighborhoods
								##
								## Adds a logical column `quad` to `dta` that is TRUE for rows in which every
								## column named in `groupvars` is at least `min.share` and strictly below
								## `max.share` (so a group at exactly `max.share` disqualifies the row).
								##
								## dta:       data frame holding the share columns.
								## groupvars: names of the share columns that must all qualify; defaults to
								##            the first four entries of the global `praces`.
								## min.share: inclusive lower bound for each group's share.
								## max.share: exclusive upper bound for each group's share.
								##
								## Returns `dta` with the `quad` column added (or overwritten). A row with a
								## missing share is FALSE if any other group fails the test and NA otherwise.
								id.quads <- function(dta, groupvars = praces[1:4],
								                     min.share = .10, max.share = .5) {
								    ## lapply() + Reduce() stays column-wise, so data frames with zero or one
								    ## row work too (sapply() would collapse those to a list or plain vector
								    ## and a row-wise apply() would then fail)
								    eligible <- lapply(dta[groupvars],
								                       function(x) x >= min.share & x < max.share)
								    dta[['quad']] <- Reduce(`&`, eligible)
								    return(dta)
								}


								# ## Sets variable names for a specific dataset in the `racedtas` list

								# set.varnames <- function(x,y) setNames(racedtas[[x]], y)


								## Load the US-wide tract-level race-ethnicity datasets (2010 TIGER/Line
								## tracts). The .Rdata file presumably supplies the objects trt10_re through
								## trt17_re used below.
								load('../US/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.Rdata')

								## Collect the yearly data frames in chronological order
								dtas <- list(
								    trt10_re, trt11_re, trt12_re, trt13_re,
								    trt14_re, trt15_re, trt16_re, trt17_re
								)

								## Restrict every data frame to the DC-area race variables, then flag the
								## quadrivial tracts in each
								racedtas <- lapply(lapply(dtas, select.racevars), id.quads)


								## Create single wide data frame with year appended to variable name
								namelist <- lapply(10:17,
								                   function(x) c(nonracevars, paste0(c(racevars, 'quad'), x)))

								## There must be exactly one name vector per yearly data frame; otherwise the
								## pairing below could recycle and mislabel columns
								stopifnot(length(namelist) == length(racedtas))

								## STATE and COUNTY are kept only from the first data frame. Dropping them
								## from the later ones before joining avoids relying on the suffixes that
								## left_join() generates for duplicated columns, and avoids negative indexing
								## with grep(), which selects zero columns when nothing matches.
								geovars <- setdiff(nonracevars, 'GISJOIN')
								racedta <- Map(setNames, racedtas, namelist)
								racedta[-1] <- lapply(racedta[-1], function(yr) {
								    yr[, setdiff(names(yr), geovars), drop = FALSE]
								})
								racedta <- reduce(racedta, left_join, by = 'GISJOIN')

								## Keep the original column layout: GISJOIN, the yearly variables, then
								## STATE and COUNTY at the end
								racedta <- racedta[, c(setdiff(names(racedta), geovars), geovars),
								                   drop = FALSE]


								## Save the wide dataset as CSV. write.csv() defaults are left untouched, so
								## the row names are written as the first column.
								write.csv(
								    racedta,
								    file = 'tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv'
								)