## Creates a single wide file containing race & ethnicity variables at the
## tract level for the DC area, with one set of columns per year of data.
##
## The original header describes the inputs as the 2010 Census and the
## 2007-2011 through 2011-2015 American Community Surveys.
## NOTE(review): the dataset list below runs from trt10_re through trt17_re,
## so the stated end year looks out of date -- confirm which surveys
## trt16_re and trt17_re hold and update this header.
##
## Also includes variables indicating whether a neighborhood qualifies as a
## "quadrivial" neighborhood (Asians, blacks, Latinxs, and whites each make
## up at least 10% of the tract and no group is a majority) in each year.

## Set up environment
library(tidyverse)

## Identify race variable names: counts first, then the matching shares
## (every count except 'totpop' gets a 'p'-prefixed share variable)
races <- c('totpop', 'nhw', 'nhb', 'api', 'hsp', 'oth', 'two')
praces <- paste0('p', races[-1])
racevars <- c(races, praces)

## Identify variables to keep that are not race variables
nonracevars <- c('GISJOIN', 'STATE', 'COUNTY')

## Set Functions
## (select.dcarea, used below, is presumably defined here -- confirm)
source('dcarea_functions.R')

## Create dataset that contains only DC-area tracts and constructed
## race variables
##
## dta:     data frame holding at least the columns named in `nonracevars`
##          and `racevars`
## returns: the result of select.dcarea(dta), with GISJOIN coerced to
##          character and columns limited to c(nonracevars, racevars)
select.racevars <- function(dta) {
  dta <- select.dcarea(dta)
  ## Coerce the join key to character in every dataset so the later joins
  ## on GISJOIN compare values of the same type
  dta$GISJOIN <- as.character(dta$GISJOIN)
  return(dta[, c(nonracevars, racevars)])
}

## Identify quadrivial neighborhoods
##
## dta:     data frame holding the share columns named in praces[1:4]
##          (pnhw, pnhb, papi, phsp)
## returns: `dta` with a logical column `quad` that is TRUE when each of the
##          four shares is >= .10 and < .5
id.quads <- function(dta) {
  eligible <- lapply(dta[praces[1:4]], function(x) x >= .10 & x < .5)
  ## Combine the four per-group tests elementwise. Unlike sapply() followed
  ## by apply(), this does not depend on the result simplifying to a matrix,
  ## so a one-row data frame is handled as well. NA shares propagate exactly
  ## as they do with all(): FALSE wins over NA, otherwise the result is NA.
  dta['quad'] <- Reduce(`&`, eligible)
  return(dta)
}

# ## Sets variable names for a specific dataset in the `racedtas` list
# set.varnames <- function(x,y) setNames(racedtas[[x]], y)

## Load datasets containing race-ethnicity variables from 2010 TIGER/Line files
## for US tracts 2010
## (expected to provide the trtNN_re objects listed below -- confirm)
load('../US/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.Rdata')

## Create list of data frames for each year and restrict to created race
## variables
dtas <- list(trt10_re, trt11_re, trt12_re, trt13_re,
             trt14_re, trt15_re, trt16_re, trt17_re)
racedtas <- lapply(dtas, select.racevars) %>%
  lapply(id.quads)

## Create single wide data frame with year appended to variable name
namelist <- lapply(10:17, function(x) {
  c(nonracevars, paste0(c(racevars, 'quad'), x))
})

## The year suffixes above are hard-coded, so fail loudly if they fall out of
## step with the dataset list instead of letting mapply() recycle
stopifnot(length(racedtas) == length(namelist))

racedta <- mapply(setNames, racedtas, namelist, SIMPLIFY = FALSE) %>%
  reduce(left_join, by = 'GISJOIN')

## Every dataset carries its own STATE and COUNTY, so the joins leave
## suffixed copies behind. Keep the copy from the first dataset under the
## plain name, then drop every suffixed copy.
racedta[['STATE']] <- racedta[['STATE.x']]
racedta[['COUNTY']] <- racedta[['COUNTY.x']]

## Use a logical mask rather than negative grep() positions: negating an
## empty match would select zero columns instead of keeping all of them
suffixed <- grepl('^STATE.+|^COUNTY.+', names(racedta), perl = TRUE)
racedta <- racedta[, !suffixed, drop = FALSE]

## Write file containing wide dataset to CSV format
## NOTE(review): write.csv() keeps its default row.names = TRUE here, so the
## file begins with an unnamed row-number column -- confirm that downstream
## readers expect it before changing.
write.csv(racedta, 'tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv')