You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

63 lines
2.4 KiB

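## This script creates a single file containing race & ethnicity variables at
## the tract level for the DC area from the 2010 Census and the 2007-2011
## through 2011-2015 American Community Surveys.
## NOTE(review): the script processes year suffixes 10 through 17, which is
## two more datasets than the range above describes -- confirm and update.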
##
## Also includes variables indicating whether a neighborhood qualifies as a
## "quadrivial" neighborhood (Asians, blacks, Latinxs, and whites each make
## up at least 10% of the tract and no group is a majority) in each year.
## Set up environment
library(tidyverse)
## Identify race variable names
races <- c('totpop', 'nhw', 'nhb', 'api', 'hsp', 'oth', 'two')
praces <- paste0('p', races[-1])
racevars <- c(races, praces)
## Identify variables to keep that are not race variables
nonracevars <- c('GISJOIN', 'STATE', 'COUNTY')
## Set Functions
source('dcarea_functions.R')
## Create dataset that contains only DC-area tracts and constructed
## race variables
select.racevars <- function(dta) {
dta <- select.dcarea(dta)
dta$GISJOIN <- as.character(dta$GISJOIN)
return(dta[, c(nonracevars, racevars)])
}
## Identify quadrivial neighborhoods
id.quads <- function(dta) {
eligible <- sapply(dta[, praces[1:4]], function(x) x >= .10 & x < .5)
dta['quad'] <- apply(eligible, 1, all)
return(dta)
}
# ## Sets variable names for a specific dataset in the `racedtas` list
# set.varnames <- function(x,y) setNames(racedtas[[x]], y)
## Load datasets containing race-ethnicity variables from 2010 TIGER/Line files
## for US tracts 2010
load('../US/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.Rdata')
## Create list of data frames for each year and restrict to created race
## variables
dtas <- list(trt10_re, trt11_re, trt12_re, trt13_re, trt14_re, trt15_re,
trt16_re, trt17_re)
racedtas <- lapply(dtas, select.racevars) %>%
lapply(id.quads)
## Create single wide data frame with year appended to variable name
namelist <- lapply(10:17,
function(x) c(nonracevars, paste0(c(racevars, 'quad'), x)))
racedta <- mapply(setNames, racedtas, namelist, SIMPLIFY = FALSE) %>%
reduce(left_join, by='GISJOIN')
racedta$STATE <- racedta$STATE.x
racedta$COUNTY <- racedta$COUNTY.x
racedta <- racedta[, -(grep('^STATE.+|^COUNTY.+', names(racedta), perl=TRUE))]
## Write file containing wide dataset to CSV format
write.csv(racedta, 'tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv')