## Creates a single file containing race & ethnicity variables at the tract
## level for the DC area from the 2010 Census and the 2007-2011 through
## 2011-2015 American Community Surveys.
## NOTE(review): the script below also processes trt16_re and trt17_re, so the
## survey range stated here may be out of date -- confirm.
##
## Also includes variables indicating whether a neighborhood qualifies as a
## "quadrivial" neighborhood (Asians, blacks, Latinxs, and whites each make
## up at least 10% of the tract and no group is a majority) in each year.

## Set up environment
library(tidyverse)

## Identify race variable names: the base names, plus a 'p'-prefixed
## counterpart for every base name except 'totpop'
races <- c('totpop', 'nhw', 'nhb', 'api', 'hsp', 'oth', 'two')
praces <- paste0('p', races[races != 'totpop'])
racevars <- c(races, praces)

## Identify variables to keep that are not race variables
nonracevars <- c('GISJOIN', 'STATE', 'COUNTY')

## Set Functions
## (select.dcarea(), used below, is presumably defined in this file -- confirm)
source('dcarea_functions.R')

## Create dataset that contains only DC-area tracts and constructed
## race variables
##
## Args:
##   dta: data frame passed to select.dcarea(); the result must contain
##        every column named in `nonracevars` and `racevars`
## Returns: the select.dcarea() result with GISJOIN converted to character
##          and only the `nonracevars` and `racevars` columns kept, in that
##          order
select.racevars <- function(dta) {
  keep <- c(nonracevars, racevars)
  dcarea <- select.dcarea(dta)
  ## GISJOIN is the key used for the joins later in this script
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  dcarea[, keep]
}

## Identify quadrivial neighborhoods
##
## Adds a logical column `quad` that is TRUE for rows in which every column
## named in `groups` is at least .10 and below .5. If a share is missing, the
## row is FALSE when another share already fails the test and NA otherwise.
##
## Args:
##   dta:    data frame containing the columns named in `groups`
##   groups: character vector of share columns to test; defaults to the
##           first four entries of the global `praces`
## Returns: `dta` with the `quad` column added (or overwritten)
id.quads <- function(dta, groups = praces[1:4]) {
  in.range <- lapply(dta[groups], function(x) x >= .10 & x < .5)
  ## Fold the per-column tests with `&` rather than sapply() + apply():
  ## sapply() collapses to a plain vector for a one-row data frame (and to a
  ## list for a zero-row one), and apply() then fails on the missing dims.
  dta['quad'] <- Reduce(`&`, in.range)
  return(dta)
}

## Sets variable names for a specific dataset in the `racedtas` list
## (commented out; not called in the code shown here)
# set.varnames <- function(x,y) setNames(racedtas[[x]], y)

## Load datasets containing race-ethnicity variables from 2010 TIGER/Line files
## for US tracts 2010
load('../US/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.Rdata')

## Create list of data frames for each year (the trt*_re objects are
## presumably created by the load() above -- confirm), restrict each one to
## the created race variables and flag quadrivial tracts
dtas <- list(trt10_re, trt11_re, trt12_re, trt13_re, trt14_re, trt15_re,
             trt16_re, trt17_re)
racedtas <- lapply(dtas, function(yeardta) id.quads(select.racevars(yeardta)))

## Create single wide data frame with year appended to variable name
##
## Each data frame in `racedtas` gets its race variables and `quad` renamed
## with a two-digit year suffix (10 through 17); the identifier columns in
## `nonracevars` keep their names. The frames are then left-joined on GISJOIN,
## so the first frame determines which tracts appear in the result.
namelist <- lapply(10:17,
                   function(x) c(nonracevars, paste0(c(racevars, 'quad'), x)))
stopifnot(length(racedtas) == length(namelist))
yeardtas <- Map(setNames, racedtas, namelist)

## Keep the non-key identifier columns (STATE, COUNTY) only in the first
## frame. Joining frames that all carry them makes left_join() invent
## suffixed copies (STATE.x, STATE.y, STATE.x.x, ...) that then have to be
## cleaned up by pattern, and a negative grep() index would drop every column
## if the pattern ever matched nothing.
idvars <- setdiff(nonracevars, 'GISJOIN')
yeardtas[-1] <- lapply(yeardtas[-1],
                       function(d) d[, setdiff(names(d), idvars), drop = FALSE])

racedta <- reduce(yeardtas, left_join, by = 'GISJOIN')

## Move the identifier columns to the end, matching the column order this
## script has always written
racedta <- racedta[, c(setdiff(names(racedta), idvars), idvars)]

## Write file containing wide dataset to CSV format
## (write.csv defaults apply, so row names are written as the first column)
write.csv(
  racedta,
  file = 'tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv'
)