## Builds one wide, tract-level file of median age for the DC Area,
## combining the 2010 Census with the 2010-2014 through 2013-2017
## American Community Surveys (one median-age column per source).

## Set up environment
library(tidyverse)

## Name of the median age variable carried by every input table
agevar <- 'mdage'

## Geographic identifier variables kept alongside the median age variable
geovars <- c('GISJOIN', 'STATE', 'COUNTY')

## Load helper functions; select.dcarea() is not defined in this script and
## is expected to come from this file
source('dcarea_functions.R')

## Reduce one input table to the identifier columns plus median age.
##   dta: data frame that select.dcarea() accepts and that carries the
##        columns named in geovars and agevar
## Returns the output of select.dcarea(dta) restricted to c(geovars, agevar),
## with GISJOIN stored as character.
## NOTE(review): select.dcarea() presumably restricts rows to DC-area
## tracts -- confirm against dcarea_functions.R
select.agevars <- function(dta) {
  tracts <- select.dcarea(dta)
  ## Coerce the join key to character in every table
  tracts$GISJOIN <- as.character(tracts$GISJOIN)
  tracts[, c(geovars, agevar)]
}

## Load the median age tables (2010 TIGER/Line tract definitions); the
## trt*_ag objects used below are expected to be provided by this file
load('../US/tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.Rdata')

## Collect the tables, one per source, and trim each to the columns of
## interest
dtas <- list(trt10c_ag, trt14_ag, trt15_ag, trt16_ag, trt17_ag)
agedtas <- lapply(dtas, select.agevars)

## Give each table's median age column a source-specific name
## (mdage10c, mdage14, ..., mdage17); identifier columns keep their names
suffixes <- c('10c', 14:17)
namelist <- lapply(
  suffixes,
  function(sfx) c(geovars, paste0(agevar, sfx))
)
renamed <- Map(setNames, agedtas, namelist)

## Join every table onto the first by GISJOIN. STATE and COUNTY are not join
## keys, so the joins leave several suffixed copies of them in the result.
agedta <- reduce(renamed, left_join, by = 'GISJOIN')

## Keep the STATE.x / COUNTY.x values under the plain names, then drop every
## column whose name is STATE or COUNTY followed by at least one character
agedta[['STATE']] <- agedta[['STATE.x']]
agedta[['COUNTY']] <- agedta[['COUNTY.x']]
dupcols <- grep('^STATE.+|^COUNTY.+', names(agedta), perl = TRUE)
agedta <- agedta[, -dupcols]

## Write the wide dataset to CSV
## NOTE(review): write.csv() defaults to row.names = TRUE, so the file gets a
## leading row-index column -- confirm that is intended
write.csv(
  agedta,
  'tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.csv'
)