## Creates a single file containing educational attainment variables from the
## 2010-2014 through 2013-2017 American Community Surveys for tracts
## in the DC Area
##   tot25o = Total population 25 and older
##   lh = Less than high school
##   hs = High school or GED
##   sc = Some college
##   aa = Associate's degree
##   ba = Bachelor's degree
##   gr = Graduate degree

## Set up environment
library(tidyverse)

## Identify variable abbreviations
attain <- c('tot25o', 'lh', 'hs', 'sc', 'aa', 'ba', 'gr')
## 'p'-prefixed counterpart of every category except the total (tot25o);
## presumably the share of tot25o in each category -- these columns are
## constructed upstream, confirm against the source dataset
pattain <- paste0('p', attain[-1])
attainvars <- c(attain, pattain)
geovars <- c('GISJOIN', 'STATE', 'COUNTY')

## Set Functions
## (select.dcarea(), used below, is expected to come from this file)
source('dcarea_functions.R')

## Restrict a tract-level data frame to DC-area tracts and keep only the
## geographic identifiers and the constructed educational attainment
## variables.
##   dta: data frame containing at least the columns named in geovars and
##        attainvars
## Returns the filtered data frame with columns in the order
## c(geovars, attainvars) and GISJOIN coerced to character so that the join
## key has the same type in every year.
select.attainvars <- function(dta) {
  dta <- select.dcarea(dta)
  dta$GISJOIN <- as.character(dta$GISJOIN)
  dta[, c(geovars, attainvars)]
}

## Load datasets containing educational attainment variables from files based
## on 2010 tracts (expected to provide trt14_ed through trt17_ed)
load('../US/tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.Rdata')

## Create list of data frames for each year and restrict to created
## educational attainment variables
dtas <- list(trt14_ed, trt15_ed, trt16_ed, trt17_ed)
attaindtas <- lapply(dtas, select.attainvars)

## Create single wide data frame with year appended to variable name.
## Only the attainment variables get the year suffix; the geographic
## identifiers keep their names so GISJOIN can serve as the join key.
namelist <- lapply(14:17, function(x) c(geovars, paste0(attainvars, x)))
attaindta <- mapply(setNames, attaindtas, namelist, SIMPLIFY = FALSE) %>%
  reduce(left_join, by = 'GISJOIN')

## STATE and COUNTY appear in every yearly data frame, so the successive joins
## disambiguate them with suffixes (STATE.x, STATE.y, ...). Keep the copy that
## came from the first (left-most) data frame under the plain name ...
attaindta$STATE <- attaindta$STATE.x
attaindta$COUNTY <- attaindta$COUNTY.x

## ... and drop every suffixed copy. A logical mask is used instead of
## negative grep() indices: if nothing matched, -integer(0) would select zero
## columns and silently empty the data frame.
suffixed <- grepl('^STATE.+|^COUNTY.+', names(attaindta), perl = TRUE)
attaindta <- attaindta[, !suffixed, drop = FALSE]

## Write file containing wide dataset to CSV format
## NOTE(review): write.csv() also writes the row names as an unnamed first
## column by default; left unchanged here in case downstream readers rely on
## the existing file layout.
write.csv(attaindta, 'tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.csv')