You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

53 lines
1.9 KiB

## Creates a single file containing educational attainment variables from the
## 2010-2014 through 2013-2017 American Community Surveys for tracts
## in the DC area
## tot25o = Total population 25 and older
## lh = Less than high school
## hs = High school or GED
## sc = Some college
## aa = Associate's degree
## ba = Bachelor's degree
## gr = Graduate degree
## Set up environment
library(tidyverse)
## Variable names used throughout the script
## Geographic identifier columns kept alongside the attainment variables
geovars <- c('GISJOIN', 'STATE', 'COUNTY')
## Attainment abbreviations: the 25-and-older total, then each attainment level
attain <- c(
  'tot25o',
  'lh', 'hs', 'sc', 'aa', 'ba', 'gr'
)
## 'p'-prefixed name for every attainment level (the total gets none)
pattain <- paste0('p', tail(attain, -1))
## All attainment columns, in the order they are selected and renamed
attainvars <- c(attain, pattain)
## Set Functions
source('dcarea_functions.R')
## Restrict a data frame to DC-area tracts and to the geographic identifier and
## constructed educational attainment columns.
##   dta: data frame passed to select.dcarea(); the result must contain the
##        columns named in geovars and attainvars
## Returns the columns c(geovars, attainvars), in that order, with GISJOIN
## stored as character.
## NOTE(review): select.dcarea() is not defined in this file -- presumably it
## comes from dcarea_functions.R; confirm.
select.attainvars <- function(dta) {
  dcarea <- select.dcarea(dta)
  ## GISJOIN is the key of the later join, so store it as character
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  keepvars <- c(geovars, attainvars)
  dcarea[, keepvars]
}
## Load the educational attainment data frames based on 2010 tracts; the
## statements below expect the .Rdata file to provide trt14_ed, trt15_ed,
## trt16_ed and trt17_ed
load('../US/tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.Rdata')
## Collect one data frame per year in a list, then restrict each of them to
## the DC area and the constructed educational attainment variables
dtas <- list(
  trt14_ed,
  trt15_ed,
  trt16_ed,
  trt17_ed
)
attaindtas <- lapply(X = dtas, FUN = select.attainvars)
## Create single wide data frame with the two-digit year appended to each
## attainment variable name (e.g. tot25o14, plh17)
years <- 14:17
## One set of column names per year; fail early if the number of data frames
## does not match instead of letting the rename recycle or misalign
stopifnot(length(attaindtas) == length(years))
namelist <- lapply(years,
                   function(x) c(geovars, paste0(attainvars, x)))
renamed <- Map(setNames, attaindtas, namelist)
## STATE and COUNTY are taken from the first year only. Dropping them from the
## later years before joining means the result does not depend on how
## left_join() suffixes repeated non-key columns, and no pattern-based column
## removal is needed afterwards (a negative grep() index selects zero columns
## when nothing matches).
dropvars <- setdiff(geovars, 'GISJOIN')
renamed[-1] <- lapply(
  renamed[-1],
  function(d) d[, setdiff(names(d), dropvars), drop = FALSE]
)
## Left joins keep every tract present in the first year's data frame
attaindta <- reduce(renamed, left_join, by = 'GISJOIN')
## Keep the established column order: GISJOIN, the yearly attainment
## variables, then STATE and COUNTY last
attaindta <- attaindta[, c(setdiff(names(attaindta), dropvars), dropvars)]
## Write the wide dataset to CSV. Row names are written as the first column;
## that is the write.csv default and is only spelled out here.
write.csv(
  x = attaindta,
  file = 'tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.csv',
  row.names = TRUE
)