You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

78 lines
2.4 KiB

## Creates a single wide file containing age-race-sex variables at the
## tract level for the DC Area from the 2010 Census and the 2010-2014
## through 2013-2017 American Community Surveys
## Set up environment
library(tidyverse)
## Build the names of the constructed age-race-sex variables to keep.
## Each name is a race abbreviation, then a sex code, then an age-group
## label (e.g. 'blkm0004').
### Abbreviations for racial groups
races <- c(
  'blk',  ## Black alone
  'ami',  ## American Indian or Native Alaskan alone
  'asi',  ## Asian alone
  'pac',  ## Native Hawaiian or Other Pacific Islander alone
  'oth',  ## Other race alone
  'two',  ## Two or more races
  'hsp',  ## Hispanic or Latino (of any race)
  'nhw'   ## Non-Hispanic White
)
### Age group categories
agegrps <- c(
  '0004', '0509', '1014', '1517', '1819', '2024', '2529',
  '3034', '3544', '4554', '5564', '6574', '7584', '85up'
)
### Sex categories
sexes <- c('m', 'f')
### Every race-sex-age combination. expand.grid() varies its first
### argument fastest, so age group cycles fastest, then sex, then race.
arsvars <- with(
  expand.grid(
    age = agegrps, sex = sexes, race = races,
    stringsAsFactors = FALSE
  ),
  paste0(race, sex, age)
)
## Identify the geographic identifier variables to keep alongside the
## age-race-sex variables
geovars <- c('GISJOIN', 'STATE', 'COUNTY')
## Set Functions
source('dcarea_functions.R')
## Restrict a tract-level data frame to the DC area and to the columns
## of interest.
##
## dta: data frame holding the columns named in geovars and arsvars.
## Returns the result of select.dcarea(dta) with GISJOIN converted to
## character and only the geovars and arsvars columns kept, in that order.
select.arsvars <- function(dta) {
  dcarea <- select.dcarea(dta)
  ## Store the join key as character
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  keepvars <- c(geovars, arsvars)
  dcarea[, keepvars]
}
## Load datasets containing age-race-sex variables from 2010 TIGER/Line files
## NOTE(review): the trt10_ars ... trt17_ars objects used below are
## presumably defined by this .Rdata file -- confirm.
load('../US/tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.Rdata')
## Create list of data frames for each year and restrict to DC-area
## tracts and the constructed age-race-sex variables
dtas <- list(trt10_ars, trt14_ars, trt15_ars, trt16_ars, trt17_ars)
arsdtas <- lapply(dtas, select.arsvars)
## Create single wide data frame with the year suffix appended to each
## age-race-sex variable name; the geographic identifiers keep their
## names so that GISJOIN can serve as the join key
suffixes <- c('10', 14:17)
namelist <- lapply(suffixes, function(x) c(geovars, paste0(arsvars, x)))
arsdta <- Map(setNames, arsdtas, namelist) %>%
  reduce(left_join, by = 'GISJOIN')
## Every year carries its own STATE and COUNTY columns, so the joins
## leave suffixed duplicates (STATE.x, STATE.y, ...). Keep the values
## from the left side of the first join (the 2010 data) under the plain
## names and drop every suffixed copy.
arsdta$STATE <- arsdta$STATE.x
arsdta$COUNTY <- arsdta$COUNTY.x
## Drop with a logical mask rather than -grep(): a negative index built
## from an empty grep() result selects zero columns instead of all.
dupgeo <- grepl('^STATE.+|^COUNTY.+', names(arsdta), perl = TRUE)
arsdta <- arsdta[, !dupgeo, drop = FALSE]
## Write file containing wide dataset to CSV format
## NOTE(review): write.csv() writes row names as an extra first column
## by default; pass row.names = FALSE if consumers do not expect it.
write.csv(arsdta, 'tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.csv')