From c22b6c0c95092d61c0eddfc1a85e2fbb8501ecc8 Mon Sep 17 00:00:00 2001 From: mikebader Date: Fri, 29 Oct 2021 13:18:50 -0400 Subject: [PATCH] Initial commit --- .gitattributes | 3 + .gitignore | 6 ++ dcarea_functions.R | 34 ++++++++++ .../create-tracts-1980TIGER-race-ethnicity.R | 24 +++++++ .../dataset/tracts-1980TIGER-race-ethnicity.csv | 3 + .../construction/tracts-2010TIGER-age-race-sex.R | 78 ++++++++++++++++++++++ .../dataset/tracts-2010TIGER-age-race-sex.csv | 3 + .../construct-tracts-2010TIGER-children-pres.R | 50 ++++++++++++++ .../dataset/tracts-2010TIGER-children-present.csv | 3 + .../construct-tracts-2010TIGER-educ-attainment.R | 53 +++++++++++++++ .../dataset/tracts-2010TIGER-educ-attainment.csv | 3 + .../construct-tracts-2010TIGER-foreign-born.R | 59 ++++++++++++++++ .../dataset/tracts-2010TIGER-foreign-born.csv | 3 + .../construct-tracts-2010TIGER-marital-status.R | 52 +++++++++++++++ .../dataset/tracts-2010TIGER-marital-status.csv | 3 + .../construct-tracts-2010TIGER-median-age.R | 42 ++++++++++++ .../dataset/tracts-2010TIGER-median-age.csv | 3 + ...struct-create-tracts-2010TIGER-race-ethnicity.R | 63 +++++++++++++++++ .../dataset/tracts-2010TIGER-race-ethnicity.csv | 3 + 19 files changed, 488 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 dcarea_functions.R create mode 100644 tracts/1980/tabular/race-ethnicity/construction/create-tracts-1980TIGER-race-ethnicity.R create mode 100644 tracts/1980/tabular/race-ethnicity/dataset/tracts-1980TIGER-race-ethnicity.csv create mode 100644 tracts/2010/tabular/age-race-sex/construction/tracts-2010TIGER-age-race-sex.R create mode 100644 tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.csv create mode 100644 tracts/2010/tabular/children-present/construction/construct-tracts-2010TIGER-children-pres.R create mode 100644 tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.csv create mode 100644 
tracts/2010/tabular/educ-attainment/construction/construct-tracts-2010TIGER-educ-attainment.R create mode 100644 tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.csv create mode 100644 tracts/2010/tabular/foreign-born/construction/construct-tracts-2010TIGER-foreign-born.R create mode 100644 tracts/2010/tabular/foreign-born/dataset/tracts-2010TIGER-foreign-born.csv create mode 100644 tracts/2010/tabular/marital-status/construction/construct-tracts-2010TIGER-marital-status.R create mode 100644 tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.csv create mode 100644 tracts/2010/tabular/median-age/construction/construct-tracts-2010TIGER-median-age.R create mode 100644 tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.csv create mode 100644 tracts/2010/tabular/race-ethnicity/construction/construct-create-tracts-2010TIGER-race-ethnicity.R create mode 100644 tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d1db429 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +*.csv filter=lfs diff=lfs merge=lfs -text +*.shp filter=lfs diff=lfs merge=lfs +*.zip filter=lfs diff=lfs merge=lfs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ff17798 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*.shp +*.zip + +.Rhistory + +.venv diff --git a/dcarea_functions.R b/dcarea_functions.R new file mode 100644 index 0000000..4679a5c --- /dev/null +++ b/dcarea_functions.R @@ -0,0 +1,34 @@ +## Functions to create data for DC Area +## (DC & surrounding jurisdictions, including independent cities) + +select.dcarea <- function(dta) { +## Arguments: `dta`: dataset from which to select observations in +## the DC Area +## Returns: dataset containing only observations from the DC area + + ## Identify counties to keep in DC Area dataset using FIPS county codes + counties <- c( + '110001' # D.C. 
+ , '240031' # Montgomery County + , '240033' # Prince George's County + , '510013' # Arlington County + , '510059' # Fairfax County + , '510510' # Alexandria city + , '510600' # Fairfax city + , '510610' # Falls Church city + ) + + ## Select counties using regular expression based on FIPS codes above + ## and common `GISJOIN` variable + re <- paste0('^G', counties, collapse = '|') + dcarea <- dta[grep(re, dta$GISJOIN, perl=TRUE),] + + ## Replace factor variables to contain only levels in the DC area + if('COUNTY' %in% names(dcarea)) { + dcarea$COUNTY <- factor(dcarea$COUNTY) + } + if('STATE' %in% names(dcarea)) { + dcarea$STATE <- factor(dcarea$STATE) + } + return(dcarea) +} diff --git a/tracts/1980/tabular/race-ethnicity/construction/create-tracts-1980TIGER-race-ethnicity.R b/tracts/1980/tabular/race-ethnicity/construction/create-tracts-1980TIGER-race-ethnicity.R new file mode 100644 index 0000000..12b37ce --- /dev/null +++ b/tracts/1980/tabular/race-ethnicity/construction/create-tracts-1980TIGER-race-ethnicity.R @@ -0,0 +1,24 @@ +## File creates file containing race & ethnicity variables at the +## tract level for DC Area from 1980 Census STF-1 data + +## Set up environment +library(tidyverse) + +## Identify race variable names +races <- c('totpop', 'nhw', 'nhb', 'api', 'hsp', 'oth', 'two') +praces <- paste0('p', races[-1]) +racevars <- c(races, praces) + +## Identify variables to keep that are not race variables +nonracevars <- c('GISJOIN', 'STATE', 'COUNTY') + +## Set Functions +source('dcarea_functions.R') + +load('../US/tracts/1980/tabular/race-ethnicity/dataset/tracts-1980TIGER-race-ethnicity.Rdata') + +## Select DC-area tracts +racedta <- select.dcarea(trt80_re) + +## Write file containing wide dataset to CSV format +write.csv(racedta, 'tracts/1980/tabular/race-ethnicity/dataset/tracts-1980TIGER-race-ethnicity.csv') diff --git a/tracts/1980/tabular/race-ethnicity/dataset/tracts-1980TIGER-race-ethnicity.csv 
b/tracts/1980/tabular/race-ethnicity/dataset/tracts-1980TIGER-race-ethnicity.csv new file mode 100644 index 0000000..d99b57d --- /dev/null +++ b/tracts/1980/tabular/race-ethnicity/dataset/tracts-1980TIGER-race-ethnicity.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2340869befcfca95f88bc712af8d77bde94b542b4a816afcb6b047c2a394500 +size 242504 diff --git a/tracts/2010/tabular/age-race-sex/construction/tracts-2010TIGER-age-race-sex.R b/tracts/2010/tabular/age-race-sex/construction/tracts-2010TIGER-age-race-sex.R new file mode 100644 index 0000000..c72793f --- /dev/null +++ b/tracts/2010/tabular/age-race-sex/construction/tracts-2010TIGER-age-race-sex.R @@ -0,0 +1,78 @@ +## Creates single file containing age-race-sex variables at the +## tract level for DC Area from the 2010 Census and 2010-2014 through +## 2013-2017 American Community Surveys for tracts in the DC Area + +## Set up environment +library(tidyverse) + +## Identify variables to keep representing age-race-sex variables +### Abbreviations for racial groups +races <- c( + 'blk' ## Black alone + , 'ami' ## American Indian or Native Alaskan alone + , 'asi' ## Asian alone + , 'pac' ## Native Hawaiian or Other Pacific Islander alone + , 'oth' ## Other race alone + , 'two' ## Two or more races + , 'hsp' ## Hispanic or Latino (of any race) + , 'nhw' ## Non-Hispanic White +) + +### Age group categories +agegrps <- c( + '0004' + , '0509' + , '1014' + , '1517' + , '1819' + , '2024' + , '2529' + , '3034' + , '3544' + , '4554' + , '5564' + , '6574' + , '7584' + , '85up' +) + +### Sex categories +sexes <- c('m', 'f') + +arsvars <- as.vector(sapply(races, paste0, sapply(sexes, paste0, agegrps))) + + +## Identify variables to keep that are not race variables +geovars <- c('GISJOIN', 'STATE', 'COUNTY') + +## Set Functions +source('dcarea_functions.R') + +## Create dataset that contains only DC-area tracts and constructed +## age-race-sex variables +select.arsvars <- function(dta) { + dta <- 
select.dcarea(dta) + dta$GISJOIN <- as.character(dta$GISJOIN) + return(dta[, c(geovars, arsvars)]) +} + +## Load datasets containing age-race-sex variables from 2010 TIGER/Line files +load('../US/tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.Rdata') + +## Create list of data frames for each year and restrict to created +## age-race-sex variables +dtas <- list(trt10_ars, trt14_ars, trt15_ars, trt16_ars, trt17_ars) +arsdtas <- lapply(dtas, select.arsvars) + +## Create single wide data frame with year appended to variable name +suffixes <- c('10', 14:17) +namelist <- lapply(suffixes, function(x) c(geovars, paste0(arsvars, x))) +arsdta <- mapply(setNames, arsdtas, namelist, SIMPLIFY = FALSE) %>% + reduce(left_join, by='GISJOIN') +arsdta$STATE <- arsdta$STATE.x +arsdta$COUNTY <- arsdta$COUNTY.x +arsdta <- arsdta[, -(grep('^STATE.+|^COUNTY.+', names(arsdta), perl=TRUE))] + +## Write file containing wide dataset to CSV format +write.csv(arsdta, 'tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.csv') + diff --git a/tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.csv b/tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.csv new file mode 100644 index 0000000..1820397 --- /dev/null +++ b/tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa7f49097783c8fa850d4641c66733b8d93a0084ae9841dbc70a5a261d434783 +size 2653606 diff --git a/tracts/2010/tabular/children-present/construction/construct-tracts-2010TIGER-children-pres.R b/tracts/2010/tabular/children-present/construction/construct-tracts-2010TIGER-children-pres.R new file mode 100644 index 0000000..087d1e0 --- /dev/null +++ b/tracts/2010/tabular/children-present/construction/construct-tracts-2010TIGER-children-pres.R @@ -0,0 +1,50 @@ +## Creates single file containing household presence of children variables +## for tracts based on 2010 Census 
and the 2010-2014 through 2013-2017 +## American Community Surveys for tracts in the DC Area +## +## tothh = Total households +## chpr = Households with children present +## ncpr = Households with no children present + +## Set up environment +library(tidyverse) + +## Identify variable abbreviations +presence <- c('tothh', 'chpr', 'ncpr') +ppresence <- paste0('p',presence[-1]) +presvars <- c(presence, ppresence) + +geovars <- c('GISJOIN', 'STATE', 'COUNTY') + +## Set Functions +source('dcarea_functions.R') + +## Create dataset that contains only DC-area tracts and constructed +## children present variables +select.presvars <- function(dta) { + dta <- select.dcarea(dta) + dta$GISJOIN <- as.character(dta$GISJOIN) + return(dta[, c(geovars, presvars)]) +} + +## Load datasets containing children present variables from files based on +## 2010 tracts +load('../US/tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.Rdata') + +## Create list of data frames for each year and restrict to created +## children present variables +dtas <- list(trt10c_cp, trt14_cp, trt15_cp, trt16_cp, trt17_cp) +presdtas <- lapply(dtas, select.presvars) + +## Create single wide data frame with year appended to variable name +namelist <- lapply(c('10c', 14:17), + function(x) c(geovars, paste0(presvars, x))) +presdta <- mapply(setNames, presdtas, namelist, SIMPLIFY = FALSE) %>% + reduce(left_join, by='GISJOIN') +presdta$STATE <- presdta$STATE.x +presdta$COUNTY <- presdta$COUNTY.x + +presdta <- presdta[, -(grep('^STATE.+|^COUNTY.+', names(presdta), perl=TRUE))] + +## Write file containing wide dataset to CSV format +write.csv(presdta, 'tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.csv') diff --git a/tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.csv b/tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.csv new file mode 100644 index 0000000..bb3bda0 --- /dev/null +++ 
b/tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5369891731e77a322d97fcd392d3ac5e6f0be82dd4555db122dff5906f5f96b9 +size 292289 diff --git a/tracts/2010/tabular/educ-attainment/construction/construct-tracts-2010TIGER-educ-attainment.R b/tracts/2010/tabular/educ-attainment/construction/construct-tracts-2010TIGER-educ-attainment.R new file mode 100644 index 0000000..70464aa --- /dev/null +++ b/tracts/2010/tabular/educ-attainment/construction/construct-tracts-2010TIGER-educ-attainment.R @@ -0,0 +1,53 @@ +## Creates single file containing educational attainment variables from the +## 2010-2014 through 2013-2017 American Community Surveys for tracts +## in the DC Area + +## tot25o = Total population 25 and older +## lh = Less than high school +## hs = High school or GED +## sc = Some college +## aa = Associate's degree +## ba = Bachelor's degree +## gr = Graduate degree + +## Set up environment +library(tidyverse) + +## Identify variable abbreviations +attain <- c('tot25o', 'lh', 'hs', 'sc', 'aa', 'ba', 'gr') +pattain <- paste0('p',attain[-1]) +attainvars <- c(attain, pattain) + +geovars <- c('GISJOIN', 'STATE', 'COUNTY') + +## Set Functions +source('dcarea_functions.R') + +## Create dataset that contains only DC-area tracts and constructed +## educational attainment variables +select.attainvars <- function(dta) { + dta <- select.dcarea(dta) + dta$GISJOIN <- as.character(dta$GISJOIN) + return(dta[, c(geovars, attainvars)]) +} + +## Load datasets containing educational attainment variables from files based on +## 2010 tracts +load('../US/tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.Rdata') + +## Create list of data frames for each year and restrict to created +## educational attainment variables +dtas <- list(trt14_ed, trt15_ed, trt16_ed, trt17_ed) +attaindtas <- lapply(dtas, select.attainvars) + +## Create single wide data frame with 
year appended to variable name +namelist <- lapply(14:17, + function(x) c(geovars, paste0(attainvars, x))) +attaindta <- mapply(setNames, attaindtas, namelist, SIMPLIFY = FALSE) %>% + reduce(left_join, by='GISJOIN') +attaindta$STATE <- attaindta$STATE.x +attaindta$COUNTY <- attaindta$COUNTY.x +attaindta <- attaindta[, -(grep('^STATE.+|^COUNTY.+', names(attaindta), perl=TRUE))] + +## Write file containing wide dataset to CSV format +write.csv(attaindta, 'tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.csv') diff --git a/tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.csv b/tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.csv new file mode 100644 index 0000000..163dd3c --- /dev/null +++ b/tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5991aa2ae6d67e1fea62bf966570a24ffe96a9ea869ca91753fa522f88ee0639 +size 591056 diff --git a/tracts/2010/tabular/foreign-born/construction/construct-tracts-2010TIGER-foreign-born.R b/tracts/2010/tabular/foreign-born/construction/construct-tracts-2010TIGER-foreign-born.R new file mode 100644 index 0000000..f43bf2b --- /dev/null +++ b/tracts/2010/tabular/foreign-born/construction/construct-tracts-2010TIGER-foreign-born.R @@ -0,0 +1,59 @@ +## Creates single file containing foreign-born variables at the +## 2010-2014 through 2013-2017 American Community Surveys for tracts +## in the DC Area + +## fbpop = Foreign-born population +## eur = European foreign-born +## asi = Asian foreign-born +## afr = African foreign-born +## oce = Oceania foreign-born +## lat = Latin American foreign-born +## Note `p` variables represent *proportion of foreign-born residents* +## +## Note: Remainder of foreign born are from North America, i.e.: +## fbpop - sum(eur + asi + afr + oce + lat) = N. 
American foreign-born + +## Set up environment +library(tidyverse) + +## Identify variable abbreviations +regions <- c('fbpop', 'eur','asi','afr','oce','lat') +pregions <- paste0('p',regions[-1]) +regionvars <- c(regions, pregions) + +geovars <- c('GISJOIN', 'STATE', 'COUNTY') + +## Set Functions +source('dcarea_functions.R') + +## Create dataset that contains only DC-area tracts and constructed +## foreign-born variables +select.regionvars <- function(dta) { + dta <- select.dcarea(dta) + dta$GISJOIN <- as.character(dta$GISJOIN) + return(dta[, c(geovars, regionvars)]) +} + +## Load datasets containing foreign-born variables from files based on +## 2010 tracts +load('../US/tracts/2010/tabular/foreign-born/dataset/tracts-2010TIGER-foreign-born.Rdata') + +## Create list of data frames for each year and restrict to created +## foreign-born variables +dtas <- list(trt14_fb, trt15_fb, trt16_fb, trt17_fb) +regiondtas <- lapply(dtas, select.regionvars) + +## Create single wide data frame with year appended to variable name +namelist <- lapply(14:17, + function(x) c(geovars, paste0(regionvars, x))) +regiondta <- mapply(setNames, regiondtas, namelist, SIMPLIFY = FALSE) %>% + reduce(left_join, by='GISJOIN') +regiondta$STATE <- regiondta$STATE.x +regiondta$COUNTY <- regiondta$COUNTY.x +regiondta <- regiondta[, -(grep('^STATE.+|^COUNTY.+', names(regiondta), perl=TRUE))] + +## Write file containing wide dataset to CSV format +write.csv(regiondta, 'tracts/2010/tabular/foreign-born/dataset/tracts-2010TIGER-foreign-born.csv') + + + diff --git a/tracts/2010/tabular/foreign-born/dataset/tracts-2010TIGER-foreign-born.csv b/tracts/2010/tabular/foreign-born/dataset/tracts-2010TIGER-foreign-born.csv new file mode 100644 index 0000000..914aed6 --- /dev/null +++ b/tracts/2010/tabular/foreign-born/dataset/tracts-2010TIGER-foreign-born.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abebeca99f1efb8cca12dec8145cd6a80fc7fbb3ca1068748258822ed5399802 +size 418196 diff 
--git a/tracts/2010/tabular/marital-status/construction/construct-tracts-2010TIGER-marital-status.R b/tracts/2010/tabular/marital-status/construction/construct-tracts-2010TIGER-marital-status.R new file mode 100644 index 0000000..ad59c46 --- /dev/null +++ b/tracts/2010/tabular/marital-status/construction/construct-tracts-2010TIGER-marital-status.R @@ -0,0 +1,52 @@ +## Creates single file containing marital status variables from the +## 2010-2014 through 2013-2017 American Community Surveys for tracts +## in the DC Area + +## Set up environment +library(tidyverse) + +## Identify marital status variable names +statuses <- c('mar', 'nvm', 'wid', 'div') +pstatuses <- paste0('p', statuses) +statusvars <- c(statuses, pstatuses) + +## Identify geographic identifiers to keep +geovars <- c('GISJOIN', 'STATE', 'COUNTY') + +## Set Functions +source('dcarea_functions.R') + +## Define function to create dataset that contains only DC-area tracts and +## constructed marital status variables +select.statusvars <- function(dta) { + dta <- select.dcarea(dta) + dta$GISJOIN <- as.character(dta$GISJOIN) + return(dta[, c(geovars, statusvars)]) +} + +## Load datasets containing marital status variables using data from +## files based on 2010 tracts +load('../US/tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.Rdata') + +## Create list of data frames for each year and restrict to created +## marital status variables +dtas <- list(trt14_ms, trt15_ms, trt16_ms, trt17_ms) +statusdtas <- lapply(dtas, select.statusvars) + +## Create single wide data frame with year appended to variable name +namelist <- lapply(14:17, function(x) c(geovars, paste0(statusvars, x))) +statusdta <- mapply(setNames, statusdtas, namelist, SIMPLIFY = FALSE) %>% + reduce(left_join, by='GISJOIN') +statusdta$STATE <- statusdta$STATE.x +statusdta$COUNTY <- statusdta$COUNTY.x +statusdta <- statusdta[, -(grep('^STATE.+|^COUNTY.+', names(statusdta), perl=TRUE))] + +## Write file containing wide dataset to CSV 
format +write.csv(statusdta, + 'tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.csv') + + + + + + diff --git a/tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.csv b/tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.csv new file mode 100644 index 0000000..45ed91c --- /dev/null +++ b/tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29cfc588c2f40f8c3f231f7cd7825a78d3c7371804e1d3b4e474b5535d91f27d +size 403477 diff --git a/tracts/2010/tabular/median-age/construction/construct-tracts-2010TIGER-median-age.R b/tracts/2010/tabular/median-age/construction/construct-tracts-2010TIGER-median-age.R new file mode 100644 index 0000000..2f11ad8 --- /dev/null +++ b/tracts/2010/tabular/median-age/construction/construct-tracts-2010TIGER-median-age.R @@ -0,0 +1,42 @@ +## Creates single file containing median age variables at the +## tract level for DC Area from the 2010 Census and 2010-2014 through +## 2013-2017 American Community Surveys for tracts in the DC Area + +## Set up environment +library(tidyverse) + +agevar <- 'mdage' + +## Identify variables to keep that are not median age variables +geovars <- c('GISJOIN', 'STATE', 'COUNTY') + +## Set Functions +source('dcarea_functions.R') + +## Create dataset that contains only DC-area tracts and constructed +## median age variables +select.agevars <- function(dta) { + dta <- select.dcarea(dta) + dta$GISJOIN <- as.character(dta$GISJOIN) + return(dta[, c(geovars, agevar)]) +} + +## Load datasets containing median age variables from 2010 TIGER/Line files +load('../US/tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.Rdata') + +## Create list of data frames for each year and restrict to created median +## age variables +dtas <- list(trt10c_ag, trt14_ag, trt15_ag, trt16_ag, trt17_ag) +agedtas <- lapply(dtas, select.agevars) + +## Create single wide data frame with 
year appended to variable name +suffixes <- c('10c', 14:17) +namelist <- lapply(suffixes, function(x) c(geovars, paste0(agevar, x))) +agedta <- mapply(setNames, agedtas, namelist, SIMPLIFY = FALSE) %>% + reduce(left_join, by='GISJOIN') +agedta$STATE <- agedta$STATE.x +agedta$COUNTY <- agedta$COUNTY.x +agedta <- agedta[, -(grep('^STATE.+|^COUNTY.+', names(agedta), perl=TRUE))] + +## Write file containing wide dataset to CSV format +write.csv(agedta, 'tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.csv') diff --git a/tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.csv b/tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.csv new file mode 100644 index 0000000..7322aa4 --- /dev/null +++ b/tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df460885dea456803f8c0348f45cf8da65037f45b9769682e4784a626f4ad36c +size 78786 diff --git a/tracts/2010/tabular/race-ethnicity/construction/construct-create-tracts-2010TIGER-race-ethnicity.R b/tracts/2010/tabular/race-ethnicity/construction/construct-create-tracts-2010TIGER-race-ethnicity.R new file mode 100644 index 0000000..a37a8d2 --- /dev/null +++ b/tracts/2010/tabular/race-ethnicity/construction/construct-create-tracts-2010TIGER-race-ethnicity.R @@ -0,0 +1,63 @@ +## File creates single file containing race & ethnicity variables at the +## tract level for DC Area from the 2010 Census and 2007-2011 through +## 2011-2015 American Community Surveys. +## +## Also includes variables indicating whether a neighborhood qualifies as a +## "quadrivial" neighborhood (Asians, blacks, Latinxs, and whites each make +## up at least 10% of the tract and no group is a majority) in each year. 
+ + +## Set up environment +library(tidyverse) + +## Identify race variable names +races <- c('totpop', 'nhw', 'nhb', 'api', 'hsp', 'oth', 'two') +praces <- paste0('p', races[-1]) +racevars <- c(races, praces) + +## Identify variables to keep that are not race variables +nonracevars <- c('GISJOIN', 'STATE', 'COUNTY') + +## Set Functions +source('dcarea_functions.R') + +## Create dataset that contains only DC-area tracts and constructed +## race variables +select.racevars <- function(dta) { + dta <- select.dcarea(dta) + dta$GISJOIN <- as.character(dta$GISJOIN) + return(dta[, c(nonracevars, racevars)]) +} + +## Identify quadrivial neighborhoods +id.quads <- function(dta) { + eligible <- sapply(dta[, praces[1:4]], function(x) x >= .10 & x < .5) + dta['quad'] <- apply(eligible, 1, all) + return(dta) +} + +# ## Sets variable names for a specific dataset in the `racedtas` list +# set.varnames <- function(x,y) setNames(racedtas[[x]], y) + +## Load datasets containing race-ethnicity variables from 2010 TIGER/Line files +## for US tracts 2010 +load('../US/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.Rdata') + +## Create list of data frames for each year and restrict to created race +## variables +dtas <- list(trt10_re, trt11_re, trt12_re, trt13_re, trt14_re, trt15_re, + trt16_re, trt17_re) +racedtas <- lapply(dtas, select.racevars) %>% + lapply(id.quads) + +## Create single wide data frame with year appended to variable name +namelist <- lapply(10:17, + function(x) c(nonracevars, paste0(c(racevars, 'quad'), x))) +racedta <- mapply(setNames, racedtas, namelist, SIMPLIFY = FALSE) %>% + reduce(left_join, by='GISJOIN') +racedta$STATE <- racedta$STATE.x +racedta$COUNTY <- racedta$COUNTY.x +racedta <- racedta[, -(grep('^STATE.+|^COUNTY.+', names(racedta), perl=TRUE))] + +## Write file containing wide dataset to CSV format +write.csv(racedta, 'tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv') diff --git 
a/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv b/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv new file mode 100644 index 0000000..2581045 --- /dev/null +++ b/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc87ae131ea3272637ad5f15e577fa7684d986ec36b76656ef402258c1a05e7 +size 1114494