commit
c22b6c0c95
19 changed files with 488 additions and 0 deletions
-
3.gitattributes
-
6.gitignore
-
34dcarea_functions.R
-
24tracts/1980/tabular/race-ethnicity/construction/create-tracts-1980TIGER-race-ethnicity.R
-
BINtracts/1980/tabular/race-ethnicity/dataset/tracts-1980TIGER-race-ethnicity.csv
-
78tracts/2010/tabular/age-race-sex/construction/tracts-2010TIGER-age-race-sex.R
-
BINtracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.csv
-
50tracts/2010/tabular/children-present/construction/construct-tracts-2010TIGER-children-pres.R
-
BINtracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.csv
-
53tracts/2010/tabular/educ-attainment/construction/construct-tracts-2010TIGER-educ-attainment.R
-
BINtracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.csv
-
59tracts/2010/tabular/foreign-born/construction/construct-tracts-2010TIGER-foreign-born.R
-
BINtracts/2010/tabular/foreign-born/dataset/tracts-2010TIGER-foreign-born.csv
-
52tracts/2010/tabular/marital-status/construction/construct-tracts-2010TIGER-marital-status.R
-
BINtracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.csv
-
42tracts/2010/tabular/median-age/construction/construct-tracts-2010TIGER-median-age.R
-
BINtracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.csv
-
63tracts/2010/tabular/race-ethnicity/construction/construct-create-tracts-2010TIGER-race-ethnicity.R
-
BINtracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv
@ -0,0 +1,3 @@ |
|||||
|
*.csv filter=lfs diff=lfs merge=lfs -text |
||||
|
*.shp filter=lfs diff=lfs merge=lfs |
||||
|
*.zip filter=lfs diff=lfs merge=lfs |
||||
@ -0,0 +1,6 @@ |
|||||
|
*.shp |
||||
|
*.zip |
||||
|
|
||||
|
.Rhistory |
||||
|
|
||||
|
.venv |
||||
@ -0,0 +1,34 @@ |
|||||
|
## Functions to create data for DC Area |
||||
|
## (DC & surrounding jurisdictions, including independent cities) |
||||
|
|
||||
|
select.dcarea <- function(dta) {
  ## Arguments: `dta`: dataset from which to select observations in
  ##                   the DC Area; must contain a `GISJOIN` column
  ## Returns: dataset containing only observations from the DC area,
  ##          always as a data frame (even when `dta` has one column)

  ## Fail loudly when the join key is absent; otherwise `dta$GISJOIN`
  ## is NULL, nothing matches, and an empty dataset is returned silently
  if (!('GISJOIN' %in% names(dta))) {
    stop('`dta` must contain a `GISJOIN` column', call. = FALSE)
  }

  ## Identify counties to keep in DC Area dataset using FIPS county codes
  ## (these are the six characters that follow the leading `G` of `GISJOIN`)
  counties <- c(
      '110001' # D.C.
    , '240031' # Montgomery County
    , '240033' # Prince George's County
    , '510013' # Arlington County
    , '510059' # Fairfax County
    , '510510' # Alexandria city
    , '510600' # Fairfax city
    , '510610' # Falls Church city
  )

  ## Select counties using regular expression based on FIPS codes above
  ## and common `GISJOIN` variable. `drop = FALSE` keeps the result a
  ## data frame instead of collapsing a one-column dataset to a vector.
  re <- paste0('^G', counties, collapse = '|')
  dcarea <- dta[grep(re, dta[['GISJOIN']], perl = TRUE), , drop = FALSE]

  ## Rebuild `COUNTY` and `STATE` (when present) as factors so they
  ## contain only the levels that occur in the DC area
  for (v in intersect(c('COUNTY', 'STATE'), names(dcarea))) {
    dcarea[[v]] <- factor(dcarea[[v]])
  }

  dcarea
}
||||
@ -0,0 +1,24 @@ |
|||||
|
## File creates file containing race & ethnicity variables at the |
||||
|
## tract level for DC Area from 1980 Census STF-1 data |
||||
|
|
||||
|
## Set up environment |
||||
|
library(tidyverse) |
||||
|
|
||||
|
## Identify race variable names |
||||
|
races <- c('totpop', 'nhw', 'nhb', 'api', 'hsp', 'oth', 'two') |
||||
|
praces <- paste0('p', races[-1]) |
||||
|
racevars <- c(races, praces) |
||||
|
|
||||
|
## Identify variables to keep that are not race variables |
||||
|
nonracevars <- c('GISJOIN', 'STATE', 'COUNTY') |
||||
|
|
||||
|
## Set Functions |
||||
|
source('dcarea_functions.R') |
||||
|
|
||||
|
load('../US/tracts/1980/tabular/race-ethnicity/dataset/tracts-1980TIGER-race-ethnicity.Rdata') |
||||
|
|
||||
|
## Select DC-area tracts |
||||
|
racedta <- select.dcarea(trt80_re) |
||||
|
|
||||
|
## Write file containing wide dataset to CSV format |
||||
|
write.csv(racedta, 'tracts/1980/tabular/race-ethnicity/dataset/tracts-1980TIGER-race-ethnicity.csv') |
||||
|
size 242504 |
@ -0,0 +1,78 @@ |
|||||
|
## Creates single file containing age-race-sex variables at the |
||||
|
## tract level for DC Area from the 2010 Census and 2010-2014 through |
||||
|
## 2013-2017 American Community Surveys for tracts in the DC Area |
||||
|
|
||||
|
## Set up environment |
||||
|
library(tidyverse) |
||||
|
|
||||
|
## Identify variables to keep representing age-race-sex variables |
||||
|
### Abbreviations for racial groups |
||||
|
races <- c( |
||||
|
'blk' ## Black alone |
||||
|
, 'ami' ## American Indian or Native Alaskan alone |
||||
|
, 'asi' ## Asian alone |
||||
|
, 'pac' ## Native Hawaiian or Other Pacific Islander alone |
||||
|
, 'oth' ## Other race alone |
||||
|
, 'two' ## Two or more races |
||||
|
, 'hsp' ## Hispanic or Latino (of any race) |
||||
|
, 'nhw' ## Non-Hispanic White |
||||
|
) |
||||
|
|
||||
|
### Age group categories |
||||
|
agegrps <- c( |
||||
|
'0004' |
||||
|
, '0509' |
||||
|
, '1014' |
||||
|
, '1517' |
||||
|
, '1819' |
||||
|
, '2024' |
||||
|
, '2529' |
||||
|
, '3034' |
||||
|
, '3544' |
||||
|
, '4554' |
||||
|
, '5564' |
||||
|
, '6574' |
||||
|
, '7584' |
||||
|
, '85up' |
||||
|
) |
||||
|
|
||||
|
### Sex categories |
||||
|
sexes <- c('m', 'f') |
||||
|
|
||||
|
arsvars <- as.vector(sapply(races, paste0, sapply(sexes, paste0, agegrps))) |
||||
|
|
||||
|
|
||||
|
## Identify geographic identifier variables to keep |
||||
|
geovars <- c('GISJOIN', 'STATE', 'COUNTY') |
||||
|
|
||||
|
## Set Functions |
||||
|
source('dcarea_functions.R') |
||||
|
|
||||
|
## Create dataset that contains only DC-area tracts and constructed |
||||
|
## age-race-sex variables |
||||
|
select.arsvars <- function(dta) {
  ## Arguments: `dta`: tract dataset containing `GISJOIN` plus the
  ##                   columns named in `geovars` and `arsvars`
  ## Returns: DC-area rows of `dta` (via `select.dcarea`), with `GISJOIN`
  ##          stored as character and only the `geovars` and `arsvars`
  ##          columns kept, in that order
  dcarea <- select.dcarea(dta)
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  keep <- c(geovars, arsvars)
  dcarea[, keep]
}
||||
|
|
||||
|
## Load datasets containing age-race-sex variables from 2010 TIGER/Line files |
||||
|
load('../US/tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.Rdata') |
||||
|
|
||||
|
## Create list of data frames for each year and restrict to created |
||||
|
## age-race-sex variables |
||||
|
dtas <- list(trt10_ars, trt14_ars, trt15_ars, trt16_ars, trt17_ars) |
||||
|
arsdtas <- lapply(dtas, select.arsvars) |
||||
|
|
||||
|
## Create single wide data frame with year appended to variable name |
||||
|
suffixes <- c('10', 14:17) |
||||
|
namelist <- lapply(suffixes, function(x) c(geovars, paste0(arsvars, x))) |
||||
|
arsdta <- mapply(setNames, arsdtas, namelist, SIMPLIFY = FALSE) %>% |
||||
|
reduce(left_join, by='GISJOIN') |
||||
|
arsdta$STATE <- arsdta$STATE.x |
||||
|
arsdta$COUNTY <- arsdta$COUNTY.x |
||||
|
arsdta <- arsdta[, -(grep('^STATE.+|^COUNTY.+', names(arsdta), perl=TRUE))] |
||||
|
|
||||
|
## Write file containing wide dataset to CSV format |
||||
|
write.csv(arsdta, 'tracts/2010/tabular/age-race-sex/dataset/tracts-2010TIGER-age-race-sex.csv') |
||||
|
|
||||
|
size 2653606 |
@ -0,0 +1,50 @@ |
|||||
|
## Creates single file containing household presence of children variables |
||||
|
## for tracts based on 2010 Census and the 2010-2014 through 2013-2017 |
||||
|
## American Community Surveys for tracts in the DC Area |
||||
|
## |
||||
|
## tothh = Total households |
||||
|
## chpr = Households with children present |
||||
|
## ncpr = Households with no children present |
||||
|
|
||||
|
## Set up environment |
||||
|
library(tidyverse) |
||||
|
|
||||
|
## Identify variable abbreviations |
||||
|
presence <- c('tothh', 'chpr', 'ncpr') |
||||
|
ppresence <- paste0('p',presence[-1]) |
||||
|
presvars <- c(presence, ppresence) |
||||
|
|
||||
|
geovars <- c('GISJOIN', 'STATE', 'COUNTY') |
||||
|
|
||||
|
## Set Functions |
||||
|
source('dcarea_functions.R') |
||||
|
|
||||
|
## Create dataset that contains only DC-area tracts and constructed |
||||
|
## children-present variables |
||||
|
select.presvars <- function(dta) {
  ## Arguments: `dta`: tract dataset containing `GISJOIN` plus the
  ##                   columns named in `geovars` and `presvars`
  ## Returns: DC-area rows of `dta` (via `select.dcarea`), with `GISJOIN`
  ##          stored as character and only the `geovars` and `presvars`
  ##          columns kept, in that order
  dcarea <- select.dcarea(dta)
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  keep <- c(geovars, presvars)
  dcarea[, keep]
}
||||
|
|
||||
|
## Load datasets containing children present variables from files based on |
||||
|
## 2010 tracts |
||||
|
load('../US/tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.Rdata') |
||||
|
|
||||
|
## Create list of data frames for each year and restrict to created |
||||
|
## children present variables |
||||
|
dtas <- list(trt10c_cp, trt14_cp, trt15_cp, trt16_cp, trt17_cp) |
||||
|
presdtas <- lapply(dtas, select.presvars) |
||||
|
|
||||
|
## Create single wide data frame with year appended to variable name |
||||
|
namelist <- lapply(c('10c', 14:17), |
||||
|
function(x) c(geovars, paste0(presvars, x))) |
||||
|
presdta <- mapply(setNames, presdtas, namelist, SIMPLIFY = FALSE) %>% |
||||
|
reduce(left_join, by='GISJOIN') |
||||
|
presdta$STATE <- presdta$STATE.x |
||||
|
presdta$COUNTY <- presdta$COUNTY.x |
||||
|
|
||||
|
presdta <- presdta[, -(grep('^STATE.+|^COUNTY.+', names(presdta), perl=TRUE))] |
||||
|
|
||||
|
## Write file containing wide dataset to CSV format |
||||
|
write.csv(presdta, 'tracts/2010/tabular/children-present/dataset/tracts-2010TIGER-children-present.csv') |
||||
|
size 292289 |
@ -0,0 +1,53 @@ |
|||||
|
## Creates single file containing educational attainment variables from the |
||||
|
## 2010-2014 through 2013-2017 American Community Surveys for tracts |
||||
|
## in the DC Area |
||||
|
|
||||
|
## tot25o = Total population 25 and older |
||||
|
## lh = Less than high school |
||||
|
## hs = High school or GED |
||||
|
## sc = Some college |
||||
|
## aa = Associate's degree |
||||
|
## ba = Bachelor's degree |
||||
|
## gr = Graduate degree |
||||
|
|
||||
|
## Set up environment |
||||
|
library(tidyverse) |
||||
|
|
||||
|
## Identify variable abbreviations |
||||
|
attain <- c('tot25o', 'lh', 'hs', 'sc', 'aa', 'ba', 'gr') |
||||
|
pattain <- paste0('p',attain[-1]) |
||||
|
attainvars <- c(attain, pattain) |
||||
|
|
||||
|
geovars <- c('GISJOIN', 'STATE', 'COUNTY') |
||||
|
|
||||
|
## Set Functions |
||||
|
source('dcarea_functions.R') |
||||
|
|
||||
|
## Create dataset that contains only DC-area tracts and constructed |
||||
|
## educational attainment variables |
||||
|
select.attainvars <- function(dta) {
  ## Arguments: `dta`: tract dataset containing `GISJOIN` plus the
  ##                   columns named in `geovars` and `attainvars`
  ## Returns: DC-area rows of `dta` (via `select.dcarea`), with `GISJOIN`
  ##          stored as character and only the `geovars` and `attainvars`
  ##          columns kept, in that order
  dcarea <- select.dcarea(dta)
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  keep <- c(geovars, attainvars)
  dcarea[, keep]
}
||||
|
|
||||
|
## Load datasets containing educational attainment variables from files based on |
||||
|
## 2010 tracts |
||||
|
load('../US/tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.Rdata') |
||||
|
|
||||
|
## Create list of data frames for each year and restrict to created |
||||
|
## educational attainment variables |
||||
|
dtas <- list(trt14_ed, trt15_ed, trt16_ed, trt17_ed) |
||||
|
attaindtas <- lapply(dtas, select.attainvars) |
||||
|
|
||||
|
## Create single wide data frame with year appended to variable name |
||||
|
namelist <- lapply(14:17, |
||||
|
function(x) c(geovars, paste0(attainvars, x))) |
||||
|
attaindta <- mapply(setNames, attaindtas, namelist, SIMPLIFY = FALSE) %>% |
||||
|
reduce(left_join, by='GISJOIN') |
||||
|
attaindta$STATE <- attaindta$STATE.x |
||||
|
attaindta$COUNTY <- attaindta$COUNTY.x |
||||
|
attaindta <- attaindta[, -(grep('^STATE.+|^COUNTY.+', names(attaindta), perl=TRUE))] |
||||
|
|
||||
|
## Write file containing wide dataset to CSV format |
||||
|
write.csv(attaindta, 'tracts/2010/tabular/educ-attainment/dataset/tracts-2010TIGER-educ-attainment.csv') |
||||
|
size 591056 |
@ -0,0 +1,59 @@ |
|||||
|
## Creates single file containing foreign-born variables from the |
||||
|
## 2010-2014 through 2013-2017 American Community Surveys for tracts |
||||
|
## in the DC Area |
||||
|
|
||||
|
## fbpop = Foreign-born population |
||||
|
## eur = European foreign-born |
||||
|
## asi = Asian foreign-born |
||||
|
## afr = African foreign-born |
||||
|
## oce = Oceania foreign-born |
||||
|
## lat = Latin American foreign-born |
||||
|
## Note `p` variables represent *proportion of foreign-born residents* |
||||
|
## |
||||
|
## Note: Remainder of foreign born are from North America, i.e.: |
||||
|
## fbpop - sum(eur + asi + afr + oce + lat) = N. American foreign-born |
||||
|
|
||||
|
## Set up environment |
||||
|
library(tidyverse) |
||||
|
|
||||
|
## Identify variable abbreviations |
||||
|
regions <- c('fbpop', 'eur','asi','afr','oce','lat') |
||||
|
pregions <- paste0('p',regions[-1]) |
||||
|
regionvars <- c(regions, pregions) |
||||
|
|
||||
|
geovars <- c('GISJOIN', 'STATE', 'COUNTY') |
||||
|
|
||||
|
## Set Functions |
||||
|
source('dcarea_functions.R') |
||||
|
|
||||
|
## Create dataset that contains only DC-area tracts and constructed |
||||
|
## foreign-born variables |
||||
|
select.regionvars <- function(dta) {
  ## Arguments: `dta`: tract dataset containing `GISJOIN` plus the
  ##                   columns named in `geovars` and `regionvars`
  ## Returns: DC-area rows of `dta` (via `select.dcarea`), with `GISJOIN`
  ##          stored as character and only the `geovars` and `regionvars`
  ##          columns kept, in that order
  dcarea <- select.dcarea(dta)
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  keep <- c(geovars, regionvars)
  dcarea[, keep]
}
||||
|
|
||||
|
## Load datasets containing foreign-born variables from files based on |
||||
|
## 2010 tracts |
||||
|
load('../US/tracts/2010/tabular/foreign-born/dataset/tracts-2010TIGER-foreign-born.Rdata') |
||||
|
|
||||
|
## Create list of data frames for each year and restrict to created |
||||
|
## foreign-born variables |
||||
|
dtas <- list(trt14_fb, trt15_fb, trt16_fb, trt17_fb) |
||||
|
regiondtas <- lapply(dtas, select.regionvars) |
||||
|
|
||||
|
## Create single wide data frame with year appended to variable name |
||||
|
namelist <- lapply(14:17, |
||||
|
function(x) c(geovars, paste0(regionvars, x))) |
||||
|
regiondta <- mapply(setNames, regiondtas, namelist, SIMPLIFY = FALSE) %>% |
||||
|
reduce(left_join, by='GISJOIN') |
||||
|
regiondta$STATE <- regiondta$STATE.x |
||||
|
regiondta$COUNTY <- regiondta$COUNTY.x |
||||
|
regiondta <- regiondta[, -(grep('^STATE.+|^COUNTY.+', names(regiondta), perl=TRUE))] |
||||
|
|
||||
|
## Write file containing wide dataset to CSV format |
||||
|
write.csv(regiondta, 'tracts/2010/tabular/foreign-born/dataset/tracts-2010TIGER-foreign-born.csv') |
||||
|
|
||||
|
|
||||
|
|
||||
|
size 418196 |
@ -0,0 +1,52 @@ |
|||||
|
## Creates single file containing marital status variables from the |
||||
|
## 2010-2014 through 2013-2017 American Community Surveys for tracts |
||||
|
## in the DC Area |
||||
|
|
||||
|
## Set up environment |
||||
|
library(tidyverse) |
||||
|
|
||||
|
## Identify marital status variable names |
||||
|
statuses <- c('mar', 'nvm', 'wid', 'div') |
||||
|
pstatuses <- paste0('p', statuses) |
||||
|
statusvars <- c(statuses, pstatuses) |
||||
|
|
||||
|
## Identify geographic identifiers to keep |
||||
|
geovars <- c('GISJOIN', 'STATE', 'COUNTY') |
||||
|
|
||||
|
## Set Functions |
||||
|
source('dcarea_functions.R') |
||||
|
|
||||
|
## Define function to create dataset that contains only DC-area tracts and |
||||
|
## constructed marital status variables |
||||
|
select.statusvars <- function(dta) {
  ## Arguments: `dta`: tract dataset containing `GISJOIN` plus the
  ##                   columns named in `geovars` and `statusvars`
  ## Returns: DC-area rows of `dta` (via `select.dcarea`), with `GISJOIN`
  ##          stored as character and only the `geovars` and `statusvars`
  ##          columns kept, in that order
  dcarea <- select.dcarea(dta)
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  keep <- c(geovars, statusvars)
  dcarea[, keep]
}
||||
|
|
||||
|
## Load datasets containing marital status variables using data from |
||||
|
## files based on 2010 tracts |
||||
|
load('../US/tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.Rdata') |
||||
|
|
||||
|
## Create list of data frames for each year and restrict to created |
||||
|
## marital status variables |
||||
|
dtas <- list(trt14_ms, trt15_ms, trt16_ms, trt17_ms) |
||||
|
statusdtas <- lapply(dtas, select.statusvars) |
||||
|
|
||||
|
## Create single wide data frame with year appended to variable name |
||||
|
namelist <- lapply(14:17, function(x) c(geovars, paste0(statusvars, x))) |
||||
|
statusdta <- mapply(setNames, statusdtas, namelist, SIMPLIFY = FALSE) %>% |
||||
|
reduce(left_join, by='GISJOIN') |
||||
|
statusdta$STATE <- statusdta$STATE.x |
||||
|
statusdta$COUNTY <- statusdta$COUNTY.x |
||||
|
statusdta <- statusdta[, -(grep('^STATE.+|^COUNTY.+', names(statusdta), perl=TRUE))] |
||||
|
|
||||
|
## Write file containing wide dataset to CSV format |
||||
|
write.csv(statusdta, |
||||
|
'tracts/2010/tabular/marital-status/dataset/tracts-2010TIGER-marital-status.csv') |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
size 403477 |
@ -0,0 +1,42 @@ |
|||||
|
## Creates single file containing median age variables at the |
||||
|
## tract level for DC Area from the 2010 Census and 2010-2014 through |
||||
|
## 2013-2017 American Community Surveys for tracts in the DC Area |
||||
|
|
||||
|
## Set up environment |
||||
|
library(tidyverse) |
||||
|
|
||||
|
agevar <- 'mdage' |
||||
|
|
||||
|
## Identify geographic identifier variables to keep |
||||
|
geovars <- c('GISJOIN', 'STATE', 'COUNTY') |
||||
|
|
||||
|
## Set Functions |
||||
|
source('dcarea_functions.R') |
||||
|
|
||||
|
## Create dataset that contains only DC-area tracts and constructed |
||||
|
## median age variable |
||||
|
select.agevars <- function(dta) {
  ## Arguments: `dta`: tract dataset containing `GISJOIN` plus the
  ##                   columns named in `geovars` and `agevar`
  ## Returns: DC-area rows of `dta` (via `select.dcarea`), with `GISJOIN`
  ##          stored as character and only the `geovars` and `agevar`
  ##          columns kept, in that order
  dcarea <- select.dcarea(dta)
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  keep <- c(geovars, agevar)
  dcarea[, keep]
}
||||
|
|
||||
|
## Load datasets containing median age variables from 2010 TIGER/Line files |
||||
|
load('../US/tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.Rdata') |
||||
|
|
||||
|
## Create list of data frames for each year and restrict to median age |
||||
|
## variables |
||||
|
dtas <- list(trt10c_ag, trt14_ag, trt15_ag, trt16_ag, trt17_ag) |
||||
|
agedtas <- lapply(dtas, select.agevars) |
||||
|
|
||||
|
## Create single wide data frame with year appended to variable name |
||||
|
suffixes <- c('10c', 14:17) |
||||
|
namelist <- lapply(suffixes, function(x) c(geovars, paste0(agevar, x))) |
||||
|
agedta <- mapply(setNames, agedtas, namelist, SIMPLIFY = FALSE) %>% |
||||
|
reduce(left_join, by='GISJOIN') |
||||
|
agedta$STATE <- agedta$STATE.x |
||||
|
agedta$COUNTY <- agedta$COUNTY.x |
||||
|
agedta <- agedta[, -(grep('^STATE.+|^COUNTY.+', names(agedta), perl=TRUE))] |
||||
|
|
||||
|
## Write file containing wide dataset to CSV format |
||||
|
write.csv(agedta, 'tracts/2010/tabular/median-age/dataset/tracts-2010TIGER-median-age.csv') |
||||
|
size 78786 |
@ -0,0 +1,63 @@ |
|||||
|
## File creates single file containing race & ethnicity variables at the |
||||
|
## tract level for DC Area from the 2010 Census and 2007-2011 through |
||||
|
## 2013-2017 American Community Surveys. |
||||
|
## |
||||
|
## Also includes variables indicating whether a neighborhood qualifies as a |
||||
|
## "quadrivial" neighborhood (Asians, blacks, Latinxs, and whites each make |
||||
|
## up at least 10% of the tract and no group is a majority) in each year. |
||||
|
|
||||
|
|
||||
|
## Set up environment |
||||
|
library(tidyverse) |
||||
|
|
||||
|
## Identify race variable names |
||||
|
races <- c('totpop', 'nhw', 'nhb', 'api', 'hsp', 'oth', 'two') |
||||
|
praces <- paste0('p', races[-1]) |
||||
|
racevars <- c(races, praces) |
||||
|
|
||||
|
## Identify variables to keep that are not race variables |
||||
|
nonracevars <- c('GISJOIN', 'STATE', 'COUNTY') |
||||
|
|
||||
|
## Set Functions |
||||
|
source('dcarea_functions.R') |
||||
|
|
||||
|
## Create dataset that contains only DC-area tracts and constructed |
||||
|
## race variables |
||||
|
select.racevars <- function(dta) {
  ## Arguments: `dta`: tract dataset containing `GISJOIN` plus the
  ##                   columns named in `nonracevars` and `racevars`
  ## Returns: DC-area rows of `dta` (via `select.dcarea`), with `GISJOIN`
  ##          stored as character and only the `nonracevars` and
  ##          `racevars` columns kept, in that order
  dcarea <- select.dcarea(dta)
  dcarea$GISJOIN <- as.character(dcarea$GISJOIN)
  keep <- c(nonracevars, racevars)
  dcarea[, keep]
}
||||
|
|
||||
|
## Identify quadrivial neighborhoods |
||||
|
id.quads <- function(dta) {
  ## Arguments: `dta`: dataset containing the share columns named by the
  ##                   first four elements of `praces`
  ## Returns: `dta` with an added logical column `quad` that is TRUE
  ##          when each of those four shares is at least .10 and below .5

  ## Keep the share columns as a data frame so a single-row (or
  ## single-column) input is handled the same way as any other
  shares <- dta[, praces[1:4], drop = FALSE]

  ## One logical vector per group. `lapply` always returns a list, whereas
  ## `sapply` returns a plain vector for one-row input, which made the
  ## row-wise `apply(..., 1, all)` fail.
  eligible <- lapply(shares, function(x) x >= .10 & x < .5)

  ## A row qualifies only if every group is eligible; `&` propagates NA
  ## the same way `all()` does
  dta['quad'] <- Reduce(`&`, eligible)
  dta
}
||||
|
|
||||
|
# ## Sets variable names for a specific dataset in the `racedtas` list |
||||
|
# set.varnames <- function(x,y) setNames(racedtas[[x]], y) |
||||
|
|
||||
|
## Load datasets containing race-ethnicity variables from 2010 TIGER/Line files |
||||
|
## for US tracts 2010 |
||||
|
load('../US/tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.Rdata') |
||||
|
|
||||
|
## Create list of data frames for each year and restrict to created race |
||||
|
## variables |
||||
|
dtas <- list(trt10_re, trt11_re, trt12_re, trt13_re, trt14_re, trt15_re, |
||||
|
trt16_re, trt17_re) |
||||
|
racedtas <- lapply(dtas, select.racevars) %>% |
||||
|
lapply(id.quads) |
||||
|
|
||||
|
## Create single wide data frame with year appended to variable name |
||||
|
namelist <- lapply(10:17, |
||||
|
function(x) c(nonracevars, paste0(c(racevars, 'quad'), x))) |
||||
|
racedta <- mapply(setNames, racedtas, namelist, SIMPLIFY = FALSE) %>% |
||||
|
reduce(left_join, by='GISJOIN') |
||||
|
racedta$STATE <- racedta$STATE.x |
||||
|
racedta$COUNTY <- racedta$COUNTY.x |
||||
|
racedta <- racedta[, -(grep('^STATE.+|^COUNTY.+', names(racedta), perl=TRUE))] |
||||
|
|
||||
|
## Write file containing wide dataset to CSV format |
||||
|
write.csv(racedta, 'tracts/2010/tabular/race-ethnicity/dataset/tracts-2010TIGER-race-ethnicity.csv') |
||||
|
size 1114494 |
Write
Preview
Loading…
Cancel
Save
Reference in new issue