# Load and prepare the AK91 / Bound (1995) 1980 Census sample (men born
# 1930–1939).
#
# Arguments:
#   path        Path to the Stata file, read with haven::read_dta(). The
#               default is resolved with here::here().
#   with_state  If TRUE, additionally build state-of-birth dummies
#               (`state1`, ..., `stateK`; one per distinct SOB value, in
#               sorted order), the QTRj x state interactions for the first
#               K - 1 states, and the attribute "bound95_n_states" (= K).
#
# Returns the renamed data restricted to COHORT == 3039, with these added
# columns:
#   YOB (two-digit), COHORT, AGEQ (shifted by 1900 when CENSUS == 80),
#   AGEQSQ, AGESQ, YR0..YR9, QTR1..QTR3 and QTRjYRi (j = 1..3, i = 0..9).
load_bound95_data <- function(
    path = here::here("04-topics/rep-bound1995/Rcode/raw_data.dta"),
    with_state = FALSE) {
  data <- haven::read_dta(path)

  data <- data |>
    dplyr::rename(
      AGE = v1,
      AGEQ = v2,
      EDUC = v4,
      ENOCENT = v5,
      ESOCENT = v6,
      LWKLYWGE = v9,
      MARRIED = v10,
      MIDATL = v11,
      MT = v12,
      NEWENG = v13,
      CENSUS = v16,
      SOB = v17,
      QOB = v18,
      RACE = v19,
      SMSA = v20,
      SOATL = v21,
      WNOCENT = v24,
      WSOCENT = v25,
      YOB = v27
    ) |>
    dplyr::mutate(
      # Four-digit birth years are reduced to two digits.
      YOB = dplyr::if_else(YOB >= 1900, YOB - 1900, YOB),
      COHORT = dplyr::case_when(
        YOB >= 20 & YOB <= 29 ~ 2029L,
        YOB >= 30 & YOB <= 39 ~ 3039L,
        YOB >= 40 & YOB <= 49 ~ 4049L,
        TRUE ~ NA_integer_
      ),
      AGEQ = dplyr::if_else(CENSUS == 80, AGEQ - 1900, AGEQ),
      AGEQSQ = AGEQ^2,
      AGESQ = AGE^2
    ) |>
    dplyr::filter(COHORT == 3039L)

  # Year-of-birth dummies: YRi flags the i-th year within a decade.
  for (i in 0:9) {
    data[[paste0("YR", i)]] <-
      as.integer(data$YOB %in% c(20 + i, 30 + i, 40 + i))
  }

  # Quarter-of-birth dummies; the fourth quarter is the omitted category.
  for (j in 1:3) {
    data[[paste0("QTR", j)]] <- as.integer(data$QOB == j)
  }

  # Quarter x year-of-birth interactions.
  for (j in 1:3) {
    for (i in 0:9) {
      data[[paste0("QTR", j, "YR", i)]] <-
        data[[paste0("QTR", j)]] * data[[paste0("YR", i)]]
    }
  }

  if (with_state) {
    unique_states <- sort(unique(data$SOB))
    n_states <- length(unique_states)

    # One indicator column per state of birth (no intercept).
    st_mat <- stats::model.matrix(
      ~ factor(SOB, levels = unique_states) - 1,
      data = data
    )
    colnames(st_mat) <- paste0("state", seq_len(ncol(st_mat)))
    data <- dplyr::bind_cols(data, tibble::as_tibble(st_mat))

    # Quarter x state interactions; the last state is left out.
    for (j in 1:3) {
      qtr_j <- data[[paste0("QTR", j)]]
      for (i in seq_len(n_states - 1L)) {
        data[[paste0("QTR", j, "state", i)]] <-
          qtr_j * data[[paste0("state", i)]]
      }
    }

    # Recorded so the bound95_state_*() helpers can rebuild column names.
    attr(data, "bound95_n_states") <- n_states
  }

  data
}

# Demographic and region control columns.
bound95_demo_controls <- c(
  "RACE", "MARRIED", "SMSA", "NEWENG", "MIDATL", "ENOCENT",
  "WNOCENT", "SOATL", "ESOCENT", "WSOCENT", "MT"
)

# Year-of-birth dummies used as controls (YR9 is left out).
bound95_yob_controls <- paste0("YR", 0:8)

# Quarter x year-of-birth instrument names, ordered QTR1YR0, QTR2YR0,
# QTR3YR0, QTR1YR1, ...
bound95_qtr_yr_iv <- as.vector(
  outer(paste0("QTR", 1:3), paste0("YR", 0:9), paste0)
)

# Quarter-of-birth instrument names.
bound95_qtr_iv <- paste0("QTR", 1:3)

# Private helper: read the "bound95_n_states" attribute from `data`,
# stopping with an explicit message when it is absent.
.bound95_n_states <- function(data) {
  n_states <- attr(data, "bound95_n_states", exact = TRUE)
  if (is.null(n_states)) {
    stop(
      "`data` has no \"bound95_n_states\" attribute; ",
      "load it with load_bound95_data(with_state = TRUE).",
      call. = FALSE
    )
  }
  n_states
}

# Names of the quarter x state interaction columns for `data`, ordered
# QTR1state1, QTR2state1, QTR3state1, QTR1state2, ...
# Errors if `data` does not carry the "bound95_n_states" attribute.
bound95_state_iv <- function(data) {
  n_states <- .bound95_n_states(data)
  as.vector(
    outer(
      paste0("QTR", 1:3),
      paste0("state", seq_len(n_states - 1L)),
      paste0
    )
  )
}

# Names of the state dummy columns used as controls (all but the last
# state). Errors if `data` does not carry the "bound95_n_states" attribute.
bound95_state_controls <- function(data) {
  n_states <- .bound95_n_states(data)
  paste0("state", seq_len(n_states - 1L))
}

# Randomly permute quarter of birth (Krueger simulation in Bound Table 3).
#
# Arguments:
#   dat   Data with a QOB column and YR0..YR9 columns; when it carries the
#         "bound95_n_states" attribute, the `state*` columns are used too.
#   seed  Optional seed. When non-NULL, set.seed(seed) is called, which
#         resets the session's RNG state.
#
# Returns a plain data.frame copy of `dat` in which QOB has been shuffled
# and QTR1..QTR3, the QTRjYRi interactions and (when the attribute is
# present) the QTRjstatei interactions have been recomputed from it.
bound95_resimulate_qob <- function(dat, seed = NULL) {
  out <- data.frame(dat, check.names = FALSE)

  # Re-attach the state count so the name helpers keep working on `out`.
  n_states <- attr(dat, "bound95_n_states", exact = TRUE)
  if (!is.null(n_states)) {
    attr(out, "bound95_n_states") <- n_states
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Permute by index rather than sample(qob): sample() treats a single
  # number n as 1:n, which breaks one-row inputs. For longer vectors the
  # draw is the same as sample(qob) would give.
  qob <- as.integer(out[["QOB"]])
  qob <- qob[sample.int(length(qob))]
  out[["QOB"]] <- qob

  for (j in 1:3) {
    out[[paste0("QTR", j)]] <- as.integer(qob == j)
  }

  for (j in 1:3) {
    qtr_j <- out[[paste0("QTR", j)]]
    for (i in 0:9) {
      out[[paste0("QTR", j, "YR", i)]] <- qtr_j * out[[paste0("YR", i)]]
    }
  }

  if (!is.null(n_states)) {
    for (j in 1:3) {
      qtr_j <- out[[paste0("QTR", j)]]
      for (i in seq_len(n_states - 1L)) {
        out[[paste0("QTR", j, "state", i)]] <-
          qtr_j * out[[paste0("state", i)]]
      }
    }
  }

  out
}

# Alias of bound95_resimulate_qob() for callers that assign the return
# value back to `dat`. It returns a modified copy; `dat` itself is not
# changed.
bound95_resimulate_qob_inplace <- function(dat, seed = NULL) {
  bound95_resimulate_qob(dat, seed = seed)
}