# Bound et al. (1995) Table 2 – state-of-birth controls and interactions.
#
# Runs the four specifications listed in `specs` (two OLS, two IV), collects
# the education coefficient for each and, for the IV columns, the first-stage
# and overidentification statistics, then assembles a gt table and saves the
# results to Table_II.RData.

library(tidyverse)
library(ivreg)
library(here)
library(gt)

# Project helpers. The bound95_* names (and load_bound95_data) used below are
# not defined in this script; presumably they come from these files.
source(here("04-topics/rep-bound1995/Rcode/bound1995-data-prep.R"))
source(here("04-topics/rep-bound1995/Rcode/bound1995-iv-diagnostics.R"))
source(here("04-topics/rep-bound1995/Rcode/bound1995-gt-quarto.R"))

# Data ----

data <- load_bound95_data(with_state = TRUE)
message("Sample loaded: N = ", nrow(data))

# Control and instrument sets ----

demo <- bound95_demo_controls
yob <- bound95_yob_controls
state <- bound95_state_controls(data)

qtr <- bound95_qtr_iv
qtr_yr <- bound95_qtr_yr_iv
qtr_state <- bound95_state_iv(data)

# Every IV column uses the same full set of excluded instruments.
excluded_full <- c(qtr, qtr_yr, qtr_state)

# Specifications ----

# One entry per table column. Columns 3 and 4 add AGEQ / AGEQSQ to the
# controls of columns 1 and 2. IV entries additionally carry the excluded
# instruments and the included exogenous regressors.
specs <- list(
  list(id = 1L, type = "OLS", controls = c(yob, demo, state)),
  list(
    id = 2L,
    type = "IV",
    controls = c(yob, demo, state),
    excluded = excluded_full,
    included = c(yob, demo, state)
  ),
  list(
    id = 3L,
    type = "OLS",
    controls = c(yob, "AGEQ", "AGEQSQ", demo, state)
  ),
  list(
    id = 4L,
    type = "IV",
    controls = c(yob, "AGEQ", "AGEQSQ", demo, state),
    excluded = excluded_full,
    included = c(yob, "AGEQ", "AGEQSQ", demo, state)
  )
)

#' Estimate one column of the table.
#'
#' Reads the global `data`. Emits progress messages with the elapsed time.
#'
#' @param spec One element of `specs`: a list with `id`, `type` ("OLS" or
#'   "IV") and `controls`, plus `excluded` and `included` for IV columns.
#' @return A list with `f_excluded`, `partial_r2`, `overid_f`, `coef`, `se`
#'   and `n_excluded`. The first three are `NA_real_` for OLS columns; for IV
#'   columns `f_excluded` and `partial_r2` are taken from whatever
#'   `bound95_first_stage_stats()` returns.
run_spec <- function(spec) {
  message("Running column (", spec$id, ") ", spec$type, " ...")
  t0 <- Sys.time()

  if (spec$type == "OLS") {
    m <- bound95_run_ols(data, spec$controls)
    out <- list(
      f_excluded = NA_real_,
      partial_r2 = NA_real_,
      overid_f = NA_real_
    )
  } else {
    m <- bound95_run_iv(data, spec$controls, spec$excluded, spec$included)
    fs <- bound95_first_stage_stats(
      data,
      excluded = spec$excluded,
      included = spec$included
    )
    # Pass the instrument names from the spec. The original read
    # `fs$excluded`, but `fs` is the first-stage statistics list; `$` does
    # not partial-match "excluded" to "f_excluded", so that was NULL.
    out <- c(
      fs,
      list(
        overid_f = bound95_basmann_f(
          m,
          spec$excluded,
          spec$included,
          data
        )
      )
    )
  }

  educ <- bound95_educ_coef(m)
  message(
    " done in ",
    round(difftime(Sys.time(), t0, units = "mins"), 1),
    " min"
  )

  # OLS specs have no `excluded` element, so n_excluded is 0 for them.
  c(
    out,
    list(
      coef = educ$estimate,
      se = educ$std.error,
      n_excluded = length(spec$excluded)
    )
  )
}

results <- purrr::map(specs, run_spec)

# Published values ----

# Reference values to compare the replication against; saved alongside the
# results but not used in any computation below.
paper_targets <- tibble::tribble(
  ~col, ~coef, ~se,   ~f_fs, ~pr2,  ~overid,
  1L,   0.063, 0.000, NA,    NA,    NA,
  2L,   0.083, 0.009, 2.428, 0.133, 0.919,
  3L,   0.063, 0.000, NA,    NA,    NA,
  4L,   0.081, 0.011, 1.869, 0.101, 0.917
)

# Replication summary ----

replication <- tibble(
  col = vapply(specs, `[[`, integer(1), "id"),
  coef = vapply(results, `[[`, numeric(1), "coef"),
  se = vapply(results, `[[`, numeric(1), "se"),
  f_excluded = vapply(results, `[[`, numeric(1), "f_excluded"),
  partial_r2 = vapply(results, `[[`, numeric(1), "partial_r2"),
  overid_f = vapply(results, `[[`, numeric(1), "overid_f"),
  n_excluded = vapply(results, `[[`, numeric(1), "n_excluded")
)

message("Table II replication vs Bound (1995):")
print(replication)

# Display table ----

# First column holds the row labels; one column per spec is appended below.
table_data <- tibble(
  ` ` = c(
    "Coefficient",
    "F (excluded instruments)",
    "Partial R² (excluded instruments, ×100)",
    "F (overidentification)",
    "Age, Age² (quarter years)",
    "9 Year of birth dummies",
    "Quarter of birth / QOB×YOB / QOB×state (excluded IV)",
    "Number of excluded instruments"
  )
)

for (i in seq_along(specs)) {
  r <- results[[i]]
  s <- specs[[i]]
  col_lab <- paste0("(", s$id, ")")
  is_iv <- s$type == "IV"

  # Cell order must match the row labels above. Diagnostic rows are blank
  # for OLS columns; "×" marks which controls / instruments a column uses.
  table_data[[col_lab]] <- c(
    bound95_coef_se(r$coef, r$se),
    if (is_iv) bound95_stat_cell(r$f_excluded) else " ",
    if (is_iv) bound95_stat_cell(r$partial_r2 * 100) else " ",
    if (is_iv) bound95_stat_cell(r$overid_f) else " ",
    if (s$id %in% c(3L, 4L)) "×" else " ",
    "×",
    if (is_iv) "×" else " ",
    if (is_iv) as.character(r$n_excluded) else " "
  )
}

# NOTE(review): this helper carries an ak91_ prefix, unlike the bound95_*
# helpers used elsewhere; presumably it is provided by bound1995-gt-quarto.R
# - confirm.
table_data <- ak91_quarto_blank_df(table_data)

gt_tbl <- table_data |>
  gt() |>
  tab_header(
    title = md("Table 2: Estimated Effect of Education on Log Weekly Earnings, Controlling for State of Birth")
  ) |>
  tab_spanner(label = "OLS", columns = `(1)`, id = "t2_ols1") |>
  tab_spanner(label = "IV", columns = `(2)`, id = "t2_iv2") |>
  tab_spanner(label = "OLS", columns = `(3)`, id = "t2_ols3") |>
  tab_spanner(label = "IV", columns = `(4)`, id = "t2_iv4") |>
  cols_label(` ` = gt::md(" ")) |>
  cols_align(align = "left", columns = 1) |>
  cols_align(align = "center", columns = 2:5) |>
  tab_footnote(
    # The line break inside the original literal is kept as an explicit "\n".
    footnote = md(paste0(
      "Standard errors in parentheses. \n",
      "Same sample as Table 1; fifty state-of-birth dummies included as controls."
    )),
    locations = cells_title(groups = "title")
  ) |>
  fmt_markdown(columns = everything()) |>
  opt_row_striping()

# Save ----

save(
  table_data, gt_tbl, replication, paper_targets,
  file = here("04-topics/rep-bound1995/Rcode/Table_II.RData")
)
message("Saved: 04-topics/rep-bound1995/Rcode/Table_II.RData")