# Bound et al. (1995) Table 1 – wage equations, men born 1930–1939. library(tidyverse) library(ivreg) library(here) library(gt) source(here("04-topics/rep-bound1995/Rcode/bound1995-data-prep.R")) source(here("04-topics/rep-bound1995/Rcode/bound1995-iv-diagnostics.R")) source(here("04-topics/rep-bound1995/Rcode/bound1995-gt-quarto.R")) data <- load_bound95_data() demo <- bound95_demo_controls yob <- bound95_yob_controls qtr <- bound95_qtr_iv qtr_yr <- bound95_qtr_yr_iv specs <- list( list(id = 1L, type = "OLS", controls = c("AGE", "AGESQ", demo)), list( id = 2L, type = "IV", controls = c("AGE", "AGESQ", demo), excluded = qtr, included = c("AGE", "AGESQ", demo) ), list(id = 3L, type = "OLS", controls = c(yob, demo)), list( id = 4L, type = "IV", controls = c(yob, demo), excluded = qtr_yr, included = c(yob, demo) ), list(id = 5L, type = "OLS", controls = c(yob, "AGEQ", "AGEQSQ", demo)), list( id = 6L, type = "IV", controls = c(yob, "AGEQ", "AGEQSQ", demo), excluded = qtr_yr, included = c(yob, "AGEQ", "AGEQSQ", demo) ) ) run_spec <- function(spec) { if (spec$type == "OLS") { m <- bound95_run_ols(data, spec$controls) out <- list( coef = NA_real_, se = NA_real_, f_excluded = NA_real_, partial_r2 = NA_real_, overid_f = NA_real_ ) } else { m <- bound95_run_iv(data, spec$controls, spec$excluded, spec$included) fs <- bound95_first_stage_stats( data, excluded = spec$excluded, included = spec$included ) out <- c( fs, list( overid_f = bound95_basmann_f( m, fs$excluded, spec$included, data ) ) ) } educ <- bound95_educ_coef(m) out$coef <- educ$estimate out$se <- educ$std.error out$n_excluded <- if (spec$type == "IV") length(spec$excluded) else NA_integer_ out } results <- purrr::map(specs, run_spec) paper_targets <- tibble::tribble( ~col, ~coef, ~se, ~f_fs, ~pr2, ~overid, 1L, 0.063, 0.000, NA, NA, NA, 2L, 0.142, 0.033, 13.486, 0.012, 0.932, 3L, 0.063, 0.000, NA, NA, NA, 4L, 0.081, 0.016, 4.747, 0.043, 0.775, 5L, 0.063, 0.000, NA, NA, NA, 6L, 0.060, 0.029, 1.613, 0.014, 0.725 ) replication <- tibble( col = vapply(specs, `[[`, integer(1), "id"), type = vapply(specs, `[[`, character(1), "type"), coef = vapply(results, `[[`, numeric(1), "coef"), se = vapply(results, `[[`, numeric(1), "se"), f_excluded = vapply(results, `[[`, numeric(1), "f_excluded"), partial_r2 = vapply(results, `[[`, numeric(1), "partial_r2"), overid_f = vapply(results, `[[`, numeric(1), "overid_f"), n_excluded = vapply(results, `[[`, numeric(1), "n_excluded") ) message("Table I replication vs Bound (1995):") print(replication) print(paper_targets) fmt_col <- function(vals, fn) { purrr::map_chr(vals, fn) } table_data <- tibble( ` ` = c( "Coefficient", "F (excluded instruments)", "Partial R² (excluded instruments, ×100)", "F (overidentification)", "Age, Age²", "9 Year of birth dummies", "Quarter of birth (excluded IV)", "Quarter of birth × year of birth (excluded IV)", "Number of excluded instruments" ) ) for (i in seq_along(specs)) { r <- results[[i]] s <- specs[[i]] col_lab <- paste0("(", s$id, ")") is_iv <- s$type == "IV" table_data[[col_lab]] <- c( bound95_coef_se(r$coef, r$se), if (is_iv) bound95_stat_cell(r$f_excluded) else " ", if (is_iv) bound95_stat_cell(r$partial_r2 * 100) else " ", if (is_iv) bound95_stat_cell(r$overid_f) else " ", if (s$id %in% c(1L, 2L, 5L, 6L)) "×" else " ", if (s$id %in% c(3L, 4L, 5L, 6L)) "×" else " ", if (is_iv && identical(s$excluded, qtr)) "×" else " ", if (is_iv && identical(s$excluded, qtr_yr)) "×" else " ", if (is_iv) as.character(r$n_excluded) else " " ) } table_data <- ak91_quarto_blank_df(table_data) gt_tbl <- table_data |> gt() |> tab_header( title = md("Table 1: Estimated Effect of Completed Years of Education on Men's Log Weekly Earnings") ) |> tab_spanner(label = "OLS", columns = `(1)`, id = "t1_ols1") |> tab_spanner(label = "IV", columns = `(2)`, id = "t1_iv2") |> tab_spanner(label = "OLS", columns = `(3)`, id = "t1_ols3") |> tab_spanner(label = "IV", columns = `(4)`, id = "t1_iv4") |> tab_spanner(label = "OLS", columns = `(5)`, id = "t1_ols5") |> tab_spanner(label = "IV", columns = `(6)`, id = "t1_iv6") |> cols_label(` ` = gt::md(" ")) |> cols_align(align = "left", columns = 1) |> cols_align(align = "center", columns = 2:7) |> tab_footnote( footnote = md("Standard errors in parentheses. 5% Public-Use Sample, 1980 Census; men born 1930–1939 (N = 329,509). All specifications include race, SMSA, marital status, and eight regional dummies."), locations = cells_title(groups = "title") ) |> fmt_markdown(columns = everything()) |> opt_row_striping() |> tab_options( table.font.size = px(13), heading.background.color = "#e8e8e8", table.width = pct(100) ) save(table_data, gt_tbl, replication, paper_targets, file = here("04-topics/rep-bound1995/Rcode/Table_I.RData"))