# Card (1993) Table 4 – alternative OLS / IV specifications.
#
# Estimates the schooling coefficient under seven alternative specifications
# (OLS and IV), prints the replication next to the published targets, builds a
# gt table, and saves everything to Table_IV.RData.

library(tidyverse)
library(here)
library(gt)
library(AER)

# Project helpers. The `card93_*` functions and covariate-name vectors used
# below are not defined in this file; presumably they come from these two
# sourced scripts.
source(here("04-topics/rep-card1993/Rcode/card1993-data-prep.R"))
source(here("04-topics/rep-card1993/Rcode/card1993-gt-quarto.R"))

data <- load_card93_data()

# OLS estimate of the coefficient on `ed76`.
#
# d      Estimation sample (data frame).
# y      Name of the dependent variable.
# extra  Additional regressor names appended to the baseline controls.
#
# Returns whatever `card93_coef_se()` returns for "ed76"; downstream code reads
# its `$estimate` and `$std.error` elements.
run_ols <- function(d, y = "lwage76", extra = character()) {
  x <- c("ed76", card93_exp, card93_x_base, card93_fb_full, extra)
  card93_coef_se(card93_ols(d, y, x), "ed76")
}

# IV (2SLS) estimate of the coefficient on `ed76`.
#
# d            Estimation sample (data frame).
# y            Name of the dependent variable.
# inst         Excluded instrument(s) for schooling.
# extra_x      Additional regressors entering the wage equation.
# endog_extra  Additional regressors to treat as endogenous (they may also be
#              listed in `extra_x`; they are then removed from the exogenous
#              set).
# inst_extra   Additional excluded instruments.
#
# Schooling, experience and experience-squared are always endogenous; `age76`
# and `age2` are always excluded instruments.
run_iv <- function(
    d,
    y = "lwage76",
    inst = "nearc4",
    extra_x = character(),
    endog_extra = character(),
    inst_extra = character()) {
  x <- c(card93_x_base, card93_fb_full, extra_x)
  endog <- unique(c("ed76", "exp76", "exp2", endog_extra))

  # A regressor that appears on both sides of `|` is treated as exogenous by
  # ivreg(). Endogenous variables must therefore be removed from the control
  # set before it is reused as included instruments; otherwise a variable
  # passed in both `extra_x` and `endog_extra` (row 4: KWW) would silently
  # instrument itself.
  exog <- setdiff(x, endog)
  excluded <- setdiff(unique(c(inst, "age76", "age2", inst_extra)), exog)
  if (length(excluded) < length(endog)) {
    stop(
      "IV model is under-identified: ", length(endog),
      " endogenous regressors but only ", length(excluded),
      " excluded instruments.",
      call. = FALSE
    )
  }
  instruments <- c(excluded, exog)

  fml <- as.formula(
    paste(
      y, "~", paste(c(endog, exog), collapse = " + "),
      "|", paste(instruments, collapse = " + ")
    )
  )
  card93_coef_se(ivreg(fml, data = d), "ed76")
}

# Estimation samples ----
wage <- card93_wage_sample(data)
# Built from the full data, not from `wage` (see the replication note below).
wage78 <- data |> filter(!is.na(lwage78))
wage_kww <- wage |> filter(!is.na(kww))
wage_iq <- wage |> filter(!is.na(kww), !is.na(iq))
young <- wage |> filter(age66 <= 19)

# Row specifications ----
# Each row carries its label, its sample, and closures producing the OLS and
# IV estimates for that sample.
rows <- list(
  list(
    id = 1L,
    label = "1. Basic Specification (N = 3010)",
    d = wage,
    ols = function(d) run_ols(d),
    iv = function(d) run_iv(d)
  ),
  # NOTE(review): the label mentions 1978 education, but the regressor is
  # still `ed76` in both estimators; confirm this is intended.
  list(
    id = 2L,
    label = "2. Use 1978 Wages and Education (N = 2639 with 1978 data)",
    d = wage78,
    ols = function(d) run_ols(d, "lwage78"),
    iv = function(d) run_iv(d, "lwage78")
  ),
  list(
    id = 3L,
    label = "3. Include KWW Test Score (N = 2963 with valid KWW)",
    d = wage_kww,
    ols = function(d) run_ols(d, extra = "kww"),
    iv = function(d) run_iv(d, extra_x = "kww")
  ),
  list(
    id = 4L,
    label = "4. Include KWW; instrument KWW with IQ (N = 2040 with valid KWW and IQ)",
    d = wage_iq,
    ols = function(d) run_ols(d, extra = "kww"),
    iv = function(d) {
      run_iv(d, extra_x = "kww", endog_extra = "kww", inst_extra = "iq")
    }
  ),
  list(
    id = 5L,
    label = "5. Use Proximity to Public College as instrument for education",
    d = wage,
    ols = function(d) run_ols(d),
    iv = function(d) run_iv(d, inst = "nearc4a")
  ),
  list(
    id = 6L,
    label = "6. Use Proximities to 2-year and 4-year colleges as instruments for education",
    d = wage,
    ols = function(d) run_ols(d),
    iv = function(d) run_iv(d, inst = c("nearc4", "nearc2"))
  ),
  list(
    id = 7L,
    label = "7. Use Subsample Age 14-19 in 1966 (N = 2037)",
    d = young,
    ols = function(d) run_ols(d),
    iv = function(d) run_iv(d)
  )
)

# Estimation ----
results <- purrr::map(rows, function(r) {
  list(ols = r$ols(r$d), iv = r$iv(r$d), n = nrow(r$d))
})

# Published values, hard-coded for comparison with the replication.
paper_targets <- tribble(
  ~row, ~ols,  ~ols_se, ~iv,   ~iv_se,
  1L,   0.073, 0.006,   0.132, 0.049,
  2L,   0.066, 0.006,   0.117, 0.061,
  3L,   0.055, 0.004,   0.136, 0.078,
  4L,   0.061, 0.005,   0.089, 0.085,
  5L,   0.073, 0.006,   0.194, 0.059,
  6L,   0.073, 0.006,   0.117, 0.047,
  7L,   0.076, 0.006,   0.094, 0.064
)

replication <- tibble(
  row = vapply(rows, `[[`, integer(1), "id"),
  n = vapply(results, `[[`, integer(1), "n"),
  ols = vapply(results, function(r) r$ols$estimate, numeric(1)),
  ols_se = vapply(results, function(r) r$ols$std.error, numeric(1)),
  iv = vapply(results, function(r) r$iv$estimate, numeric(1)),
  iv_se = vapply(results, function(r) r$iv$std.error, numeric(1))
)

message("Table IV replication vs paper:")
print(replication)
print(paper_targets)

# Display table ----
table_data <- tibble(
  stub = vapply(rows, `[[`, character(1), "label"),
  `OLS Estimate` = purrr::map_chr(
    results,
    \(r) card93_coef_cell(r$ols$estimate, r$ols$std.error)
  ),
  `IV Estimate` = purrr::map_chr(
    results,
    \(r) card93_coef_cell(r$iv$estimate, r$iv$std.error)
  )
) |>
  mutate(
    # Rows 5 and 6 change only the instrument set, so their OLS cell points
    # back to row 1 instead of repeating the number.
    `OLS Estimate` = if_else(
      row_number() %in% c(5L, 6L),
      "as in row 1",
      `OLS Estimate`
    )
  )

# NOTE(review): the `ak91_` prefix differs from the `card93_` helpers used
# everywhere else; confirm this function is defined by the sourced scripts.
table_data <- ak91_quarto_blank_df(table_data)

gt_tbl <- card93_gt_standard(
  table_data |> gt(),
  stub_col = "stub",
  data_cols = c("OLS Estimate", "IV Estimate")
)

# NOTE(review): the "Row 4 IV standard error is imprecise" remark below was
# written when KWW was also listed among the instruments; re-check it against
# the current row 4 estimates.
gt_tbl <- gt_tbl |>
  card93_gt_source_notes(
    c(
      paste(
        "**Notes:** Dependent variable in rows 1 and 3–7: log hourly wages in 1976.",
        "Row 2: log hourly wages in 1978. Estimates are coefficients on the linear",
        "education term with black, 1976 South/SMSA, 1966 region/SMSA, experience",
        "and experience-squared, and fourteen family-background controls unless noted."
      ),
      paste(
        "**Replication:** Row 2 uses all men with valid `lwage78` (N = 2639), not",
        "restricted to the N = 3010 wage subsample. Row 4 IV standard error is",
        "imprecise in this replication. Rows 5–6 OLS match row 1; only IV instruments differ."
      )
    )
  ) |>
  card93_gt_col_footnote(
    "IV Estimate",
    paste(
      "Education and experience are endogenous; instruments are `nearc4` (or alternatives",
      "noted below), `age76`, and `age2`, plus all exogenous controls. Row 1 matches",
      "Panel B, column (6) of Table 3."
    )
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "4. Include KWW; instrument KWW with IQ (N = 2040 with valid KWW and IQ)",
    "KWW enters the wage equation and is instrumented by IQ (`iq`); subsample with non-missing KWW and IQ."
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "5. Use Proximity to Public College as instrument for education",
    "Instrument for schooling is proximity to a public 4-year college (`nearc4a`)."
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "6. Use Proximities to 2-year and 4-year colleges as instruments for education",
    "Instruments are `nearc4` (4-year) and `nearc2` (2-year college proximity)."
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "7. Use Subsample Age 14-19 in 1966 (N = 2037)",
    "Subsample with `age66` $\\leq$ 19 in 1966."
  )

gt_tbl <- card93_gt_finalize(gt_tbl)

save(
  table_data, gt_tbl, replication, paper_targets,
  file = here("04-topics/rep-card1993/Rcode/Table_IV.RData")
)