# Card (1993) Table 5 – interaction IV (parental education × college proximity).
#
# This part of the script loads the wage sample, builds the nearc4-by-f
# interaction columns, fits the two reduced-form regressions (columns 1-2)
# and computes the column (3) structural estimates.

library(tidyverse)
library(here)
library(gt)
library(AER)

source(here("04-topics/rep-card1993/Rcode/card1993-data-prep.R"))
source(here("04-topics/rep-card1993/Rcode/card1993-gt-quarto.R"))

data <- load_card93_data()
wage <- card93_wage_sample(data)

# Control set shared by every model in the table.
x <- c(card93_x_base, card93_fb_full)

# Interaction columns nearc4_f1 ... nearc4_f8 = nearc4 * f1 ... f8; they are
# used as instruments for the column (4) model.
nearc_x_f <- paste0("nearc4_f", 1:8)
for (i in seq_along(nearc_x_f)) {
  wage[[nearc_x_f[i]]] <- wage$nearc4 * wage[[paste0("f", i)]]
}

# Reduced forms (columns 1-2): same right-hand side, two outcomes.
# NOTE(review): `nearc4_low` is not created in this script; presumably
# card93_wage_sample() adds it -- confirm.
rf_rhs <- paste(c("nearc4", "nearc4_low", card93_exp, x), collapse = " + ")

# Fit each reduced form once and read both coefficients from the same fit.
rf_ed_fit <- lm(as.formula(paste("ed76 ~", rf_rhs)), data = wage)
rf_w_fit <- lm(as.formula(paste("lwage76 ~", rf_rhs)), data = wage)

rf_ed <- card93_coef_se(rf_ed_fit, "nearc4")
rf_ed_low <- card93_coef_se(rf_ed_fit, "nearc4_low")
rf_w <- card93_coef_se(rf_w_fit, "nearc4")
rf_w_low <- card93_coef_se(rf_w_fit, "nearc4_low")

# Structural col 3: IV return = Wald ratio RF(nearc4_low); direct nearc4 from 2SLS.

# Ratio of two coefficient estimates with an approximate standard error.
#
# num, den: lists with numeric elements `estimate` and `std.error`.
# Returns list(estimate = num / den, std.error = ...). The standard error
# combines the two relative errors and contains no covariance term.
# It is written as sqrt(se_num^2 + (est * se_den)^2) / |den|, which equals
# |est| * sqrt((se_num / num)^2 + (se_den / den)^2) whenever num is non-zero
# but stays finite when the numerator estimate is exactly zero.
wald_iv <- function(num, den) {
  est <- num$estimate / den$estimate
  se <- sqrt(num$std.error^2 + (est * den$std.error)^2) / abs(den$estimate)
  list(estimate = est, std.error = se)
}

iv_ed_3 <- wald_iv(rf_w_low, rf_ed_low)

# NOTE(review): `nearc4_low` is passed both as the fourth argument and inside
# the final vector; confirm against card93_iv_panel_b() that this is intended.
iv_3 <- card93_iv_panel_b(
  wage,
  "lwage76",
  "ed76",
  "nearc4_low",
  c(x, "nearc4", "nearc4_low")
)
nearc_3 <- card93_coef_se(iv_3, "nearc4")

# Structural col 4: instruments = nearc4 × f1–f8.
# Column (4) 2SLS. The instrument side of the formula lists the eight
# nearc4-by-f interactions, age76 and age2, the controls and nearc4; exp76 and
# exp2 appear only on the regressor side.
inst4 <- c(nearc_x_f, "age76", "age2", x, "nearc4")
fml4 <- as.formula(
  paste(
    "lwage76 ~ ed76 + exp76 + exp2 + nearc4 +",
    paste(x, collapse = " + "),
    "|",
    paste(inst4, collapse = " + ")
  )
)
iv_4 <- ivreg(fml4, data = wage)
iv_ed_4 <- card93_coef_se(iv_4, "ed76")
nearc_4 <- card93_coef_se(iv_4, "nearc4")

# Published values, one row per reported coefficient.
paper_targets <- tribble(
  ~col, ~var,            ~value, ~se,
  1L,   "nearc4_ed",     0.154,  0.135,
  1L,   "nearc4_low_ed", 0.462,  0.186,
  2L,   "nearc4_w",      0.029,  0.024,
  2L,   "nearc4_low_w",  0.043,  0.032,
  3L,   "iv_ed",         0.093,  0.065,
  3L,   "nearc4",        0.015,  0.029,
  4L,   "iv_ed",         0.097,  0.048,
  4L,   "nearc4",        0.013,  0.024
)

# Replicated values in the same row order as `paper_targets`. `col` is stored
# as integer so it has the same type as `paper_targets$col`.
# NOTE(review): the direct-effect rows are labelled "nearc4_direct" here but
# "nearc4" in `paper_targets`; a join on `var` would not match those rows.
replication <- tibble(
  col = rep(1:4, each = 2),
  var = c(
    "nearc4_ed", "nearc4_low_ed",
    "nearc4_w", "nearc4_low_w",
    "iv_ed", "nearc4_direct",
    "iv_ed", "nearc4_direct"
  ),
  estimate = c(
    rf_ed$estimate, rf_ed_low$estimate,
    rf_w$estimate, rf_w_low$estimate,
    iv_ed_3$estimate, nearc_3$estimate,
    iv_ed_4$estimate, nearc_4$estimate
  ),
  std.error = c(
    rf_ed$std.error, rf_ed_low$std.error,
    rf_w$std.error, rf_w_low$std.error,
    iv_ed_3$std.error, nearc_3$std.error,
    iv_ed_4$std.error, nearc_4$std.error
  )
)

# Show the replicated numbers next to the published ones, as the message
# announces. Rows are paired by position, so check the alignment first.
stopifnot(
  nrow(replication) == nrow(paper_targets),
  all(replication$col == paper_targets$col)
)
message("Table V replication vs paper:")
print(
  mutate(
    replication,
    paper_value = paper_targets$value,
    paper_se = paper_targets$se
  )
)

# Display table: one row per stub label, one column per model.
table_data <- tibble(
  stub = c(
    "Live Near College in 1966",
    "Live College * Low Parental Education",
    "Education",
    "Family Background Variables"
  ),
  `(1)` = c(
    card93_coef_cell(rf_ed$estimate, rf_ed$std.error),
    card93_coef_cell(rf_ed_low$estimate, rf_ed_low$std.error),
    "--",
    "yes"
  ),
  `(2)` = c(
    card93_coef_cell(rf_w$estimate, rf_w$std.error),
    card93_coef_cell(rf_w_low$estimate, rf_w_low$std.error),
    "--",
    "yes"
  ),
  `(3)` = c(
    card93_coef_cell(nearc_3$estimate, nearc_3$std.error),
    "--",
    card93_coef_cell(iv_ed_3$estimate, iv_ed_3$std.error),
    "yes"
  ),
  `(4)` = c(
    card93_coef_cell(nearc_4$estimate, nearc_4$std.error),
    "--",
    card93_coef_cell(iv_ed_4$estimate, iv_ed_4$std.error),
    "yes"
  )
)

# NOTE(review): this helper carries an `ak91_` prefix rather than `card93_`;
# presumably it is defined in one of the sourced files -- confirm.
table_data <- ak91_quarto_blank_df(table_data)

gt_tbl <- table_data |>
  gt() |>
  tab_spanner(
    label = "Reduced Form Models",
    columns = c(`(1)`, `(2)`)
  ) |>
  tab_spanner(
    label = "Structural Models of Earnings",
    columns = c(`(3)`, `(4)`)
  ) |>
  cols_label(
    `(1)` = html("Education"),
    `(2)` = html("Earnings"),
    `(3)` = html(" "),
    `(4)` = html(" ")
  )

gt_tbl <- card93_gt_standard(
  gt_tbl,
  stub_col = "stub",
  data_cols = c("(1)", "(2)", "(3)", "(4)"),
  header_rows = NULL
)

# Source notes and per-row footnotes.
gt_tbl <- gt_tbl |>
  card93_gt_source_notes(
    c(
      paste(
        "**Notes:** Standard errors in parentheses. Sample size is 3010.",
        "Dependent variable: log hourly wages in 1976 (mean 6.262, SD 0.444).",
        "All models include black, 1976 South/SMSA, 1966 region/SMSA, experience",
        "and experience-squared, and full family background. Experience is endogenous",
        "in structural columns; age and age-squared are instruments."
      ),
      paste(
        "**Replication:** Reduced-form columns (1)–(2) use OLS; with endogenous",
        "experience, Panel B RF via `ivreg` gives slightly different point estimates.",
        "Direct effect of `nearc4` in columns (3)–(4) is small and imprecise."
      )
    )
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "Live College * Low Parental Education",
    paste(
      "Interaction of an indicator for living near a 4-year college in 1966 with an",
      "indicator for both parents having less than high-school education (`lowfam = 1`",
      "if `famed = 9`; `nearc4_low = nearc4` $\\times$ `lowfam`)."
    )
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "Education",
    paste(
      "Column (3): instrument for schooling is `nearc4_low`; coefficient is the Wald",
      "ratio of reduced-form earnings to schooling effects (paper: 0.093). Column (4):",
      "instruments are `nearc4` $\\times$ `f1`–`nearc4` $\\times$ `f8` (paper IV $\\approx$ 0.097)."
    )
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "Family Background Variables",
    paste(
      "Fourteen controls (`card93_fb_full`): parental education (years and missing",
      "indicators), eight `famed` interaction classes, and two family-structure",
      "indicators at age 14."
    )
  )

gt_tbl <- card93_gt_finalize(gt_tbl)

# Persist the table inputs and outputs for downstream use.
save(
  table_data, gt_tbl, replication, paper_targets,
  file = here("04-topics/rep-card1993/Rcode/Table_V.RData")
)