# Card (1993) Table 2 – OLS log hourly wage equations (N = 3010).
#
# Fits the five column specifications, collects the education coefficient,
# its standard error and R-squared for each, runs F-tests between adjacent
# specifications, prints the replication next to the paper's published
# values, and builds/saves the formatted gt table.

library(tidyverse)
library(here)
library(gt)
library(car)

source(here("04-topics/rep-card1993/Rcode/card1993-data-prep.R"))
source(here("04-topics/rep-card1993/Rcode/card1993-gt-quarto.R"))

# Data ----

data <- load_card93_data()
wage <- card93_wage_sample(data)
# Fail early if the estimation sample is not the 3010 rows the table reports.
stopifnot(nrow(wage) == 3010L)

# Column specifications ----

# One entry per table column: `id` is the column number, `x` the regressor
# set (the `card93_*` vectors are not defined in this script; presumably
# they come from the sourced data-prep file).
specs <- list(
  list(id = 1L, x = card93_t2_col1),
  list(id = 2L, x = card93_t2_base),
  list(id = 3L, x = c(card93_t2_base, card93_fb_ed)),
  list(id = 4L, x = c(card93_t2_base, card93_fb_ed, card93_fb_famed)),
  list(id = 5L, x = c(card93_t2_base, card93_fb_full))
)

# Column ids and the matching "(k)" table headers, derived once from `specs`
# so the table columns and the gt column selection cannot drift apart.
col_ids <- vapply(specs, `[[`, integer(1), "id")
col_names <- paste0("(", col_ids, ")")

# Estimation ----

#' Fit one column of the table.
#'
#' @param xvars Regressors passed to `card93_ols()` with outcome `lwage76`.
#' @return A list with the `ed76` estimate (`ed_coef`), its standard error
#'   (`ed_se`), the model R-squared (`r2`) and the fitted model (`model`).
run_col <- function(xvars) {
  m <- card93_ols(wage, "lwage76", xvars)
  ed <- card93_coef_se(m, "ed76")
  list(
    ed_coef = ed$estimate,
    ed_se = ed$std.error,
    r2 = summary(m)$r.squared,
    model = m
  )
}

# Per-column results, each carrying its originating spec alongside the fit.
results <- purrr::map(specs, function(s) {
  c(run_col(s$x), list(spec = s))
})

# Joint F-tests on incremental family-background blocks (rows 3–5).

#' p-value of the F-test comparing two fitted models via `anova()`.
#'
#' @param m_small The smaller (restricted) model.
#' @param m_big The larger model.
#' @return The `Pr(>F)` entry of the second row of the anova table, as a
#'   plain numeric.
f_test <- function(m_small, m_big) {
  an <- anova(m_small, m_big)
  as.numeric(an$`Pr(>F)`[2])
}

# Each column from (3) on is tested against the column immediately before it.
# NOTE(review): the replication note below reports that columns (4) and (5)
# do not match the paper; it may be worth checking whether the paper tests
# all family-background variables against column (2) instead, and whether
# `card93_fb_full` nests the column (4) regressors — TODO confirm.
f_tests <- c(
  NA_real_,
  NA_real_,
  f_test(results[[2]]$model, results[[3]]$model),
  f_test(results[[3]]$model, results[[4]]$model),
  f_test(results[[4]]$model, results[[5]]$model)
)

# Replication check ----

# Published values, hard-coded for side-by-side comparison.
paper_targets <- tribble(
  ~col, ~ed, ~se, ~r2, ~f,
  1L, 0.074, 0.004, 0.291, NA_real_,
  2L, 0.075, 0.003, 0.300, NA_real_,
  3L, 0.073, 0.004, 0.301, 0.235,
  4L, 0.074, 0.004, 0.303, 0.462,
  5L, 0.073, 0.004, 0.304, 0.165
)

replication <- tibble(
  col = col_ids,
  ed = vapply(results, `[[`, numeric(1), "ed_coef"),
  se = vapply(results, `[[`, numeric(1), "ed_se"),
  r2 = vapply(results, `[[`, numeric(1), "r2"),
  f = f_tests
)

message("Table II replication vs paper:")
print(replication)
print(paper_targets)

# Table body ----

row_labels <- c(
  "1. Education",
  "2. Experience",
  "3. Experience-squared /100",
  "4. Black Indicator",
  "5. Live in South",
  "6. Live in SMSA",
  "7. Region in 1966 (8 indicators)",
  "8. Live in SMSA in 1966",
  "9. Parental education (years + missing indicators)",
  "10. Interacted parental education classes",
  "11. Family structure (2 indicators)",
  "12. R-squared",
  "13. F-test on family background variables"
)

#' Build one table row: a character cell for every column in `specs`.
#'
#' @param term Either a coefficient name looked up in each fitted model, or
#'   one of the special keys `"r2"`, `"f"`, `"region66"`, `"smsa66"`,
#'   `"fb_ed"`, `"fb_famed"`, `"fb_struct"`.
#' @param fn Formatter called as `fn(estimate, std.error)` for coefficient
#'   rows.
#' @return A character vector with one element per entry of `specs`.
extract_row <- function(term, fn = card93_coef_cell) {
  # First column id in which each block of controls is included; cells for
  # these rows are "yes" from that column onward and "no" before it.
  included_from <- c(
    region66 = 2L,
    smsa66 = 2L,
    fb_ed = 3L,
    fb_famed = 4L,
    fb_struct = 5L
  )
  purrr::map_chr(seq_along(specs), function(i) {
    if (term == "r2") {
      return(card93_fmt_num(results[[i]]$r2))
    }
    if (term == "f") {
      # Columns without an F-test show a dash rather than NA.
      return(if (is.na(f_tests[i])) "--" else card93_fmt_num(f_tests[i]))
    }
    if (term %in% names(included_from)) {
      return(if (specs[[i]]$id >= included_from[[term]]) "yes" else "no")
    }
    cs <- card93_coef_se(results[[i]]$model, term)
    fn(cs$estimate, cs$std.error)
  })
}

# Terms in the same order as `row_labels`.
row_terms <- c(
  "ed76", "exp76", "exp2", "black", "reg76r", "smsa76r",
  "region66", "smsa66", "fb_ed", "fb_famed", "fb_struct",
  "r2", "f"
)
stopifnot(length(row_terms) == length(row_labels))

# Extract every row once, then slice out each column, instead of recomputing
# all columns of every row for each column of the table.
row_cells <- lapply(row_terms, extract_row)

table_data <- tibble(stub = row_labels)
for (i in seq_along(specs)) {
  table_data[[col_names[i]]] <- vapply(row_cells, `[[`, character(1), i)
}

table_data <- ak91_quarto_blank_df(table_data)

# gt table ----

gt_tbl <- card93_gt_standard(
  table_data |> gt(),
  stub_col = "stub",
  data_cols = col_names
)

gt_tbl <- gt_tbl |>
  card93_gt_source_notes(
    c(
      paste(
        "**Notes:** Standard errors in parentheses. Sample size is 3010.",
        "Dependent variable: log hourly wages in 1976 (mean 6.262, SD 0.444).",
        "Experience is `age76 −ed76 −6`; experience-squared enters as",
        "(experience)$^2$/100 (SAS uses raw squared experience; coefficients differ by 100)."
      ),
      paste(
        "**Replication:** Column (1) education coefficient matches SAS `read1.lst`",
        "MODEL3 (`ED76 = 0.0747`). Row 13 incremental *F*-test *p*-values for columns",
        "(4)–(5) differ slightly from the paper (0.619 vs. 0.462; 0.028 vs. 0.165)."
      )
    )
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "9. Parental education (years + missing indicators)",
    "Years of mother's and father's education plus indicators for imputed values (`nodaded`, `nomomed`)."
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "10. Interacted parental education classes",
    "Eight interacted parental-education classes (`f1`–`f8` from `famed`)."
  ) |>
  card93_gt_stub_footnote(
    table_data,
    "11. Family structure (2 indicators)",
    "Lived with both parents (`momdad14`); lived with single mother (`sinmom14`)."
  )

gt_tbl <- card93_gt_finalize(gt_tbl)

# Persist ----

save(
  table_data, gt_tbl, replication, paper_targets,
  file = here("04-topics/rep-card1993/Rcode/Table_II.RData")
)