# Card (1993) Figure 1 – mean education by predicted-education quartile × nearc4.
#
# Outline:
#   1. Load the prepared data and build 1966 age-group dummies.
#   2. Fit an education-prediction regression on the nearc4 == 0 subsample.
#   3. Predict education for everyone and cut the predictions into quartiles.
#   4. Compare mean completed education by quartile and college proximity.
#   5. Plot, print, and save the results.

library(tidyverse)
library(here)
library(ggplot2)

# NOTE(review): load_card93_data(), card93_region66 and card93_fb_full are not
# defined in this script; presumably they come from the sourced data-prep
# file -- confirm.
source(here("04-topics/rep-card1993/Rcode/card1993-data-prep.R"))

# Data ----

data <- load_card93_data()

# Age dummies (1966 age groups, same bins as Table 1).
data <- data |>
  mutate(
    age_1415 = as.integer(age66 >= 14 & age66 <= 15),
    age_1617 = as.integer(age66 >= 16 & age66 <= 17),
    age_1820 = as.integer(age66 >= 18 & age66 <= 20),
    age_2124 = as.integer(age66 >= 21 & age66 <= 24)
  )

# Prediction equation ----

# Right-hand-side variable names for the prediction regression.
pred_x <- c(
  card93_region66,
  "smsa66r",
  "age_1415", "age_1617", "age_1820", "age_2124",
  "black",
  card93_fb_full
)

# The regression is estimated only on observations with nearc4 == 0, then used
# to predict education for the full sample.
no_college <- data |>
  filter(nearc4 == 0L)

fml <- as.formula(paste("ed76 ~", paste(pred_x, collapse = " + ")))
fit <- lm(fml, data = no_college)

data$pred_ed_nocollege <- predict(fit, newdata = data)
data$pred_quartile <- ntile(data$pred_ed_nocollege, 4L)

# Cell means ----

# One row per (predicted-education quartile, nearc4) cell.
fig_summary <- data |>
  group_by(pred_quartile, nearc4) |>
  summarise(mean_ed = mean(ed76), n = n(), .groups = "drop") |>
  mutate(
    nearc4_lab = if_else(
      nearc4 == 1L,
      "Near 4-year college",
      "No nearby college"
    )
  )

# Figure ----

fig_plot <- fig_summary |>
  ggplot(
    aes(
      x = pred_quartile,
      y = mean_ed,
      colour = nearc4_lab,
      group = nearc4_lab
    )
  ) +
  geom_line(linewidth = 0.9) +
  geom_point(size = 2.5) +
  # paste0() so the tick labels read "Q1".."Q4"; paste() would insert a space.
  scale_x_continuous(breaks = 1:4, labels = paste0("Q", 1:4)) +
  scale_colour_manual(
    values = c(
      "No nearby college" = "#2166ac",
      "Near 4-year college" = "#b2182b"
    )
  ) +
  labs(
    x = "Quartile of predicted education (no college nearby subsample fit)",
    y = "Mean completed education in 1976 (years)",
    colour = NULL,
    title = "Mean completed education by predicted-education quartile and college proximity in 1966",
    caption = "Note: Prediction equation is fit to subsample with no college nearby."
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

# Summary metrics ----

r2_nocollege <- summary(fit)$r.squared

# Gap in mean education (near minus not near) within the lowest quartile.
q1 <- fig_summary |>
  filter(pred_quartile == 1L)
q1_gap <- q1$mean_ed[q1$nearc4 == 1L] - q1$mean_ed[q1$nearc4 == 0L]

# Guard: if either Q1 cell were missing, the gap would have length 0 and
# tibble() would silently recycle the R2 value into the gap row.
stopifnot(
  "Lowest-quartile gap must be a single value (one row per nearc4 group)" =
    length(q1_gap) == 1L
)

paper_note <- tibble(
  metric = c(
    "R2 prediction (no college subsample)",
    "Lowest quartile gap (near - no)"
  ),
  value = c(r2_nocollege, q1_gap)
)

# Output ----

message("Figure 1 summary:")
print(fig_summary)
print(paper_note)

save(
  fig_plot, fig_summary, paper_note, fit,
  file = here("04-topics/rep-card1993/Rcode/Figure_1.RData")
)