# Load required packages library(here) library(tidyverse) library(haven) # For reading Stata data library(broom) # For tidying regression results library(knitr) # For generating tables library(gt) #renv::install("ivreg") library(ivreg) # For instrumental variables regression library(kableExtra) source(here("04-topics/rep-ak1991/Rcode/ak1991-gt-quarto.R")) # Read the data raw_data <- read_dta("D:/github/course-em-eng/04-topics/rep-ak1991/stata/raw_data.dta") # Rename variables to match Stata code data <- raw_data %>% rename( AGE = v1, AGEQ = v2, EDUC = v4, ENOCENT = v5, ESOCENT = v6, LWKLYWGE = v9, MARRIED = v10, MIDATL = v11, MT = v12, NEWENG = v13, CENSUS = v16, QOB = v18, RACE = v19, SMSA = v20, SOATL = v21, WNOCENT = v24, WSOCENT = v25, YOB = v27 ) # Adjust YOB for years >= 1900 data <- data %>% mutate(YOB = if_else(YOB >= 1900, YOB - 1900, YOB)) # Create YOB dummies for (i in 0:9) { data <- data %>% mutate(!!paste0("YR", i) := if_else(YOB %in% c(20 + i, 30 + i, 40 + i), 1, 0)) } # Create QOB dummies for (i in 1:4) { data <- data %>% mutate(!!paste0("QTR", i) := if_else(QOB == i, 1, 0)) } # Create QOB*YOB interaction dummies for (j in 1:3) { for (i in 0:9) { data <- data %>% mutate(!!paste0("QTR", j, "YR", i) := !!sym(paste0("QTR", j)) * !!sym(paste0("YR", i))) } } # Create COHORT variable and adjust AGEQ data <- data %>% mutate( COHORT = case_when( YOB <= 29 & YOB >= 20 ~ 2029, YOB <= 39 & YOB >= 30 ~ 3039, YOB <= 49 & YOB >= 40 ~ 4049, TRUE ~ NA_real_ ), AGEQ = if_else(CENSUS == 80, AGEQ - 1900, AGEQ), AGEQSQ = AGEQ * AGEQ ) # Filter data for COHORT < 2030 data <- data %>% filter(COHORT < 2030) # Define formulas for different models # Model 1: Basic OLS formula1 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 # Model 2: 2SLS with QOB*YOB interactions as instruments formula2 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 | YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 # Model 3: OLS with age controls formula3 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ # Model 4: 2SLS with age controls formula4 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ | YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ + QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 # Model 5: OLS with demographic controls formula5 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 # Model 6: 2SLS with demographic controls formula6 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 | RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 # Model 7: OLS with all controls formula7 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ # Model 8: 2SLS with all controls formula8 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ | RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ + QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 # Run regressions model1 <- lm(formula1, data = data) # summary(model1) model2 <- ivreg(formula2, data = data) # summary(model2) model3 <- lm(formula3, data = data) # summary(model3) model4 <- ivreg(formula4, data = data) # summary(model4) model5 <- lm(formula5, data = data) # summary(model5) model6 <- ivreg(formula6, data = data) # summary(model6) model7 <- lm(formula7, data = data) # summary(model7) model8 <- ivreg(formula8, data = data) # summary(model8) # Extract coefficients and standard errors results <- bind_rows( tidy(model1) %>% mutate(model = "OLS1"), tidy(model2) %>% mutate(model = "TSLS1"), tidy(model3) %>% mutate(model = "OLS2"), tidy(model4) %>% mutate(model = "TSLS2"), tidy(model5) %>% mutate(model = "OLS3"), tidy(model6) %>% mutate(model = "TSLS3"), tidy(model7) %>% mutate(model = "OLS4"), tidy(model8) %>% mutate(model = "TSLS4") ) # Create table data table_data <- results %>% filter(term %in% c("EDUC", "RACE", "SMSA", "MARRIED", "AGEQ", "AGEQSQ")) %>% mutate( term = case_when( term == "EDUC" ~ "Years of education", term == "RACE" ~ "Race (1 = black)", term == "SMSA" ~ "SMSA (1 = center city)", term == "MARRIED" ~ "Married (1 = married)", term == "AGEQ" ~ "Age", term == "AGEQSQ" ~ "Age-squared", TRUE ~ term ), p.sig = case_when( p.value < 0.001 ~ "***", p.value < 0.01 ~ "**", p.value < 0.05 ~ "*", TRUE ~ "" ), value = ak91_coef_cell(estimate, std.error, p.sig) ) %>% select(term, model, value) %>% pivot_wider( names_from = model, values_from = value ) %>% # reorder the term mutate( term = factor( term, levels = c("Years of education", "Race (1 = black)", "SMSA (1 = center city)", "Married (1 = married)", "Age", "Age-squared")) ) %>% arrange(term) %>% rename_all(~ all_of(c("Independent Variables", "(1)
OLS", "(2)
TSLS", "(3)
OLS", "(4)
TSLS", "(5)
OLS", "(6)
TSLS", "(7)
OLS", "(8)
TSLS"))) # Create the table using kableExtra kbl_iv <- table_data %>% kbl( format = "html", align = c('l', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c'), caption = "Table IV: OLS and TSLS Estimates of the Return to Education for Men Born 1920-1929: 1970 Census", escape = FALSE ) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>% #add_header_above(c("Independent Variables" = 1, "(1) OLS" = 1, "(2) TSLS" = 1, "(3) OLS" = 1, # "(4) TSLS" = 1, "(5) OLS" = 1, "(6) TSLS" = 1, "(7) OLS" = 1, "(8) TSLS" = 1)) %>% kableExtra::footnote( general = "Yes-No Dummies are not kept in the shown figure.", number = c( "$^{*}$ p<0.05; $^{**}$ p<0.01; $^{***}$ p<0.001" ) ) # Create the table using gt table_data <- ak91_quarto_blank_df(table_data) gt_tbl <- table_data %>% gt() %>% # Set title tab_header( title = "Table IV: OLS and TSLS Estimates of the Return to Education for Men Born 1920-1929: 1970 Census" ) %>% # Set column alignment cols_align( align = "left", columns = 1 ) %>% cols_align( align = "center", columns = 2:9 ) %>% # Add footnotes tab_footnote( footnote = "Yes-No Dummies are not kept in the shown figure.", locations = cells_title(groups = "title") ) %>% tab_footnote( footnote = md("$^{*}p<0.05; ^{**}p<0.01; ^{***}p<0.001$"), locations = cells_title(groups = "title") ) %>% # Set table style opt_row_striping() %>% tab_options( table.font.size = px(14), heading.background.color = "#e8e8e8", column_labels.font.weight = "bold", table.border.top.width = px(3), table.border.bottom.width = px(3) ) %>% # Ensure math symbols render correctly fmt_markdown(columns = everything()) %>% # Ensure all column headers use gt package's html() syntax, e.g. (1)
OLS # Dynamically process all column headers as HTML cols_label(.list = ak91_gt_cols_label(names(kbl_iv))) # Save the objects to RData file save(table_data, gt_tbl, file = here("04-topics/rep-ak1991/Rcode/Table_IV.RData"))