# Load required libraries library(tidyverse) library(haven) # For reading Stata files library(ivreg) # For instrumental variables regression library(knitr) # For generating tables library(kableExtra) library(broom) # For tidying model results library(here) library(gt) source(here("04-topics/rep-ak1991/Rcode/ak1991-gt-quarto.R")) # Read the data data <- read_dta(here("04-topics/rep-ak1991/stata/raw_data.dta")) # Rename variables data <- data %>% rename( AGE = v1, AGEQ = v2, EDUC = v4, ENOCENT = v5, ESOCENT = v6, LWKLYWGE = v9, MARRIED = v10, MIDATL = v11, MT = v12, NEWENG = v13, CENSUS = v16, QOB = v18, RACE = v19, SMSA = v20, SOATL = v21, WNOCENT = v24, WSOCENT = v25, YOB = v27 ) # Adjust YOB for years >= 1900 data <- data %>% mutate(YOB = if_else(YOB >= 1900, YOB - 1900, YOB)) # Create YOB dummies for (i in 0:9) { data <- data %>% mutate(!!paste0("YR", i) := if_else(YOB %in% c(20 + i, 30 + i, 40 + i), 1, 0)) } # Create QOB dummies for (i in 1:4) { data <- data %>% mutate(!!paste0("QTR", i) := if_else(QOB == i, 1, 0)) } # Create QOB*YOB interaction dummies for (j in 1:3) { for (i in 0:9) { data <- data %>% mutate(!!paste0("QTR", j, "YR", i) := !!sym(paste0("QTR", j)) * !!sym(paste0("YR", i))) } } # Create COHORT variable and adjust AGEQ data <- data %>% mutate( COHORT = case_when( YOB <= 29 & YOB >= 20 ~ 2029, YOB <= 39 & YOB >= 30 ~ 3039, YOB <= 49 & YOB >= 40 ~ 4049, TRUE ~ NA_real_ ), AGEQ = if_else(CENSUS == 80, AGEQ - 1900, AGEQ), AGEQSQ = AGEQ * AGEQ ) # Select particular men born # Filter data for COHORT >3000 and COHORT < 3040 data <- data %>% filter(COHORT > 3000 & COHORT < 3040) ## Define excluded instrumental variables iv_excluded <- c( "QTR1YR0", "QTR1YR1", "QTR1YR2", "QTR1YR3", "QTR1YR4", "QTR1YR5", "QTR1YR6", "QTR1YR7", "QTR1YR8", "QTR1YR9", "QTR2YR0", "QTR2YR1", "QTR2YR2", "QTR2YR3", "QTR2YR4", "QTR2YR5", "QTR2YR6", "QTR2YR7", "QTR2YR8", "QTR2YR9", "QTR3YR0", "QTR3YR1", "QTR3YR2", "QTR3YR3", "QTR3YR4", "QTR3YR5", "QTR3YR6", "QTR3YR7", "QTR3YR8", "QTR3YR9" ) # Define regression formulas formula1 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 formula2 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 | QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 iv2_included <- c( "YR0", "YR1", "YR2", "YR3", "YR4", "YR5", "YR6", "YR7", "YR8" ) iv2_all <- c(iv2_included, iv_excluded) formula3 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ formula4 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ | QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ iv4_included <- c( "YR0", "YR1", "YR2", "YR3", "YR4", "YR5", "YR6", "YR7", "YR8", "AGEQ", "AGEQSQ" ) iv4_all <- c(iv4_included, iv_excluded) formula5 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 formula6 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 | RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 iv6_included <- c( "YR0", "YR1", "YR2", "YR3", "YR4", "YR5", "YR6", "YR7", "YR8", "RACE", "MARRIED", "SMSA", "NEWENG", "MIDATL", "ENOCENT", "WNOCENT", "SOATL", "ESOCENT", "WSOCENT", "MT" ) iv6_all <- c(iv6_included, iv_excluded) formula7 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ formula8 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + AGEQ + AGEQSQ + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 | RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + AGEQ + AGEQSQ + QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 ## included instrumental variables iv8_included <- c( "YR0", "YR1", "YR2", "YR3", "YR4", "YR5", "YR6", "YR7", "YR8", "RACE", "MARRIED", "SMSA", "NEWENG", "MIDATL", "ENOCENT", "WNOCENT", "SOATL", "ESOCENT", "WSOCENT", "MT", "AGEQ", "AGEQSQ" ) iv8_all <- c(iv8_included, iv_excluded) # Run regressions model1 <- lm(formula1, data = data) # summary(model1) model2 <- ivreg(formula2, data = data) # summary(model2) model3 <- lm(formula3, data = data) # summary(model3) model4 <- ivreg(formula4, data = data) # summary(model4) model5 <- lm(formula5, data = data) # summary(model5) model6 <- ivreg(formula6, data = data) # summary(model6) model7 <- lm(formula7, data = data) # summary(model7) model8 <- ivreg(formula8, data = data) # summary(model8) # Extract coefficients and standard errors table_data <- bind_rows( broom::tidy(model1, conf.int = TRUE) %>% mutate(model = "model1"), broom::tidy(model2, conf.int = TRUE) %>% mutate(model = "model2"), broom::tidy(model3, conf.int = TRUE) %>% mutate(model = "model3"), broom::tidy(model4, conf.int = TRUE) %>% mutate(model = "model4"), broom::tidy(model5, conf.int = TRUE) %>% mutate(model = "model5"), broom::tidy(model6, conf.int = TRUE) %>% mutate(model = "model6"), broom::tidy(model7, conf.int = TRUE) %>% mutate(model = "model7"), broom::tidy(model8, conf.int = TRUE) %>% mutate(model = "model8") ) %>% filter(term %in% c("EDUC", "RACE", "SMSA", "MARRIED", "AGEQ", "AGEQSQ")) %>% mutate( term = case_when( term == "EDUC" ~ "Years of education", term == "RACE" ~ "Race (1 = black)", term == "SMSA" ~ "SMSA (1 = center city)", term == "MARRIED" ~ "Married (1 = married)", term == "AGEQ" ~ "Age", term == "AGEQSQ" ~ "Age-squared", TRUE ~ term ), p.sig = case_when( p.value < 0.001 ~ "***", p.value < 0.01 ~ "**", p.value < 0.05 ~ "*", TRUE ~ "" ), value = ak91_coef_cell(estimate, std.error, p.sig) ) %>% select(term, model, value) %>% pivot_wider( names_from = model, values_from = value ) %>% # reorder the term mutate( term = factor( term, levels = c("Years of education", "Race (1 = black)", "SMSA (1 = center city)", "Married (1 = married)", "Age", "Age-squared")) ) %>% arrange(term) %>% rename_all(~ all_of(c("Independent Variables", "(1)
OLS", "(2)
TSLS", "(3)
OLS", "(4)
TSLS", "(5)
OLS", "(6)
TSLS", "(7)
OLS", "(8)
TSLS"))) # Create the table using kableExtra kbl_out <- table_data %>% kbl( format = "html", escape = FALSE, scroll = TRUE, align = c("l", "c", "c", "c", "c", "c", "c", "c", "c", "c"), caption = "Table V: OLS and TSLS Estimates of the Return to Education for Men Born 1930-1939: 1980 Census" ) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>% footnote( general = "Yes-No Dummies are not kept in the shown figure.", number = c( "$^{*}$ p<0.05; $^{**}$ p<0.01; $^{***}$ p<0.001", "The dependent variable is the log of weekly wage. The sample includes men born 1930-1939 in the 1980 Census.", "The excluded instrumental variables for all TSLS models are not shown in the table and are the same which are the 27 interaction terms between the quarter of birth and year of birth $QTR_j \\times YR_i$, $i \\in \\{0, 1, \\dots, 9\\}$ and $j \\in \\{1, 2, 3\\}$." ) ) # Create the table using gt table_data <- ak91_quarto_blank_df(table_data) gt_tbl <- table_data %>% gt() %>% # Set title tab_header( title = "Table V: OLS and TSLS Estimates of the Return to Education for Men Born 1930-1939: 1980 Census" ) %>% # Set column alignment cols_align( align = "left", columns = 1 ) %>% cols_align( align = "center", columns = 2:9 ) %>% # Add footnotes tab_footnote( footnote = "Yes-No Dummies are not kept in the shown figure.", locations = cells_title(groups = "title") ) %>% tab_footnote( footnote = md("$^{*}p<0.05; ^{**}p<0.01; ^{***}p<0.001$"), locations = cells_title(groups = "title") ) %>% tab_footnote( footnote = "The dependent variable is the log of weekly wage. The sample includes men born 1930-1939 in the 1980 Census.", locations = cells_title(groups = "title") ) %>% tab_footnote( footnote = md("The excluded instrumental variables for all TSLS models are not shown in the table and are the same which are the 27 interaction terms between the quarter of birth and year of birth $QTR_j \\times YR_i$, $i \\in \\{0, 1, \\dots, 9\\}$ and $j \\in \\{1, 2, 3\\}$."), locations = cells_title(groups = "title") ) %>% # Set table style opt_row_striping() %>% tab_options( table.font.size = px(14), heading.background.color = "#e8e8e8", column_labels.font.weight = "bold", table.border.top.width = px(3), table.border.bottom.width = px(3), container.width = pct(100), table.width = pct(100) ) %>% # Ensure math symbols render correctly fmt_markdown(columns = everything()) %>% # Ensure all column headers use gt package's html() syntax cols_label(.list = ak91_gt_cols_label(names(table_data))) # Save the objects to RData file save(table_data, gt_tbl, file = here("04-topics/rep-ak1991/Rcode/Table_V.RData"))