# Load required libraries library(tidyverse) library(haven) # For reading Stata files library(ivreg) # For instrumental variables regression library(knitr) # For generating tables library(kableExtra) library(broom) # For tidying model results library(here) library(gt) # For creating gt tables source(here("04-topics/rep-ak1991/Rcode/ak1991-gt-quarto.R")) # Read the data data <- read_dta(here("04-topics/rep-ak1991/stata/raw_data.dta")) # Rename variables data <- data %>% rename( AGE = v1, AGEQ = v2, EDUC = v4, ENOCENT = v5, ESOCENT = v6, LWKLYWGE = v9, MARRIED = v10, MIDATL = v11, MT = v12, NEWENG = v13, CENSUS = v16, QOB = v18, RACE = v19, SMSA = v20, SOATL = v21, WNOCENT = v24, WSOCENT = v25, YOB = v27 ) # Adjust YOB for years >= 1900 data <- data %>% mutate(YOB = if_else(YOB >= 1900, YOB - 1900, YOB)) # Create YOB dummies for (i in 0:9) { data <- data %>% mutate(!!paste0("YR", i) := if_else(YOB %in% c(20 + i, 30 + i, 40 + i), 1, 0)) } # Create QOB dummies for (i in 1:4) { data <- data %>% mutate(!!paste0("QTR", i) := if_else(QOB == i, 1, 0)) } # Create QOB*YOB interaction dummies for (j in 1:3) { for (i in 0:9) { data <- data %>% mutate(!!paste0("QTR", j, "YR", i) := !!sym(paste0("QTR", j)) * !!sym(paste0("YR", i))) } } # Create COHORT variable and adjust AGEQ data <- data %>% mutate( COHORT = case_when( YOB <= 29 & YOB >= 20 ~ 2029, YOB <= 39 & YOB >= 30 ~ 3039, YOB <= 49 & YOB >= 40 ~ 4049, TRUE ~ NA_real_ ), AGEQ = if_else(CENSUS == 80, AGEQ - 1900, AGEQ), AGEQSQ = AGEQ * AGEQ ) # Select particular men born # Filter data for COHORT > 4000 data <- data %>% filter(COHORT > 4000) # Define excluded instrumental variables iv_excluded <- c( "QTR1YR0", "QTR1YR1", "QTR1YR2", "QTR1YR3", "QTR1YR4", "QTR1YR5", "QTR1YR6", "QTR1YR7", "QTR1YR8", "QTR1YR9", "QTR2YR0", "QTR2YR1", "QTR2YR2", "QTR2YR3", "QTR2YR4", "QTR2YR5", "QTR2YR6", "QTR2YR7", "QTR2YR8", "QTR2YR9", "QTR3YR0", "QTR3YR1", "QTR3YR2", "QTR3YR3", "QTR3YR4", "QTR3YR5", "QTR3YR6", "QTR3YR7", "QTR3YR8", "QTR3YR9" ) # Define regression formulas formula1 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 formula2 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 | QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 iv2_included <- c( "YR0", "YR1", "YR2", "YR3", "YR4", "YR5", "YR6", "YR7", "YR8" ) iv2_all <- c(iv2_included, iv_excluded) formula3 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ formula4 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ | QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ iv4_included <- c( "YR0", "YR1", "YR2", "YR3", "YR4", "YR5", "YR6", "YR7", "YR8", "AGEQ", "AGEQSQ" ) iv4_all <- c(iv4_included, iv_excluded) formula5 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 formula6 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 | QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT iv6_included <- c( "YR0", "YR1", "YR2", "YR3", "YR4", "YR5", "YR6", "YR7", "YR8", "RACE", "MARRIED", "SMSA", "NEWENG", "MIDATL", "ENOCENT", "WNOCENT", "SOATL", "ESOCENT", "WSOCENT", "MT" ) iv6_all <- c(iv6_included, iv_excluded) formula7 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ formula8 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + AGEQ + AGEQSQ + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 | QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 + QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 + QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9 + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT + AGEQ + AGEQSQ iv8_included <- c( "YR0", "YR1", "YR2", "YR3", "YR4", "YR5", "YR6", "YR7", "YR8", "RACE", "MARRIED", "SMSA", "NEWENG", "MIDATL", "ENOCENT", "WNOCENT", "SOATL", "ESOCENT", "WSOCENT", "MT", "AGEQ", "AGEQSQ" ) iv8_all <- c(iv8_included, iv_excluded) # Run regressions model1 <- lm(formula1, data = data) model2 <- ivreg(formula2, data = data) model3 <- lm(formula3, data = data) model4 <- ivreg(formula4, data = data) model5 <- lm(formula5, data = data) model6 <- ivreg(formula6, data = data) model7 <- lm(formula7, data = data) model8 <- ivreg(formula8, data = data) # Extract coefficients and standard errors table_data <- bind_rows( broom::tidy(model1, conf.int = TRUE) %>% mutate(model = "model1"), broom::tidy(model2, conf.int = TRUE) %>% mutate(model = "model2"), broom::tidy(model3, conf.int = TRUE) %>% mutate(model = "model3"), broom::tidy(model4, conf.int = TRUE) %>% mutate(model = "model4"), broom::tidy(model5, conf.int = TRUE) %>% mutate(model = "model5"), broom::tidy(model6, conf.int = TRUE) %>% mutate(model = "model6"), broom::tidy(model7, conf.int = TRUE) %>% mutate(model = "model7"), broom::tidy(model8, conf.int = TRUE) %>% mutate(model = "model8") ) %>% filter(term %in% c("EDUC", "RACE", "SMSA", "MARRIED", "AGEQ", "AGEQSQ")) %>% mutate( term = case_when( term == "EDUC" ~ "Years of education", term == "RACE" ~ "Race (1 = black)", term == "SMSA" ~ "SMSA (1 = center city)", term == "MARRIED" ~ "Married (1 = married)", term == "AGEQ" ~ "Age", term == "AGEQSQ" ~ "Age-squared", TRUE ~ term ), p.sig = case_when( p.value < 0.001 ~ "***", p.value < 0.01 ~ "**", p.value < 0.05 ~ "*", TRUE ~ "" ), value = ak91_coef_cell(estimate, std.error, p.sig) ) %>% select(term, model, value) %>% pivot_wider( names_from = model, values_from = value ) %>% # reorder the term mutate( term = factor( term, levels = c("Years of education", "Race (1 = black)", "SMSA (1 = center city)", "Married (1 = married)", "Age", "Age-squared")) ) %>% arrange(term) %>% rename_all(~ all_of(c("Independent Variables", "(1)
OLS", "(2)
TSLS", "(3)
OLS", "(4)
TSLS", "(5)
OLS", "(6)
TSLS", "(7)
OLS", "(8)
TSLS"))) # Create the table using kableExtra kbl_out <- table_data %>% kbl( format = "html", escape = FALSE, scroll = TRUE, align = c("l", "c", "c", "c", "c", "c", "c", "c", "c", "c"), caption = "Table VI: OLS and TSLS Estimates of the Return to Education for Men Born 1940-1949: 1980 Census" ) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>% footnote( general = "Yes-No Dummies are not kept in the shown figure.", number = c( "$^{*}$ p<0.05; $^{**}$ p<0.01; $^{***}$ p<0.001", "The dependent variable is the log of weekly wage. The sample includes men born 1940-1949 in the 1980 Census.", "The excluded instrumental variables for all TSLS models are not shown in the table and are the same which are the 27 interaction terms between the quarter of birth and year of birth $QTR_j \\times YR_i$, $i \\in \\{0, 1, \\dots, 9\\}$ and $j \\in \\{1, 2, 3\\}$." ) ) # Create the table using gt table_data <- ak91_quarto_blank_df(table_data) gt_tbl <- table_data %>% gt() %>% # Set title tab_header( title = "Table VI: OLS and TSLS Estimates of the Return to Education for Men Born 1940-1949: 1980 Census" ) %>% # Set column alignment cols_align( align = "left", columns = 1 ) %>% cols_align( align = "center", columns = 2:9 ) %>% # Add footnotes tab_footnote( footnote = "Yes-No Dummies are not kept in the shown figure.", locations = cells_title(groups = "title") ) %>% tab_footnote( footnote = md("$^{*}p<0.05; ^{**}p<0.01; ^{***}p<0.001$"), locations = cells_title(groups = "title") ) %>% tab_footnote( footnote = "The dependent variable is the log of weekly wage. The sample includes men born 1940-1949 in the 1980 Census.", locations = cells_title(groups = "title") ) %>% tab_footnote( footnote = md("The excluded instrumental variables for all TSLS models are not shown in the table and are the same which are the 27 interaction terms between the quarter of birth and year of birth $QTR_j \\times YR_i$, $i \\in \\{0, 1, \\dots, 9\\}$ and $j \\in \\{1, 2, 3\\}$."), locations = cells_title(groups = "title") ) %>% # Set table style opt_row_striping() %>% tab_options( table.font.size = px(14), heading.background.color = "#e8e8e8", column_labels.font.weight = "bold", table.border.top.width = px(3), table.border.bottom.width = px(3), container.width = pct(100), table.width = px(100) ) %>% # Ensure math symbols render correctly fmt_markdown(columns = everything()) %>% # Ensure all column headers use gt package's html() syntax cols_label(.list = ak91_gt_cols_label(names(table_data))) # Save the objects to RData file save(table_data, gt_tbl, file = here("04-topics/rep-ak1991/Rcode/Table_VI.RData"))