# Load required packages
library(here)
library(tidyverse)
library(haven) # For reading Stata data
library(broom) # For tidying regression results
library(knitr) # For generating tables
library(gt)
#renv::install("ivreg")
library(ivreg) # For instrumental variables regression
library(kableExtra)
source(here("04-topics/rep-ak1991/Rcode/ak1991-gt-quarto.R"))
# Read the data
raw_data <- read_dta("D:/github/course-em-eng/04-topics/rep-ak1991/stata/raw_data.dta")
# Rename variables to match Stata code
data <- raw_data %>%
rename(
AGE = v1,
AGEQ = v2,
EDUC = v4,
ENOCENT = v5,
ESOCENT = v6,
LWKLYWGE = v9,
MARRIED = v10,
MIDATL = v11,
MT = v12,
NEWENG = v13,
CENSUS = v16,
QOB = v18,
RACE = v19,
SMSA = v20,
SOATL = v21,
WNOCENT = v24,
WSOCENT = v25,
YOB = v27
)
# Adjust YOB for years >= 1900
data <- data %>%
mutate(YOB = if_else(YOB >= 1900, YOB - 1900, YOB))
# Create YOB dummies
for (i in 0:9) {
data <- data %>%
mutate(!!paste0("YR", i) := if_else(YOB %in% c(20 + i, 30 + i, 40 + i), 1, 0))
}
# Create QOB dummies
for (i in 1:4) {
data <- data %>%
mutate(!!paste0("QTR", i) := if_else(QOB == i, 1, 0))
}
# Create QOB*YOB interaction dummies
for (j in 1:3) {
for (i in 0:9) {
data <- data %>%
mutate(!!paste0("QTR", j, "YR", i) := !!sym(paste0("QTR", j)) * !!sym(paste0("YR", i)))
}
}
# Create COHORT variable and adjust AGEQ
data <- data %>%
mutate(
COHORT = case_when(
YOB <= 29 & YOB >= 20 ~ 2029,
YOB <= 39 & YOB >= 30 ~ 3039,
YOB <= 49 & YOB >= 40 ~ 4049,
TRUE ~ NA_real_
),
AGEQ = if_else(CENSUS == 80, AGEQ - 1900, AGEQ),
AGEQSQ = AGEQ * AGEQ
)
# Filter data for COHORT < 2030
data <- data %>%
filter(COHORT < 2030)
# Define formulas for different models
# Model 1: Basic OLS
formula1 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8
# Model 2: 2SLS with QOB*YOB interactions as instruments
formula2 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 |
YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 +
QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 +
QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 +
QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9
# Model 3: OLS with age controls
formula3 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ
# Model 4: 2SLS with age controls
formula4 <- LWKLYWGE ~ EDUC + YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ |
YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ +
QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 +
QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 +
QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9
# Model 5: OLS with demographic controls
formula5 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT +
YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8
# Model 6: 2SLS with demographic controls
formula6 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT +
YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 |
RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT +
YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 +
QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 +
QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 +
QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9
# Model 7: OLS with all controls
formula7 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT +
YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ
# Model 8: 2SLS with all controls
formula8 <- LWKLYWGE ~ EDUC + RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT +
YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ |
RACE + MARRIED + SMSA + NEWENG + MIDATL + ENOCENT + WNOCENT + SOATL + ESOCENT + WSOCENT + MT +
YR0 + YR1 + YR2 + YR3 + YR4 + YR5 + YR6 + YR7 + YR8 + AGEQ + AGEQSQ +
QTR1YR0 + QTR1YR1 + QTR1YR2 + QTR1YR3 + QTR1YR4 + QTR1YR5 + QTR1YR6 + QTR1YR7 + QTR1YR8 + QTR1YR9 +
QTR2YR0 + QTR2YR1 + QTR2YR2 + QTR2YR3 + QTR2YR4 + QTR2YR5 + QTR2YR6 + QTR2YR7 + QTR2YR8 + QTR2YR9 +
QTR3YR0 + QTR3YR1 + QTR3YR2 + QTR3YR3 + QTR3YR4 + QTR3YR5 + QTR3YR6 + QTR3YR7 + QTR3YR8 + QTR3YR9
# Run regressions
model1 <- lm(formula1, data = data)
# summary(model1)
model2 <- ivreg(formula2, data = data)
# summary(model2)
model3 <- lm(formula3, data = data)
# summary(model3)
model4 <- ivreg(formula4, data = data)
# summary(model4)
model5 <- lm(formula5, data = data)
# summary(model5)
model6 <- ivreg(formula6, data = data)
# summary(model6)
model7 <- lm(formula7, data = data)
# summary(model7)
model8 <- ivreg(formula8, data = data)
# summary(model8)
# Extract coefficients and standard errors
results <- bind_rows(
tidy(model1) %>% mutate(model = "OLS1"),
tidy(model2) %>% mutate(model = "TSLS1"),
tidy(model3) %>% mutate(model = "OLS2"),
tidy(model4) %>% mutate(model = "TSLS2"),
tidy(model5) %>% mutate(model = "OLS3"),
tidy(model6) %>% mutate(model = "TSLS3"),
tidy(model7) %>% mutate(model = "OLS4"),
tidy(model8) %>% mutate(model = "TSLS4")
)
# Create table data
table_data <- results %>%
filter(term %in% c("EDUC", "RACE", "SMSA", "MARRIED", "AGEQ", "AGEQSQ")) %>%
mutate(
term = case_when(
term == "EDUC" ~ "Years of education",
term == "RACE" ~ "Race (1 = black)",
term == "SMSA" ~ "SMSA (1 = center city)",
term == "MARRIED" ~ "Married (1 = married)",
term == "AGEQ" ~ "Age",
term == "AGEQSQ" ~ "Age-squared",
TRUE ~ term
),
p.sig = case_when(
p.value < 0.001 ~ "***",
p.value < 0.01 ~ "**",
p.value < 0.05 ~ "*",
TRUE ~ ""
),
value = ak91_coef_cell(estimate, std.error, p.sig)
) %>%
select(term, model, value) %>%
pivot_wider(
names_from = model,
values_from = value
) %>%
# reorder the term
mutate(
term = factor(
term,
levels = c("Years of education", "Race (1 = black)", "SMSA (1 = center city)", "Married (1 = married)", "Age", "Age-squared"))
) %>%
arrange(term) %>%
rename_all(~ all_of(c("Independent Variables", "(1)
OLS", "(2)
TSLS", "(3)
OLS", "(4)
TSLS", "(5)
OLS", "(6)
TSLS", "(7)
OLS", "(8)
TSLS")))
# Create the table using kableExtra
kbl_iv <- table_data %>%
kbl(
format = "html",
align = c('l', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c'),
caption = "Table IV: OLS and TSLS Estimates of the Return to Education for Men Born 1920-1929: 1970 Census",
escape = FALSE
) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
#add_header_above(c("Independent Variables" = 1, "(1) OLS" = 1, "(2) TSLS" = 1, "(3) OLS" = 1,
# "(4) TSLS" = 1, "(5) OLS" = 1, "(6) TSLS" = 1, "(7) OLS" = 1, "(8) TSLS" = 1)) %>%
kableExtra::footnote(
general = "Yes-No Dummies are not kept in the shown figure.",
number = c(
"$^{*}$ p<0.05; $^{**}$ p<0.01; $^{***}$ p<0.001"
)
)
# Create the table using gt
table_data <- ak91_quarto_blank_df(table_data)
gt_tbl <- table_data %>%
gt() %>%
# Set title
tab_header(
title = "Table IV: OLS and TSLS Estimates of the Return to Education for Men Born 1920-1929: 1970 Census"
) %>%
# Set column alignment
cols_align(
align = "left",
columns = 1
) %>%
cols_align(
align = "center",
columns = 2:9
) %>%
# Add footnotes
tab_footnote(
footnote = "Yes-No Dummies are not kept in the shown figure.",
locations = cells_title(groups = "title")
) %>%
tab_footnote(
footnote = md("$^{*}p<0.05; ^{**}p<0.01; ^{***}p<0.001$"),
locations = cells_title(groups = "title")
) %>%
# Set table style
opt_row_striping() %>%
tab_options(
table.font.size = px(14),
heading.background.color = "#e8e8e8",
column_labels.font.weight = "bold",
table.border.top.width = px(3),
table.border.bottom.width = px(3)
) %>%
# Ensure math symbols render correctly
fmt_markdown(columns = everything()) %>%
# Ensure all column headers use gt package's html() syntax, e.g. (1)
OLS
# Dynamically process all column headers as HTML
cols_label(.list = ak91_gt_cols_label(names(kbl_iv)))
# Save the objects to RData file
save(table_data, gt_tbl, file = here("04-topics/rep-ak1991/Rcode/Table_IV.RData"))