# Load required packages----
library(here)
library(tidyverse)
library(haven) # For reading Stata data
library(broom) # For tidying regression results
library(knitr) # For generating tables
library(kableExtra)
library(gt)
source(here("04-topics/rep-ak1991/Rcode/ak1991-gt-quarto.R"))
# Read the data
raw_data <- read_dta("D:/github/course-em-eng/04-topics/rep-ak1991/stata/raw_data.dta")
appendix2_table <- read_dta("D:/github/course-em-eng/04-topics/rep-ak1991/stata/appendix2_table.dta")
# Rename variables to match Stata code
data <- raw_data %>%
rename(
AGE = v1,
AGEQ = v2,
EDUC = v4,
ENOCENT = v5,
ESOCENT = v6,
LWKLYWGE = v9,
MARRIED = v10,
MIDATL = v11,
MT = v12,
NEWENG = v13,
CENSUS = v16,
SOB = v17,
QOB = v18,
RACE = v19,
SMSA = v20,
SOATL = v21,
WNOCENT = v24,
WSOCENT = v25,
YOB = v27
)
# Adjust YOB for years >= 1900
data <- data %>%
mutate(YOB = if_else(YOB >= 1900, YOB - 1900, YOB))
# Create YOB dummies
for (i in 0:9) {
data <- data %>%
mutate(!!paste0("YR", i) := if_else(YOB %in% c(20 + i, 30 + i, 40 + i), 1, 0))
}
# Create QOB dummies
for (i in 1:4) {
data <- data %>%
mutate(!!paste0("QTR", i) := if_else(QOB == i, 1, 0))
}
# Create QOB*YOB interaction dummies
for (j in 1:3) {
for (i in 0:9) {
data <- data %>%
mutate(!!paste0("QTR", j, "YR", i) := !!sym(paste0("QTR", j)) * !!sym(paste0("YR", i)))
}
}
# Create COHORT variable
data <- data %>%
mutate(
COHORT = case_when(
YOB <= 29 & YOB >= 20 ~ 2029,
YOB <= 39 & YOB >= 30 ~ 3039,
YOB <= 49 & YOB >= 40 ~ 4049,
TRUE ~ NA_real_
),
AGEQ = if_else(CENSUS == 80, AGEQ - 1900, AGEQ),
AGEQSQ = AGEQ * AGEQ
)
# Merge with appendix2_table
data_all <- data %>%
left_join(appendix2_table, by = "SOB")
# Analysis for Table II
yob_table2 <- 44
# First Column: 16
# Create dt2 variable for age 16
dt2_16 <- data_all %>%
filter(YOB == yob_table2, at_age1960 == 16) %>%
mutate(dt2 = 0) %>%
mutate(dt2 = if_else(EDUC >= 12, 1, dt2))
# Regression for QTR1
model1 <- lm(dt2 ~ QTR1, data = dt2_16)
model1_tidy <- tidy(model1) %>%
# Convert to percentage
mutate_at(c("estimate", "std.error"), \(x) 100*x)
# summary(model1)
# Regression for QTR2, QTR3, QTR4
# Create a subset for model2 to match Stata's if condition
dt2_16_model2 <- dt2_16 %>%
select(dt2, QTR2, QTR3, QTR4)
model2 <- lm(dt2 ~ QTR2 + QTR3 + QTR4, data = dt2_16_model2)
model2_tidy <- tidy(model2) %>%
# Convert to percentage
mutate_at(c("estimate", "std.error"), \(x) 100*x)
# summary(model2)
# Second Column: 17 or 18
# Create dt2 variable for age 17 or 18
dt2_17_18 <- data_all %>%
filter(YOB == yob_table2, at_age1960 %in% c(17, 18)) %>%
mutate(dt2 = 0) %>%
mutate(dt2 = if_else(EDUC >= 12, 1, dt2))
# Regression for QTR1
model3 <- lm(dt2 ~ QTR1, data = dt2_17_18)
model3_tidy <- tidy(model3) %>%
# Convert to percentage
mutate_at(c("estimate", "std.error"), \(x) 100*x)
# summary(model3)
# Regression for QTR2, QTR3, QTR4
# Create a subset for model4 to match Stata's if condition
dt2_17_18_model4 <- dt2_17_18 %>%
select(dt2, QTR2, QTR3, QTR4)
model4 <- lm(dt2 ~ QTR2 + QTR3 + QTR4, data = dt2_17_18_model4)
model4_tidy <- tidy(model4) %>%
# Convert to percentage
mutate_at(c("estimate", "std.error"), \(x) 100*x)
# summary(model4)
# Create table data
table_data <- tibble(
"Date of Birth" = c("1. Jan 1-Mar 31, 1994", "2. Apr 1-Dec 31, 1994", "3. Within-state diff."),
#"Type of state law" = c("School-leaving age: 16
(1)", "School-leaving age: 17 or 18
(2)", ""),
"School-leaving age: 16
(1)" = c(
ak91_fmt_br_se(model2_tidy$estimate[1], model2_tidy$std.error[1]),
ak91_fmt_br_se(model1_tidy$estimate[1], model1_tidy$std.error[1]),
ak91_fmt_br_se(
model2_tidy$estimate[1] - model1_tidy$estimate[1],
sqrt(model2_tidy$std.error[1]^2 + model1_tidy$std.error[1]^2)
)
),
"School-leaving age: 17 or 18
(2)" = c(
ak91_fmt_br_se(model4_tidy$estimate[1], model4_tidy$std.error[1]),
ak91_fmt_br_se(model3_tidy$estimate[1], model3_tidy$std.error[1]),
ak91_fmt_br_se(
model4_tidy$estimate[1] - model3_tidy$estimate[1],
sqrt(model4_tidy$std.error[1]^2 + model3_tidy$std.error[1]^2)
)
),
"Column (1) - (2)" = c(
ak91_fmt_br_se(
model2_tidy$estimate[1] - model4_tidy$estimate[1],
sqrt(model2_tidy$std.error[1]^2 + model4_tidy$std.error[1]^2)
),
ak91_fmt_br_se(
model1_tidy$estimate[1] - model3_tidy$estimate[1],
sqrt(model1_tidy$std.error[1]^2 + model3_tidy$std.error[1]^2)
),
ak91_fmt_br_se(
(model2_tidy$estimate[1] - model1_tidy$estimate[1]) -
(model4_tidy$estimate[1] - model3_tidy$estimate[1]),
sqrt(
model2_tidy$std.error[1]^2 + model1_tidy$std.error[1]^2 +
model4_tidy$std.error[1]^2 + model3_tidy$std.error[1]^2
)
)
)
)
# Create the table using kableExtra
kbl_out <- table_data %>%
kbl(
format = "html", align = c('l', 'c', 'c', 'c'),
caption = "Table II: Percent enrolled April 1, 1960",
escape = FALSE
) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
add_header_above(c(" " = 1, "Type of state law" = 2, " " =1)) %>%
add_header_above(c(" " = 1, "Percent enrolled April 1, 1960" = 2, " " =1)) %>%
footnote(
general = c(
"Standard errors in parentheses, both estimates and standard errors are in percentage",
"The result of this table will not be the same as in the paper since the author doesn't provide data for this session. I only used the available data to approximate the result of this table. Results in the last column were manually calculated e.g. $-0.5611=(-0.8301)-(-0.2690)$")
)
# Create the table using gt
table_data <- ak91_quarto_blank_df(table_data)
gt_tbl <- table_data %>%
gt() %>%
# Set title
tab_header(
title = "Table II: Percent enrolled April 1, 1960"
) %>%
# Set column alignment
cols_align(
align = "left",
columns = 1
) %>%
cols_align(
align = "center",
columns = 2:4
) %>%
# Add first level of column spanners
tab_spanner(
label = "Percent enrolled April 1, 1960",
columns = c("School-leaving age: 16
(1)", "School-leaving age: 17 or 18
(2)")
) %>%
# Add second level of column spanners
tab_spanner(
label = "Type of state law",
columns = c("School-leaving age: 16
(1)", "School-leaving age: 17 or 18
(2)"),
level = 2
) %>%
# Set table style
opt_row_striping() %>%
tab_options(
table.font.size = px(14),
heading.background.color = "#e8e8e8",
column_labels.font.weight = "bold",
table.border.top.width = px(3),
table.border.bottom.width = px(3)
) %>%
# Add footnotes
tab_footnote(
footnote = "Standard errors in parentheses, both estimates and standard errors are in percentage",
locations = cells_title(groups = "title")
) %>%
tab_footnote(
footnote = md("The result of this table will not be the same as in the paper since the author doesn't provide data for this session. I only used the available data to approximate the result of this table. Results in the last column were manually calculated e.g. $-0.5611=(-0.8301)-(-0.2690)$"),
locations = cells_title(groups = "title")
) %>%
# Ensure math symbols render correctly
fmt_markdown(columns = everything()) %>%
# Ensure all column headers use gt package's html() syntax
cols_label(.list = ak91_gt_cols_label(names(table_data)))
# Save the objects to RData file
save(table_data, gt_tbl, file = here("04-topics/rep-ak1991/Rcode/Table_II.RData"))