# Load required packages---- library(here) library(tidyverse) library(haven) # For reading Stata data library(broom) # For tidying regression results library(knitr) # For generating tables library(kableExtra) library(gt) source(here("04-topics/rep-ak1991/Rcode/ak1991-gt-quarto.R")) # Read the data raw_data <- read_dta("D:/github/course-em-eng/04-topics/rep-ak1991/stata/raw_data.dta") appendix2_table <- read_dta("D:/github/course-em-eng/04-topics/rep-ak1991/stata/appendix2_table.dta") # Rename variables to match Stata code data <- raw_data %>% rename( AGE = v1, AGEQ = v2, EDUC = v4, ENOCENT = v5, ESOCENT = v6, LWKLYWGE = v9, MARRIED = v10, MIDATL = v11, MT = v12, NEWENG = v13, CENSUS = v16, SOB = v17, QOB = v18, RACE = v19, SMSA = v20, SOATL = v21, WNOCENT = v24, WSOCENT = v25, YOB = v27 ) # Adjust YOB for years >= 1900 data <- data %>% mutate(YOB = if_else(YOB >= 1900, YOB - 1900, YOB)) # Create YOB dummies for (i in 0:9) { data <- data %>% mutate(!!paste0("YR", i) := if_else(YOB %in% c(20 + i, 30 + i, 40 + i), 1, 0)) } # Create QOB dummies for (i in 1:4) { data <- data %>% mutate(!!paste0("QTR", i) := if_else(QOB == i, 1, 0)) } # Create QOB*YOB interaction dummies for (j in 1:3) { for (i in 0:9) { data <- data %>% mutate(!!paste0("QTR", j, "YR", i) := !!sym(paste0("QTR", j)) * !!sym(paste0("YR", i))) } } # Create COHORT variable data <- data %>% mutate( COHORT = case_when( YOB <= 29 & YOB >= 20 ~ 2029, YOB <= 39 & YOB >= 30 ~ 3039, YOB <= 49 & YOB >= 40 ~ 4049, TRUE ~ NA_real_ ), AGEQ = if_else(CENSUS == 80, AGEQ - 1900, AGEQ), AGEQSQ = AGEQ * AGEQ ) # Merge with appendix2_table data_all <- data %>% left_join(appendix2_table, by = "SOB") # Analysis for Table II yob_table2 <- 44 # First Column: 16 # Create dt2 variable for age 16 dt2_16 <- data_all %>% filter(YOB == yob_table2, at_age1960 == 16) %>% mutate(dt2 = 0) %>% mutate(dt2 = if_else(EDUC >= 12, 1, dt2)) # Regression for QTR1 model1 <- lm(dt2 ~ QTR1, data = dt2_16) model1_tidy <- tidy(model1) %>% # Convert to percentage mutate_at(c("estimate", "std.error"), \(x) 100*x) # summary(model1) # Regression for QTR2, QTR3, QTR4 # Create a subset for model2 to match Stata's if condition dt2_16_model2 <- dt2_16 %>% select(dt2, QTR2, QTR3, QTR4) model2 <- lm(dt2 ~ QTR2 + QTR3 + QTR4, data = dt2_16_model2) model2_tidy <- tidy(model2) %>% # Convert to percentage mutate_at(c("estimate", "std.error"), \(x) 100*x) # summary(model2) # Second Column: 17 or 18 # Create dt2 variable for age 17 or 18 dt2_17_18 <- data_all %>% filter(YOB == yob_table2, at_age1960 %in% c(17, 18)) %>% mutate(dt2 = 0) %>% mutate(dt2 = if_else(EDUC >= 12, 1, dt2)) # Regression for QTR1 model3 <- lm(dt2 ~ QTR1, data = dt2_17_18) model3_tidy <- tidy(model3) %>% # Convert to percentage mutate_at(c("estimate", "std.error"), \(x) 100*x) # summary(model3) # Regression for QTR2, QTR3, QTR4 # Create a subset for model4 to match Stata's if condition dt2_17_18_model4 <- dt2_17_18 %>% select(dt2, QTR2, QTR3, QTR4) model4 <- lm(dt2 ~ QTR2 + QTR3 + QTR4, data = dt2_17_18_model4) model4_tidy <- tidy(model4) %>% # Convert to percentage mutate_at(c("estimate", "std.error"), \(x) 100*x) # summary(model4) # Create table data table_data <- tibble( "Date of Birth" = c("1. Jan 1-Mar 31, 1994", "2. Apr 1-Dec 31, 1994", "3. Within-state diff."), #"Type of state law" = c("School-leaving age: 16
(1)", "School-leaving age: 17 or 18
(2)", ""), "School-leaving age: 16
(1)" = c( ak91_fmt_br_se(model2_tidy$estimate[1], model2_tidy$std.error[1]), ak91_fmt_br_se(model1_tidy$estimate[1], model1_tidy$std.error[1]), ak91_fmt_br_se( model2_tidy$estimate[1] - model1_tidy$estimate[1], sqrt(model2_tidy$std.error[1]^2 + model1_tidy$std.error[1]^2) ) ), "School-leaving age: 17 or 18
(2)" = c( ak91_fmt_br_se(model4_tidy$estimate[1], model4_tidy$std.error[1]), ak91_fmt_br_se(model3_tidy$estimate[1], model3_tidy$std.error[1]), ak91_fmt_br_se( model4_tidy$estimate[1] - model3_tidy$estimate[1], sqrt(model4_tidy$std.error[1]^2 + model3_tidy$std.error[1]^2) ) ), "Column (1) - (2)" = c( ak91_fmt_br_se( model2_tidy$estimate[1] - model4_tidy$estimate[1], sqrt(model2_tidy$std.error[1]^2 + model4_tidy$std.error[1]^2) ), ak91_fmt_br_se( model1_tidy$estimate[1] - model3_tidy$estimate[1], sqrt(model1_tidy$std.error[1]^2 + model3_tidy$std.error[1]^2) ), ak91_fmt_br_se( (model2_tidy$estimate[1] - model1_tidy$estimate[1]) - (model4_tidy$estimate[1] - model3_tidy$estimate[1]), sqrt( model2_tidy$std.error[1]^2 + model1_tidy$std.error[1]^2 + model4_tidy$std.error[1]^2 + model3_tidy$std.error[1]^2 ) ) ) ) # Create the table using kableExtra kbl_out <- table_data %>% kbl( format = "html", align = c('l', 'c', 'c', 'c'), caption = "Table II: Percent enrolled April 1, 1960", escape = FALSE ) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>% add_header_above(c(" " = 1, "Type of state law" = 2, " " =1)) %>% add_header_above(c(" " = 1, "Percent enrolled April 1, 1960" = 2, " " =1)) %>% footnote( general = c( "Standard errors in parentheses, both estimates and standard errors are in percentage", "The result of this table will not be the same as in the paper since the author doesn't provide data for this session. I only used the available data to approximate the result of this table. Results in the last column were manually calculated e.g. $-0.5611=(-0.8301)-(-0.2690)$") ) # Create the table using gt table_data <- ak91_quarto_blank_df(table_data) gt_tbl <- table_data %>% gt() %>% # Set title tab_header( title = "Table II: Percent enrolled April 1, 1960" ) %>% # Set column alignment cols_align( align = "left", columns = 1 ) %>% cols_align( align = "center", columns = 2:4 ) %>% # Add first level of column spanners tab_spanner( label = "Percent enrolled April 1, 1960", columns = c("School-leaving age: 16
(1)", "School-leaving age: 17 or 18
(2)") ) %>% # Add second level of column spanners tab_spanner( label = "Type of state law", columns = c("School-leaving age: 16
(1)", "School-leaving age: 17 or 18
(2)"), level = 2 ) %>% # Set table style opt_row_striping() %>% tab_options( table.font.size = px(14), heading.background.color = "#e8e8e8", column_labels.font.weight = "bold", table.border.top.width = px(3), table.border.bottom.width = px(3) ) %>% # Add footnotes tab_footnote( footnote = "Standard errors in parentheses, both estimates and standard errors are in percentage", locations = cells_title(groups = "title") ) %>% tab_footnote( footnote = md("The result of this table will not be the same as in the paper since the author doesn't provide data for this session. I only used the available data to approximate the result of this table. Results in the last column were manually calculated e.g. $-0.5611=(-0.8301)-(-0.2690)$"), locations = cells_title(groups = "title") ) %>% # Ensure math symbols render correctly fmt_markdown(columns = everything()) %>% # Ensure all column headers use gt package's html() syntax cols_label(.list = ak91_gt_cols_label(names(table_data))) # Save the objects to RData file save(table_data, gt_tbl, file = here("04-topics/rep-ak1991/Rcode/Table_II.RData"))