rm(list=ls()) library(car) library(sandwich) dataset = c("data_with_tuition.RData") output.path = c("") B = 1000 # number of bootstrap replications d = 5 # degree of polynomial (leave at 5) load(dataset) # Vectors of variables outcome = c("wage") choice = c("state") regressors = c("exp", "expsq", "cafqt", "cafqt2", "mhgc", "mhgc2", "numsibs", "numsibs2", "urban14", "lavlocwage17", "lavlocwage172", "avurate", "avurate2", "d57", "d58", "d59", "d60", "d61", "d62", "d63", "lwage5", "lurate") instruments = c("cafqt", "mhgc", "numsibs", "urban14", "lavlocwage17", "avurate", "cafqt2", "mhgc2", "numsibs2", "lavlocwage172", "avurate2", "d57", "d58", "d59", "d60", "d61", "d62", "d63", "pub4", "pub4_cafqt", "pub4_mhgc", "pub4_numsibs", "lwage5_17", "lwage5_17_cafqt", "lwage5_17_mhgc", "lwage5_17_numsibs", "lurate_17", "lurate_17_cafqt", "lurate_17_mhgc", "lurate_17_numsibs", "tuition", "tuition_cafqt", "tuition_mhgc", "tuition_numsibs") set.seed(703296661) attach(data) # Getting propensity scores probit <- glm(state ~ 1 + cafqt + mhgc + numsibs + urban14 + lavlocwage17 + avurate + cafqt2 + mhgc2 + numsibs2 + lavlocwage172 + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + pub4 + pub4_cafqt + pub4_mhgc + pub4_numsibs + lwage5_17 + lwage5_17_cafqt + lwage5_17_mhgc + lwage5_17_numsibs + lurate_17 + lurate_17_cafqt + lurate_17_mhgc + lurate_17_numsibs + tuition + tuition_cafqt + tuition_mhgc + tuition_numsibs, family=binomial(link="probit")) prop <- as.vector(fitted.values(probit)) # Saving the naive covariance matrix from first step (R_1^-1 in Murphy-Topel's notation) V1 = vcov(probit) # The matrix of first step variables Z = model.matrix(~ 1 + cafqt + mhgc + numsibs + urban14 + lavlocwage17 + avurate + cafqt2 + mhgc2 + numsibs2 + lavlocwage172 + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + pub4 + pub4_cafqt + pub4_mhgc + pub4_numsibs + lwage5_17 + lwage5_17_cafqt + lwage5_17_mhgc + lwage5_17_numsibs + lurate_17 + lurate_17_cafqt + lurate_17_mhgc + lurate_17_numsibs + tuition + tuition_cafqt + tuition_mhgc + tuition_numsibs) # Saving the score vector probit.scores = estfun(probit) # Propensity Score Histograms prop1 = prop[state==1] prop0 = prop[state==0] c.supt <- 1*(prop>=max(min(prop[state==0]),min(prop[state==1]))) & (prop<=min(max(prop[state==0]),max(prop[state==1]))) # Can't select common support and use the Newey-McFadden/Murphy-Topel procedure #detach(data) #data = subset(data, c.supt) #attach(data) #prop = subset(prop,c.supt) #prop.full = prop #probit.scores = subset(probit.scores,c.supt) N <- length(prop) # Polynomials in the propensity score propd <- matrix(0,N,d) for (l in 1:d) { propd[,l] <- prop^l } # Polynomial regressions poly.reg2 = lm(wage ~ 1 + exp + expsq + cafqt + cafqt2 + mhgc + mhgc2 + numsibs + numsibs2 + urban14 + lavlocwage17 + lavlocwage172 + avurate + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + lwage5 + lurate + prop + exp:prop + expsq:prop + cafqt:prop + cafqt2:prop + mhgc:prop + mhgc2:prop + numsibs:prop + numsibs2:prop + urban14:prop + lavlocwage17:prop + lavlocwage172:prop + avurate:prop + avurate2:prop + d57:prop + d58:prop + d59:prop + d60:prop + d61:prop + d62:prop + d63:prop + lwage5:prop + lurate:prop + propd[,2]) poly.reg3 = lm(wage ~ 1 + exp + expsq + cafqt + cafqt2 + mhgc + mhgc2 + numsibs + numsibs2 + urban14 + lavlocwage17 + lavlocwage172 + avurate + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + lwage5 + lurate + prop + exp:prop + expsq:prop + cafqt:prop + cafqt2:prop + mhgc:prop + mhgc2:prop + numsibs:prop + numsibs2:prop + urban14:prop + lavlocwage17:prop + lavlocwage172:prop + avurate:prop + avurate2:prop + d57:prop + d58:prop + d59:prop + d60:prop + d61:prop + d62:prop + d63:prop + lwage5:prop + lurate:prop + propd[,c(2:3)]) poly.reg4 = lm(wage ~ 1 + exp + expsq + cafqt + cafqt2 + mhgc + mhgc2 + numsibs + numsibs2 + urban14 + lavlocwage17 + lavlocwage172 + avurate + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + lwage5 + lurate + prop + exp:prop + expsq:prop + cafqt:prop + cafqt2:prop + mhgc:prop + mhgc2:prop + numsibs:prop + numsibs2:prop + urban14:prop + lavlocwage17:prop + lavlocwage172:prop + avurate:prop + avurate2:prop + d57:prop + d58:prop + d59:prop + d60:prop + d61:prop + d62:prop + d63:prop + lwage5:prop + lurate:prop + propd[,c(2:4)]) poly.reg5 = lm(wage ~ 1 + exp + expsq + cafqt + cafqt2 + mhgc + mhgc2 + numsibs + numsibs2 + urban14 + lavlocwage17 + lavlocwage172 + avurate + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + lwage5 + lurate + prop + exp:prop + expsq:prop + cafqt:prop + cafqt2:prop + mhgc:prop + mhgc2:prop + numsibs:prop + numsibs2:prop + urban14:prop + lavlocwage17:prop + lavlocwage172:prop + avurate:prop + avurate2:prop + d57:prop + d58:prop + d59:prop + d60:prop + d61:prop + d62:prop + d63:prop + lwage5:prop + lurate:prop + propd[,c(2:5)]) # Defining X matrices X.2 = model.matrix(~ 1 + exp + expsq + cafqt + cafqt2 + mhgc + mhgc2 + numsibs + numsibs2 + urban14 + lavlocwage17 + lavlocwage172 + avurate + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + lwage5 + lurate + prop + exp:prop + expsq:prop + cafqt:prop + cafqt2:prop + mhgc:prop + mhgc2:prop + numsibs:prop + numsibs2:prop + urban14:prop + lavlocwage17:prop + lavlocwage172:prop + avurate:prop + avurate2:prop + d57:prop + d58:prop + d59:prop + d60:prop + d61:prop + d62:prop + d63:prop + lwage5:prop + lurate:prop + propd[,2]) X.3 = model.matrix(~ 1 + exp + expsq + cafqt + cafqt2 + mhgc + mhgc2 + numsibs + numsibs2 + urban14 + lavlocwage17 + lavlocwage172 + avurate + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + lwage5 + lurate + prop + exp:prop + expsq:prop + cafqt:prop + cafqt2:prop + mhgc:prop + mhgc2:prop + numsibs:prop + numsibs2:prop + urban14:prop + lavlocwage17:prop + lavlocwage172:prop + avurate:prop + avurate2:prop + d57:prop + d58:prop + d59:prop + d60:prop + d61:prop + d62:prop + d63:prop + lwage5:prop + lurate:prop + propd[,c(2:3)]) X.4 = model.matrix(~ 1 + exp + expsq + cafqt + cafqt2 + mhgc + mhgc2 + numsibs + numsibs2 + urban14 + lavlocwage17 + lavlocwage172 + avurate + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + lwage5 + lurate + prop + exp:prop + expsq:prop + cafqt:prop + cafqt2:prop + mhgc:prop + mhgc2:prop + numsibs:prop + numsibs2:prop + urban14:prop + lavlocwage17:prop + lavlocwage172:prop + avurate:prop + avurate2:prop + d57:prop + d58:prop + d59:prop + d60:prop + d61:prop + d62:prop + d63:prop + lwage5:prop + lurate:prop + propd[,c(2:4)]) X.5 = model.matrix(~ 1 + exp + expsq + cafqt + cafqt2 + mhgc + mhgc2 + numsibs + numsibs2 + urban14 + lavlocwage17 + lavlocwage172 + avurate + avurate2 + d57 + d58 + d59 + d60 + d61 + d62 + d63 + lwage5 + lurate + prop + exp:prop + expsq:prop + cafqt:prop + cafqt2:prop + mhgc:prop + mhgc2:prop + numsibs:prop + numsibs2:prop + urban14:prop + lavlocwage17:prop + lavlocwage172:prop + avurate:prop + avurate2:prop + d57:prop + d58:prop + d59:prop + d60:prop + d61:prop + d62:prop + d63:prop + lwage5:prop + lurate:prop + propd[,c(2:5)]) # Saving the naive covariance estimates (Q_0 in Murphy-Topel's notation) V2.poly.reg2 = vcov(poly.reg2) V2.poly.reg3 = vcov(poly.reg3) V2.poly.reg4 = vcov(poly.reg4) V2.poly.reg5 = vcov(poly.reg5) # Saving the score vectors poly.reg2.scores = estfun(poly.reg2) poly.reg3.scores = estfun(poly.reg3) poly.reg4.scores = estfun(poly.reg4) poly.reg5.scores = estfun(poly.reg5) # Constructing C matrix (Q_1 in Murphy-Topel's notation) WC.2 = diag(as.vector(poly.reg2$residuals^2 * dnorm(qnorm(prop)) * ((X.2[,c(1:23)] %*% as.matrix(c(poly.reg2$coefficients[24],poly.reg2$coefficients[26:47]))) + 2*prop*poly.reg2$coefficients[25]))) WC.3 = diag(as.vector(poly.reg3$residuals^2 * dnorm(qnorm(prop)) * ((X.3[,c(1:23)] %*% as.matrix(c(poly.reg3$coefficients[24],poly.reg3$coefficients[27:48]))) + 2*prop*poly.reg3$coefficients[25] + 3*prop^2*poly.reg3$coefficients[26]))) WC.4 = diag(as.vector(poly.reg4$residuals^2 * dnorm(qnorm(prop)) * ((X.4[,c(1:23)] %*% as.matrix(c(poly.reg4$coefficients[24],poly.reg4$coefficients[28:49]))) + 2*prop*poly.reg4$coefficients[25] + 3*prop^2*poly.reg4$coefficients[26] + 4*prop^3*poly.reg4$coefficients[27]))) WC.5 = diag(as.vector(poly.reg5$residuals^2 * dnorm(qnorm(prop)) * ((X.5[,c(1:23)] %*% as.matrix(c(poly.reg5$coefficients[24],poly.reg5$coefficients[29:50]))) + 2*prop*poly.reg5$coefficients[25] + 3*prop^2*poly.reg5$coefficients[26] + 4*prop^3*poly.reg5$coefficients[27] + 5*prop^4*poly.reg5$coefficients[28]))) C.2 = t(X.2) %*% WC.2 %*% Z C.3 = t(X.3) %*% WC.3 %*% Z C.4 = t(X.4) %*% WC.4 %*% Z C.5 = t(X.5) %*% WC.5 %*% Z # Constructing R matrix (Q_2 in Murphy-Topel's notation) R.2 = t(poly.reg2.scores) %*% probit.scores R.3 = t(poly.reg3.scores) %*% probit.scores R.4 = t(poly.reg4.scores) %*% probit.scores R.5 = t(poly.reg5.scores) %*% probit.scores # Murphy-Topel standard errors M.2 = V2.poly.reg2 + (V2.poly.reg2 %*% (C.2 %*% V1 %*% t(C.2) - R.2 %*% V1 %*% t(C.2) - C.2 %*% V1 %*% t(R.2)) %*% V2.poly.reg2) M.3 = V2.poly.reg3 + (V2.poly.reg3 %*% (C.3 %*% V1 %*% t(C.3) - R.3 %*% V1 %*% t(C.3) - C.3 %*% V1 %*% t(R.3)) %*% V2.poly.reg3) M.4 = V2.poly.reg4 + (V2.poly.reg4 %*% (C.4 %*% V1 %*% t(C.4) - R.4 %*% V1 %*% t(C.4) - C.4 %*% V1 %*% t(R.4)) %*% V2.poly.reg4) M.5 = V2.poly.reg5 + (V2.poly.reg5 %*% (C.5 %*% V1 %*% t(C.5) - R.5 %*% V1 %*% t(C.5) - C.5 %*% V1 %*% t(R.5)) %*% V2.poly.reg5) # Comparison of standard errors #compare.std.err.2 = cbind(sqrt(diag(vcov(poly.reg2))),sqrt(diag(hccm(poly.reg2))),sqrt(diag(M.2))) #compare.std.err.3 = cbind(sqrt(diag(vcov(poly.reg3))),sqrt(diag(hccm(poly.reg3))),sqrt(diag(M.3))) #compare.std.err.4 = cbind(sqrt(diag(vcov(poly.reg4))),sqrt(diag(hccm(poly.reg4))),sqrt(diag(M.4))) #compare.std.err.5 = cbind(sqrt(diag(vcov(poly.reg5))),sqrt(diag(hccm(poly.reg5))),sqrt(diag(M.5))) # Picking out the coefficients on polynomials in P OLS2 = poly.reg2$coefficients[(length(regressors)+3):(length(regressors)+2+1)] OLS3 = poly.reg3$coefficients[(length(regressors)+3):(length(regressors)+3+1)] OLS4 = poly.reg4$coefficients[(length(regressors)+3):(length(regressors)+4+1)] OLS5 = poly.reg5$coefficients[(length(regressors)+3):(length(regressors)+5+1)] # Calculating the robust covariance matrices of the estimates cov2 = hccm(poly.reg2)[(length(regressors)+3):(length(regressors)+2+1),(length(regressors)+3):(length(regressors)+2+1)] cov3 = hccm(poly.reg3)[(length(regressors)+3):(length(regressors)+3+1),(length(regressors)+3):(length(regressors)+3+1)] cov4 = hccm(poly.reg4)[(length(regressors)+3):(length(regressors)+4+1),(length(regressors)+3):(length(regressors)+4+1)] cov5 = hccm(poly.reg5)[(length(regressors)+3):(length(regressors)+5+1),(length(regressors)+3):(length(regressors)+5+1)] cov2.mt = M.2[(length(regressors)+3):(length(regressors)+2+1),(length(regressors)+3):(length(regressors)+2+1)] cov3.mt = M.3[(length(regressors)+3):(length(regressors)+3+1),(length(regressors)+3):(length(regressors)+3+1)] cov4.mt = M.4[(length(regressors)+3):(length(regressors)+4+1),(length(regressors)+3):(length(regressors)+4+1)] cov5.mt = M.5[(length(regressors)+3):(length(regressors)+5+1),(length(regressors)+3):(length(regressors)+5+1)] # Test statistics test.statistics = vector(mode='numeric',length=4) test.statistics[1] = t(OLS2) %*% solve(cov2) %*% OLS2 test.statistics[2] = t(OLS3) %*% solve(cov3) %*% OLS3 test.statistics[3] = t(OLS4) %*% solve(cov4) %*% OLS4 test.statistics[4] = t(OLS5) %*% solve(cov5) %*% OLS5 test.statistics.mt = vector(mode='numeric',length=4) test.statistics.mt[1] = t(OLS2) %*% solve(cov2.mt) %*% OLS2 test.statistics.mt[2] = t(OLS3) %*% solve(cov3.mt) %*% OLS3 test.statistics.mt[3] = t(OLS4) %*% solve(cov4.mt) %*% OLS4 test.statistics.mt[4] = t(OLS5) %*% solve(cov5.mt) %*% OLS5 # P values p.values = vector(mode='numeric', length=4) p.values[1] = pchisq(test.statistics[1],df=1,lower.tail=FALSE) p.values[2] = pchisq(test.statistics[2],df=2,lower.tail=FALSE) p.values[3] = pchisq(test.statistics[3],df=3,lower.tail=FALSE) p.values[4] = pchisq(test.statistics[4],df=4,lower.tail=FALSE) p.values.mt = vector(mode='numeric', length=4) p.values.mt[1] = pchisq(test.statistics.mt[1],df=1,lower.tail=FALSE) p.values.mt[2] = pchisq(test.statistics.mt[2],df=2,lower.tail=FALSE) p.values.mt[3] = pchisq(test.statistics.mt[3],df=3,lower.tail=FALSE) p.values.mt[4] = pchisq(test.statistics.mt[4],df=4,lower.tail=FALSE) # Bootstrapping ############### p.values.star = matrix(0, 4, B) p.values.mt.star = matrix(0, 4, B) propd.star = matrix(0, N, d) test.statistics.star = matrix(0, 4, B) test.statistics.mt.star = matrix(0, 4, B) OLS2.star = matrix(0, 1, B) OLS3.star = matrix(0, 2, B) OLS4.star = matrix(0, 3, B) OLS5.star = matrix(0, 4, B) for (i in 1:B) { sample <- sample(c(1:N), N, replace=TRUE) # Selecting the bootstrap sample wage.star = wage[sample] state.star = state[sample] cafqt.star = cafqt[sample] cafqt2.star = cafqt2[sample] mhgc.star = mhgc[sample] mhgc2.star = mhgc2[sample] numsibs.star = numsibs[sample] numsibs2.star = numsibs2[sample] urban14.star = urban14[sample] avurate.star = avurate[sample] avurate2.star = avurate2[sample] lavlocwage17.star = lavlocwage17[sample] lavlocwage172.star = lavlocwage172[sample] d57.star = d57[sample] d58.star = d58[sample] d59.star = d59[sample] d60.star = d60[sample] d61.star = d61[sample] d62.star = d62[sample] d63.star = d63[sample] pub4.star = pub4[sample] pub4_cafqt.star = pub4_cafqt[sample] pub4_mhgc.star = pub4_mhgc[sample] pub4_numsibs.star = pub4_numsibs[sample] lwage5_17.star = lwage5_17[sample] lwage5_17_cafqt.star = lwage5_17_cafqt[sample] lwage5_17_mhgc.star = lwage5_17_mhgc[sample] lwage5_17_numsibs.star = lwage5_17_numsibs[sample] lurate_17.star = lurate_17[sample] lurate_17_cafqt.star = lurate_17_cafqt[sample] lurate_17_mhgc.star = lurate_17_mhgc[sample] lurate_17_numsibs.star = lurate_17_numsibs[sample] tuition.star = tuition[sample] tuition_cafqt.star = tuition_cafqt[sample] tuition_mhgc.star = tuition_mhgc[sample] tuition_numsibs.star = tuition_numsibs[sample] exp.star = exp[sample] expsq.star = expsq[sample] lwage5.star = lwage5[sample] lurate.star = lurate[sample] # Re-estimating the propensity score in the bootstrap sample probit.star <- glm(state.star ~ 1 + cafqt.star + mhgc.star + numsibs.star + urban14.star + lavlocwage17.star + avurate.star + cafqt2.star + mhgc2.star + numsibs2.star + lavlocwage172.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + pub4.star + pub4_cafqt.star + pub4_mhgc.star + pub4_numsibs.star + lwage5_17.star + lwage5_17_cafqt.star + lwage5_17_mhgc.star + lwage5_17_numsibs.star + lurate_17.star + lurate_17_cafqt.star + lurate_17_mhgc.star + lurate_17_numsibs.star + tuition.star + tuition_cafqt.star + tuition_mhgc.star + tuition_numsibs.star, family=binomial(link="probit")) prop.star <- as.vector(fitted.values(probit.star)) # Saving the naive covariance matrix from first step (R_1^-1 in Murphy-Topel's notation) V1.star = vcov(probit.star) # The matrix of first step variables Z.star = model.matrix(~ 1 + cafqt.star + mhgc.star + numsibs.star + urban14.star + lavlocwage17.star + avurate.star + cafqt2.star + mhgc2.star + numsibs2.star + lavlocwage172.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + pub4.star + pub4_cafqt.star + pub4_mhgc.star + pub4_numsibs.star + lwage5_17.star + lwage5_17_cafqt.star + lwage5_17_mhgc.star + lwage5_17_numsibs.star + lurate_17.star + lurate_17_cafqt.star + lurate_17_mhgc.star + lurate_17_numsibs.star + tuition.star + tuition_cafqt.star + tuition_mhgc.star + tuition_numsibs.star) # Saving the score vector probit.scores.star = estfun(probit.star) # Polynomials in the propensity score propd.star <- matrix(0,N,d) for (l in 1:d) { propd.star[,l] <- prop.star^l } # Polynomial regressions poly.reg2.star = lm(wage.star ~ 1 + exp.star + expsq.star + cafqt.star + cafqt2.star + mhgc.star + mhgc2.star + numsibs.star + numsibs2.star + urban14.star + lavlocwage17.star + lavlocwage172.star + avurate.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + lwage5.star + lurate.star + prop.star + exp.star:prop.star + expsq.star:prop.star + cafqt.star:prop.star + cafqt2.star:prop.star + mhgc.star:prop.star + mhgc2.star:prop.star + numsibs.star:prop.star + numsibs2.star:prop.star + urban14.star:prop.star + lavlocwage17.star:prop.star + lavlocwage172.star:prop.star + avurate.star:prop.star + avurate2.star:prop.star + d57.star:prop.star + d58.star:prop.star + d59.star:prop.star + d60.star:prop.star + d61.star:prop.star + d62.star:prop.star + d63.star:prop.star + lwage5.star:prop.star + lurate.star:prop.star + propd.star[,2]) poly.reg3.star = lm(wage.star ~ 1 + exp.star + expsq.star + cafqt.star + cafqt2.star + mhgc.star + mhgc2.star + numsibs.star + numsibs2.star + urban14.star + lavlocwage17.star + lavlocwage172.star + avurate.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + lwage5.star + lurate.star + prop.star + exp.star:prop.star + expsq.star:prop.star + cafqt.star:prop.star + cafqt2.star:prop.star + mhgc.star:prop.star + mhgc2.star:prop.star + numsibs.star:prop.star + numsibs2.star:prop.star + urban14.star:prop.star + lavlocwage17.star:prop.star + lavlocwage172.star:prop.star + avurate.star:prop.star + avurate2.star:prop.star + d57.star:prop.star + d58.star:prop.star + d59.star:prop.star + d60.star:prop.star + d61.star:prop.star + d62.star:prop.star + d63.star:prop.star + lwage5.star:prop.star + lurate.star:prop.star + propd.star[,c(2:3)]) poly.reg4.star = lm(wage.star ~ 1 + exp.star + expsq.star + cafqt.star + cafqt2.star + mhgc.star + mhgc2.star + numsibs.star + numsibs2.star + urban14.star + lavlocwage17.star + lavlocwage172.star + avurate.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + lwage5.star + lurate.star + prop.star + exp.star:prop.star + expsq.star:prop.star + cafqt.star:prop.star + cafqt2.star:prop.star + mhgc.star:prop.star + mhgc2.star:prop.star + numsibs.star:prop.star + numsibs2.star:prop.star + urban14.star:prop.star + lavlocwage17.star:prop.star + lavlocwage172.star:prop.star + avurate.star:prop.star + avurate2.star:prop.star + d57.star:prop.star + d58.star:prop.star + d59.star:prop.star + d60.star:prop.star + d61.star:prop.star + d62.star:prop.star + d63.star:prop.star + lwage5.star:prop.star + lurate.star:prop.star + propd.star[,c(2:4)]) poly.reg5.star = lm(wage.star ~ 1 + exp.star + expsq.star + cafqt.star + cafqt2.star + mhgc.star + mhgc2.star + numsibs.star + numsibs2.star + urban14.star + lavlocwage17.star + lavlocwage172.star + avurate.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + lwage5.star + lurate.star + prop.star + exp.star:prop.star + expsq.star:prop.star + cafqt.star:prop.star + cafqt2.star:prop.star + mhgc.star:prop.star + mhgc2.star:prop.star + numsibs.star:prop.star + numsibs2.star:prop.star + urban14.star:prop.star + lavlocwage17.star:prop.star + lavlocwage172.star:prop.star + avurate.star:prop.star + avurate2.star:prop.star + d57.star:prop.star + d58.star:prop.star + d59.star:prop.star + d60.star:prop.star + d61.star:prop.star + d62.star:prop.star + d63.star:prop.star + lwage5.star:prop.star + lurate.star:prop.star + propd.star[,c(2:5)]) # Defining X matrices X.2.star = model.matrix(~ 1 + exp.star + expsq.star + cafqt.star + cafqt2.star + mhgc.star + mhgc2.star + numsibs.star + numsibs2.star + urban14.star + lavlocwage17.star + lavlocwage172.star + avurate.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + lwage5.star + lurate.star + prop.star + exp.star:prop.star + expsq.star:prop.star + cafqt.star:prop.star + cafqt2.star:prop.star + mhgc.star:prop.star + mhgc2.star:prop.star + numsibs.star:prop.star + numsibs2.star:prop.star + urban14.star:prop.star + lavlocwage17.star:prop.star + lavlocwage172.star:prop.star + avurate.star:prop.star + avurate2.star:prop.star + d57.star:prop.star + d58.star:prop.star + d59.star:prop.star + d60.star:prop.star + d61.star:prop.star + d62.star:prop.star + d63.star:prop.star + lwage5.star:prop.star + lurate.star:prop.star + propd.star[,2]) X.3.star = model.matrix(~ 1 + exp.star + expsq.star + cafqt.star + cafqt2.star + mhgc.star + mhgc2.star + numsibs.star + numsibs2.star + urban14.star + lavlocwage17.star + lavlocwage172.star + avurate.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + lwage5.star + lurate.star + prop.star + exp.star:prop.star + expsq.star:prop.star + cafqt.star:prop.star + cafqt2.star:prop.star + mhgc.star:prop.star + mhgc2.star:prop.star + numsibs.star:prop.star + numsibs2.star:prop.star + urban14.star:prop.star + lavlocwage17.star:prop.star + lavlocwage172.star:prop.star + avurate.star:prop.star + avurate2.star:prop.star + d57.star:prop.star + d58.star:prop.star + d59.star:prop.star + d60.star:prop.star + d61.star:prop.star + d62.star:prop.star + d63.star:prop.star + lwage5.star:prop.star + lurate.star:prop.star + propd.star[,c(2:3)]) X.4.star = model.matrix(~ 1 + exp.star + expsq.star + cafqt.star + cafqt2.star + mhgc.star + mhgc2.star + numsibs.star + numsibs2.star + urban14.star + lavlocwage17.star + lavlocwage172.star + avurate.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + lwage5.star + lurate.star + prop.star + exp.star:prop.star + expsq.star:prop.star + cafqt.star:prop.star + cafqt2.star:prop.star + mhgc.star:prop.star + mhgc2.star:prop.star + numsibs.star:prop.star + numsibs2.star:prop.star + urban14.star:prop.star + lavlocwage17.star:prop.star + lavlocwage172.star:prop.star + avurate.star:prop.star + avurate2.star:prop.star + d57.star:prop.star + d58.star:prop.star + d59.star:prop.star + d60.star:prop.star + d61.star:prop.star + d62.star:prop.star + d63.star:prop.star + lwage5.star:prop.star + lurate.star:prop.star + propd.star[,c(2:4)]) X.5.star = model.matrix(~ 1 + exp.star + expsq.star + cafqt.star + cafqt2.star + mhgc.star + mhgc2.star + numsibs.star + numsibs2.star + urban14.star + lavlocwage17.star + lavlocwage172.star + avurate.star + avurate2.star + d57.star + d58.star + d59.star + d60.star + d61.star + d62.star + d63.star + lwage5.star + lurate.star + prop.star + exp.star:prop.star + expsq.star:prop.star + cafqt.star:prop.star + cafqt2.star:prop.star + mhgc.star:prop.star + mhgc2.star:prop.star + numsibs.star:prop.star + numsibs2.star:prop.star + urban14.star:prop.star + lavlocwage17.star:prop.star + lavlocwage172.star:prop.star + avurate.star:prop.star + avurate2.star:prop.star + d57.star:prop.star + d58.star:prop.star + d59.star:prop.star + d60.star:prop.star + d61.star:prop.star + d62.star:prop.star + d63.star:prop.star + lwage5.star:prop.star + lurate.star:prop.star + propd.star[,c(2:5)]) # Saving the naive covariance estimates (Q_0 in Murphy-Topel's notation) V2.poly.reg2.star = vcov(poly.reg2.star) V2.poly.reg3.star = vcov(poly.reg3.star) V2.poly.reg4.star = vcov(poly.reg4.star) V2.poly.reg5.star = vcov(poly.reg5.star) # Saving the score vectors poly.reg2.scores.star = estfun(poly.reg2.star) poly.reg3.scores.star = estfun(poly.reg3.star) poly.reg4.scores.star = estfun(poly.reg4.star) poly.reg5.scores.star = estfun(poly.reg5.star) # Constructing C matrix (Q_1 in Murphy-Topel's notation) WC.2.star = diag(as.vector(poly.reg2.star$residuals^2 * dnorm(qnorm(prop.star)) * ((X.2.star[,c(1:23)] %*% as.matrix(c(poly.reg2.star$coefficients[24],poly.reg2.star$coefficients[26:47]))) + 2*prop.star*poly.reg2.star$coefficients[25]))) WC.3.star = diag(as.vector(poly.reg3.star$residuals^2 * dnorm(qnorm(prop.star)) * ((X.3.star[,c(1:23)] %*% as.matrix(c(poly.reg3.star$coefficients[24],poly.reg3.star$coefficients[27:48]))) + 2*prop.star*poly.reg3.star$coefficients[25] + 3*prop.star^2*poly.reg3.star$coefficients[26]))) WC.4.star = diag(as.vector(poly.reg4.star$residuals^2 * dnorm(qnorm(prop.star)) * ((X.4.star[,c(1:23)] %*% as.matrix(c(poly.reg4.star$coefficients[24],poly.reg4.star$coefficients[28:49]))) + 2*prop.star*poly.reg4.star$coefficients[25] + 3*prop.star^2*poly.reg4.star$coefficients[26] + 4*prop.star^3*poly.reg4.star$coefficients[27]))) WC.5.star = diag(as.vector(poly.reg5.star$residuals^2 * dnorm(qnorm(prop.star)) * ((X.5.star[,c(1:23)] %*% as.matrix(c(poly.reg5.star$coefficients[24],poly.reg5.star$coefficients[29:50]))) + 2*prop.star*poly.reg5.star$coefficients[25] + 3*prop.star^2*poly.reg5.star$coefficients[26] + 4*prop.star^3*poly.reg5.star$coefficients[27] + 5*prop.star^4*poly.reg5.star$coefficients[28]))) C.2.star = t(X.2.star) %*% WC.2.star %*% Z.star C.3.star = t(X.3.star) %*% WC.3.star %*% Z.star C.4.star = t(X.4.star) %*% WC.4.star %*% Z.star C.5.star = t(X.5.star) %*% WC.5.star %*% Z.star # Constructing R matrix (Q_2 in Murphy-Topel's notation) R.2.star = t(poly.reg2.scores.star) %*% probit.scores.star R.3.star = t(poly.reg3.scores.star) %*% probit.scores.star R.4.star = t(poly.reg4.scores.star) %*% probit.scores.star R.5.star = t(poly.reg5.scores.star) %*% probit.scores.star # Murphy-Topel standard errors M.2.star = V2.poly.reg2.star + (V2.poly.reg2.star %*% (C.2.star %*% V1.star %*% t(C.2.star) - R.2.star %*% V1.star %*% t(C.2.star) - C.2.star %*% V1.star %*% t(R.2.star)) %*% V2.poly.reg2.star) M.3.star = V2.poly.reg3.star + (V2.poly.reg3.star %*% (C.3.star %*% V1.star %*% t(C.3.star) - R.3.star %*% V1.star %*% t(C.3.star) - C.3.star %*% V1.star %*% t(R.3.star)) %*% V2.poly.reg3.star) M.4.star = V2.poly.reg4.star + (V2.poly.reg4.star %*% (C.4.star %*% V1.star %*% t(C.4.star) - R.4.star %*% V1.star %*% t(C.4.star) - C.4.star %*% V1.star %*% t(R.4.star)) %*% V2.poly.reg4.star) M.5.star = V2.poly.reg5.star + (V2.poly.reg5.star %*% (C.5.star %*% V1.star %*% t(C.5.star) - R.5.star %*% V1.star %*% t(C.5.star) - C.5.star %*% V1.star %*% t(R.5.star)) %*% V2.poly.reg5.star) # Picking the relevant coefficients OLS2.star[,i] = poly.reg2.star$coefficients[(length(regressors)+3):(length(regressors)+2+1)] OLS3.star[,i] = poly.reg3.star$coefficients[(length(regressors)+3):(length(regressors)+3+1)] OLS4.star[,i] = poly.reg4.star$coefficients[(length(regressors)+3):(length(regressors)+4+1)] OLS5.star[,i] = poly.reg5.star$coefficients[(length(regressors)+3):(length(regressors)+5+1)] # Calculating covariance matrices cov2.star = hccm(poly.reg2.star)[(length(regressors)+3):(length(regressors)+2+1),(length(regressors)+3):(length(regressors)+2+1)] cov3.star = hccm(poly.reg3.star)[(length(regressors)+3):(length(regressors)+3+1),(length(regressors)+3):(length(regressors)+3+1)] cov4.star = hccm(poly.reg4.star)[(length(regressors)+3):(length(regressors)+4+1),(length(regressors)+3):(length(regressors)+4+1)] cov5.star = hccm(poly.reg5.star)[(length(regressors)+3):(length(regressors)+5+1),(length(regressors)+3):(length(regressors)+5+1)] cov2.mt.star = M.2.star[(length(regressors)+3):(length(regressors)+2+1),(length(regressors)+3):(length(regressors)+2+1)] cov3.mt.star = M.3.star[(length(regressors)+3):(length(regressors)+3+1),(length(regressors)+3):(length(regressors)+3+1)] cov4.mt.star = M.4.star[(length(regressors)+3):(length(regressors)+4+1),(length(regressors)+3):(length(regressors)+4+1)] cov5.mt.star = M.5.star[(length(regressors)+3):(length(regressors)+5+1),(length(regressors)+3):(length(regressors)+5+1)] # Test statistics test.statistics.star[1,i] = t(OLS2.star[,i] - OLS2) %*% solve(cov2.star) %*% (OLS2.star[,i] - OLS2) test.statistics.star[2,i] = t(OLS3.star[,i] - OLS3) %*% solve(cov3.star) %*% (OLS3.star[,i] - OLS3) test.statistics.star[3,i] = t(OLS4.star[,i] - OLS4) %*% solve(cov4.star) %*% (OLS4.star[,i] - OLS4) test.statistics.star[4,i] = t(OLS5.star[,i] - OLS5) %*% solve(cov5.star) %*% (OLS5.star[,i] - OLS5) test.statistics.mt.star[1,i] = t(OLS2.star[,i] - OLS2) %*% solve(cov2.mt.star) %*% (OLS2.star[,i] - OLS2) test.statistics.mt.star[2,i] = t(OLS3.star[,i] - OLS3) %*% solve(cov3.mt.star) %*% (OLS3.star[,i] - OLS3) test.statistics.mt.star[3,i] = t(OLS4.star[,i] - OLS4) %*% solve(cov4.mt.star) %*% (OLS4.star[,i] - OLS4) test.statistics.mt.star[4,i] = t(OLS5.star[,i] - OLS5) %*% solve(cov5.mt.star) %*% (OLS5.star[,i] - OLS5) print(i) } # Calculating the distribution of p-values under the null (bootstrap analog) for each model for (i in 1:B) { p.values.star[1,i] = sum(1*(test.statistics.star[1,] > test.statistics.star[1,i]))/B p.values.star[2,i] = sum(1*(test.statistics.star[2,] > test.statistics.star[2,i]))/B p.values.star[3,i] = sum(1*(test.statistics.star[3,] > test.statistics.star[3,i]))/B p.values.star[4,i] = sum(1*(test.statistics.star[4,] > test.statistics.star[4,i]))/B } for (i in 1:B) { p.values.mt.star[1,i] = sum(1*(test.statistics.mt.star[1,] > test.statistics.mt.star[1,i]))/B p.values.mt.star[2,i] = sum(1*(test.statistics.mt.star[2,] > test.statistics.mt.star[2,i]))/B p.values.mt.star[3,i] = sum(1*(test.statistics.mt.star[3,] > test.statistics.mt.star[3,i]))/B p.values.mt.star[4,i] = sum(1*(test.statistics.mt.star[4,] > test.statistics.mt.star[4,i]))/B } # Calculating the critical value for the test as the 0.05 quantile of the minimum of the p-values of the different models critical.value = quantile(apply(p.values.star,2,min), 0.05) critical.value.10 = quantile(apply(p.values.star,2,min), 0.10) critical.value.mt = quantile(apply(p.values.mt.star,2,min), 0.05) critical.value.mt.10 = quantile(apply(p.values.mt.star,2,min), 0.10) # Calculating p-values by comparing the distribution of test statistics in the bootstrap sample to the test statistics in the whole sample p.values.boot = vector(mode='numeric', length=4) p.values.boot[1] = sum(1*(test.statistics.star[1,] > test.statistics[1]))/B p.values.boot[2] = sum(1*(test.statistics.star[2,] > test.statistics[2]))/B p.values.boot[3] = sum(1*(test.statistics.star[3,] > test.statistics[3]))/B p.values.boot[4] = sum(1*(test.statistics.star[4,] > test.statistics[4]))/B # P-values using Murphy-Topel standard errors p.values.mt.boot = vector(mode='numeric', length=4) p.values.mt.boot[1] = sum(1*(test.statistics.mt.star[1,] > test.statistics.mt[1]))/B p.values.mt.boot[2] = sum(1*(test.statistics.mt.star[2,] > test.statistics.mt[2]))/B p.values.mt.boot[3] = sum(1*(test.statistics.mt.star[3,] > test.statistics.mt[3]))/B p.values.mt.boot[4] = sum(1*(test.statistics.mt.star[4,] > test.statistics.mt[4]))/B # Bootstrapping non-pivotal statistic p.values.nonpivot = vector(mode='numeric',length=4) p.values.nonpivot[1] = pchisq((OLS2^2)/var(t(OLS2.star)),df=1,lower.tail=F) p.values.nonpivot[2] = pchisq(OLS3 %*% solve(var(t(OLS3.star))) %*% as.matrix(OLS3),df=2,lower.tail=F) p.values.nonpivot[3] = pchisq(OLS4 %*% solve(var(t(OLS4.star))) %*% as.matrix(OLS4),df=3,lower.tail=F) p.values.nonpivot[4] = pchisq(OLS5 %*% solve(var(t(OLS5.star))) %*% as.matrix(OLS5),df=4,lower.tail=F) # Bias-correcting the coefficient vectors OLS2.bc = 2*OLS2 - apply(OLS2.star,1,mean) OLS3.bc = 2*OLS3 - apply(OLS3.star,1,mean) OLS4.bc = 2*OLS4 - apply(OLS4.star,1,mean) OLS5.bc = 2*OLS5 - apply(OLS5.star,1,mean) p.values.nonpivot.bc = vector(mode='numeric',length=4) p.values.nonpivot.bc[1] = pchisq((OLS2.bc^2)/var(t(OLS2.star)),df=1,lower.tail=F) p.values.nonpivot.bc[2] = pchisq(OLS3.bc %*% solve(var(t(OLS3.star))) %*% as.matrix(OLS3.bc),df=2,lower.tail=F) p.values.nonpivot.bc[3] = pchisq(OLS4.bc %*% solve(var(t(OLS4.star))) %*% as.matrix(OLS4.bc),df=3,lower.tail=F) p.values.nonpivot.bc[4] = pchisq(OLS5.bc %*% solve(var(t(OLS5.star))) %*% as.matrix(OLS5.bc),df=4,lower.tail=F) # Bias-correcting the test statistics test.statistics.bc = vector(mode='numeric', length=4) test.statistics.bc[1] = 2*test.statistics[1] - mean(test.statistics.star[1,]) test.statistics.bc[2] = 2*test.statistics[2] - mean(test.statistics.star[2,]) test.statistics.bc[3] = 2*test.statistics[3] - mean(test.statistics.star[3,]) test.statistics.bc[4] = 2*test.statistics[4] - mean(test.statistics.star[4,]) test.statistics.mt.bc = vector(mode='numeric', length=4) test.statistics.mt.bc[1] = 2*test.statistics.mt[1] - mean(test.statistics.mt.star[1,]) test.statistics.mt.bc[2] = 2*test.statistics.mt[2] - mean(test.statistics.mt.star[2,]) test.statistics.mt.bc[3] = 2*test.statistics.mt[3] - mean(test.statistics.mt.star[3,]) test.statistics.mt.bc[4] = 2*test.statistics.mt[4] - mean(test.statistics.mt.star[4,]) # Calculating the p-values from the bias-corrected test statistics using the bootstrap distribution of test statistics p.values.bc.boot = vector(mode='numeric', length=4) p.values.bc.boot[1] = sum(1*(test.statistics.star[1,] > test.statistics.bc[1]))/B p.values.bc.boot[2] = sum(1*(test.statistics.star[2,] > test.statistics.bc[2]))/B p.values.bc.boot[3] = sum(1*(test.statistics.star[3,] > test.statistics.bc[3]))/B p.values.bc.boot[4] = sum(1*(test.statistics.star[4,] > test.statistics.bc[4]))/B # Bias-corrected p-values using Murphy-Topel standard errors p.values.mt.bc.boot = vector(mode='numeric', length=4) p.values.mt.bc.boot[1] = sum(1*(test.statistics.mt.star[1,] > test.statistics.mt.bc[1]))/B p.values.mt.bc.boot[2] = sum(1*(test.statistics.mt.star[2,] > test.statistics.mt.bc[2]))/B p.values.mt.bc.boot[3] = sum(1*(test.statistics.mt.star[3,] > test.statistics.mt.bc[3]))/B p.values.mt.bc.boot[4] = sum(1*(test.statistics.mt.star[4,] > test.statistics.mt.bc[4]))/B # Calculating the p-values from the bias-corrected test statistics using the chi-square asymptotic approximation p.values.bc = vector(mode='numeric', length=4) p.values.bc[1] = pchisq(test.statistics.bc[1],df=1,lower.tail=FALSE) p.values.bc[2] = pchisq(test.statistics.bc[2],df=2,lower.tail=FALSE) p.values.bc[3] = pchisq(test.statistics.bc[3],df=3,lower.tail=FALSE) p.values.bc[4] = pchisq(test.statistics.bc[4],df=4,lower.tail=FALSE) # Outputting results write.table(p.values.boot, file=paste(output.path,"p_values.txt",sep="")) write.table(p.values.bc.boot, file=paste(output.path,"p_values_bc.txt",sep="")) write.table(p.values.mt.boot, file=paste(output.path,"p_values_MT.txt",sep="")) write.table(p.values.mt.bc.boot, file=paste(output.path,"p_values_bc_MT.txt",sep="")) write.table(p.values.nonpivot, file=paste(output.path,"p_values_nonpivot.txt",sep="")) write.table(p.values.nonpivot.bc, file=paste(output.path,"p_values_nonpivot_bc.txt",sep="")) write.table(critical.value, file=paste(output.path,"critical_value.txt",sep="")) write.table(critical.value.10, file=paste(output.path,"critical_value_10.txt",sep="")) write.table(critical.value.mt, file=paste(output.path,"critical_value_MT.txt",sep="")) write.table(critical.value.mt.10, file=paste(output.path,"critical_value_MT_10.txt",sep="")) detach(data)