Code
require(knitr)
require(here)
require(broom)
require(haven)
require(systemfit)
require(AER)
#renv::install("ivmodel")
require(ivmodel) # for LIML fit
# load package
library(systemfit)
内生自变量及工具变量法的R代码实现
这里将展示Homework 03
的R
编程细节
为了研究居民的工资收入(wage
,或对数化的工资lwage
)是如何决定的,我们考虑如下所示的一些变量(具体定义见 表 1))。
变量_代码 | 变量_中文 | 定义和取值 |
---|---|---|
obs | 序号 | 序号 |
lwage | 对数化工资 | 定量变量:取对数后的工资 |
edu | 受教育年数 | 定量变量:受教育年数(年) |
exp | 工作年数 | 定量变量:工作年数(年) |
exp2 | 工作年数平方 | 定量变量:工作年数平方/100 |
black | 是否黑人 | 虚拟变量:1=黑人;0=其他 |
south | 是否南方地区 | 虚拟变量:1=南方地区;0=其他 |
urban | 是否居住市区 | 虚拟变量:1=居住市区;0=其他 |
college | 附近是否有大学 | 虚拟变量:1=附近有大学;0=其他 |
public | 附近是否有公立大学 | 虚拟变量:1=附近有公立大学;0=其他 |
private | 附近是否有私立大学 | 虚拟变量:1=附近有私立大学;0=其他 |
age | 年龄 | 定量变量:年龄(年) |
age2 | 年龄平方 | 定量变量:年龄(年)的平方/100 |
momedu | 母亲受教育年数 | 定量变量:母亲受教育年数(年) |
dadedu | 父亲受教育年数 | 定量变量:父亲受教育年数(年) |
\[\begin{align} \begin{split} lwage_i=&+\beta_{1}+\beta_{2}edu_i+\beta_{3}exp_i+\beta_{4}exp2_i\\&+\beta_{5}black_i+\beta_{6}south_i+\beta_{7}urban_i+u_i \end{split} \end{align}\]
Call:
lm(formula = mod_ls, data = tbl_reg)
Residuals:
Min 1Q Median 3Q Max
-1.59297 -0.22315 0.01893 0.24223 1.33190
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.733664 0.067603 70.022 < 2e-16 ***
edu 0.074009 0.003505 21.113 < 2e-16 ***
exp 0.083596 0.006648 12.575 < 2e-16 ***
exp2 -0.224088 0.031784 -7.050 2.21e-12 ***
black -0.189632 0.017627 -10.758 < 2e-16 ***
south -0.124862 0.015118 -8.259 < 2e-16 ***
urban 0.161423 0.015573 10.365 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.3742 on 3003 degrees of freedom
Multiple R-squared: 0.2905, Adjusted R-squared: 0.2891
F-statistic: 204.9 on 6 and 3003 DF, p-value: < 2.2e-16
edu
内生,工具变量为college
:
Call:
AER::ivreg(formula = mod_iv_cl, data = tbl_reg)
Residuals:
Min 1Q Median 3Q Max
-1.82125 -0.24065 0.02368 0.25469 1.43205
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.75278 0.82934 4.525 6.27e-06 ***
edu 0.13229 0.04923 2.687 0.00725 **
exp 0.10750 0.02130 5.047 4.76e-07 ***
exp2 -0.22841 0.03341 -6.836 9.84e-12 ***
black -0.13080 0.05287 -2.474 0.01342 *
south -0.10490 0.02307 -4.546 5.67e-06 ***
urban 0.13132 0.03013 4.359 1.35e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.391 on 3003 degrees of freedom
Multiple R-Squared: 0.2252, Adjusted R-squared: 0.2237
Wald test: 120.8 on 6 and 3003 DF, p-value: < 2.2e-16
edu
、exp
和exp2
内生,工具变量为college
、age
和age2
:
Call:
AER::ivreg(formula = mod_iv_ca, data = tbl_reg)
Residuals:
Min 1Q Median 3Q Max
-1.82400 -0.25248 0.02286 0.26349 1.31561
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.06567 0.60850 6.682 2.81e-11 ***
edu 0.13295 0.05138 2.588 0.009712 **
exp 0.05596 0.02599 2.153 0.031412 *
exp2 -0.07957 0.13403 -0.594 0.552796
black -0.10314 0.07737 -1.333 0.182623
south -0.09818 0.02876 -3.413 0.000651 ***
urban 0.10798 0.04974 2.171 0.030010 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4032 on 3003 degrees of freedom
Multiple R-Squared: 0.1764, Adjusted R-squared: 0.1747
Wald test: 148.1 on 6 and 3003 DF, p-value: < 2.2e-16
edu
内生,工具变量为public
和private
:
Call:
AER::ivreg(formula = mod_tsls_pp, data = tbl_reg)
Residuals:
Min 1Q Median 3Q Max
-1.93985 -0.25152 0.01722 0.27365 1.48154
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.26801 0.68718 4.756 2.07e-06 ***
edu 0.16109 0.04077 3.951 7.96e-05 ***
exp 0.11931 0.01818 6.564 6.16e-11 ***
exp2 -0.23054 0.03503 -6.582 5.46e-11 ***
black -0.10173 0.04531 -2.245 0.0248 *
south -0.09504 0.02165 -4.389 1.18e-05 ***
urban 0.11645 0.02705 4.305 1.73e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4108 on 3003 degrees of freedom
Multiple R-Squared: 0.1447, Adjusted R-squared: 0.143
Wald test: 111 on 6 and 3003 DF, p-value: < 2.2e-16
edu
、exp
和exp2
内生,工具变量为public
和private
、以及age
和age2
:
Call:
AER::ivreg(formula = mod_tsls_ppa, data = tbl_reg)
Residuals:
Min 1Q Median 3Q Max
-1.92063 -0.27218 0.02074 0.28186 1.43044
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.74815 0.48338 7.754 1.21e-14 ***
edu 0.15969 0.04090 3.904 9.65e-05 ***
exp 0.04703 0.02502 1.880 0.060213 .
exp2 -0.03225 0.12811 -0.252 0.801255
black -0.06403 0.06301 -1.016 0.309610
south -0.08573 0.02563 -3.345 0.000834 ***
urban 0.08348 0.04125 2.024 0.043073 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.4327 on 3003 degrees of freedom
Multiple R-Squared: 0.05111, Adjusted R-squared: 0.04921
Wald test: 129.7 on 6 and 3003 DF, p-value: < 2.2e-16
以上模型的综合汇总结果比较见下表:
说明:
如果使用两阶段最小二乘法,则只列出了第二阶段回归结果
最后一列LIML_pp
采用的是有限信息极大似然估计方法(LIML)。
systemfit
进行估计在 节 4 中,给定情形为:edu
、exp
和exp2
内生,工具变量为college
、age
和age2
。作为对比,我们此处使用R包systemfit
进行估计。具体结果见 表 2:
方程1.3需要去掉截距项,否则会报错!
hansen2021此时应用的是IV(b)方法。
# load package
library(systemfit)
# set two models
eq_1.1 <- edu ~ age + age2 +black +south +urban + college
eq_1.2 <- exp ~ age + age2 +black +south +urban + college
eq_1.3 <- exp2 ~ -1 + age + age2 +black +south +urban + college # important !!
eq_2 <- lwage ~ edu + exp + exp2 +black +south +urban
sys <- list(
eq1.1 = eq_1.1,
eq1.2 = eq_1.2,
eq1.3 = eq_1.3,
eq2 = eq_2)
# specify the instruments
instr <- ~age + age2 +black +south +urban + college
# fit models
fit.sys <- systemfit(
formula = sys,
inst = instr,
method ="2SLS",
data = tbl_reg)
smry <- summary(fit.sys)
out <- tidy(fit.sys) %>%
separate(., col = term,
into = c("eq", "vars"), sep = "_") %>%
select(-all_of(starts_with("conf")))
knitr::kable(out,
#caption ="等量工具变量下的2SLS结果" ,
digits = 4)
eq | vars | estimate | std.error | statistic | p.value |
---|---|---|---|---|---|
eq1.1 | (Intercept) | -1.8695 | 4.2984 | -0.4349 | 0.6636 |
eq1.1 | age | 1.0614 | 0.3014 | 3.5217 | 0.0004 |
eq1.1 | age2 | -1.8760 | 0.5231 | -3.5860 | 0.0003 |
eq1.1 | black | -1.4684 | 0.1154 | -12.7194 | 0.0000 |
eq1.1 | south | -0.4597 | 0.1024 | -4.4878 | 0.0000 |
eq1.1 | urban | 0.8354 | 0.1093 | 7.6465 | 0.0000 |
eq1.1 | college | 0.3471 | 0.1070 | 3.2441 | 0.0012 |
eq1.2 | (Intercept) | -4.1305 | 4.2984 | -0.9609 | 0.3367 |
eq1.2 | age | -0.0614 | 0.3014 | -0.2039 | 0.8385 |
eq1.2 | age2 | 1.8760 | 0.5231 | 3.5860 | 0.0003 |
eq1.2 | black | 1.4684 | 0.1154 | 12.7194 | 0.0000 |
eq1.2 | south | 0.4597 | 0.1024 | 4.4878 | 0.0000 |
eq1.2 | urban | -0.8354 | 0.1093 | -7.6465 | 0.0000 |
eq1.2 | college | -0.3471 | 0.1070 | -3.2441 | 0.0012 |
eq1.3 | age | -0.1275 | 0.0035 | -36.7038 | 0.0000 |
eq1.3 | age2 | 0.5746 | 0.0110 | 52.1129 | 0.0000 |
eq1.3 | black | 0.2835 | 0.0244 | 11.6143 | 0.0000 |
eq1.3 | south | 0.1116 | 0.0217 | 5.1534 | 0.0000 |
eq1.3 | urban | -0.1758 | 0.0231 | -7.6098 | 0.0000 |
eq1.3 | college | -0.0677 | 0.0226 | -2.9942 | 0.0028 |
eq2 | (Intercept) | 4.0657 | 0.6085 | 6.6815 | 0.0000 |
eq2 | edu | 0.1329 | 0.0514 | 2.5876 | 0.0097 |
eq2 | exp | 0.0560 | 0.0260 | 2.1528 | 0.0314 |
eq2 | exp2 | -0.0796 | 0.1340 | -0.5936 | 0.5528 |
eq2 | black | -0.1031 | 0.0774 | -1.3330 | 0.1826 |
eq2 | south | -0.0982 | 0.0288 | -3.4131 | 0.0007 |
eq2 | urban | 0.1080 | 0.0497 | 2.1710 | 0.0300 |
在 节 6 中,给定情形为:edu
、exp
和exp2
内生,工具变量为public
和private
、以及age
和age2
。作为对比,我们此处使用R包systemfit
进行估计。具体结果见表 表 3:
# load package
library(systemfit)
# set two models
eq_1.1 <- edu ~ age + age2 +black +south +urban + public +private
eq_1.2 <- exp ~ age + age2 +black +south +urban + public +private
eq_1.3 <- exp2 ~ age + age2 +black +south +urban + public +private
eq_2 <- lwage ~ edu + exp + exp2 +black +south +urban
sys <- list(
eq1.1 = eq_1.1,
eq1.2 = eq_1.2,
eq1.3 = eq_1.3,
eq2 = eq_2)
# specify the instruments
instr <- ~age + age2 +black +south +urban + public +private
# fit models
fit.sys <- systemfit(
formula = sys,
inst = instr,
method ="2SLS",
data = tbl_reg)
smry <- summary(fit.sys)
out <- tidy(fit.sys) %>%
separate(., col = term,
into = c("eq", "vars"), sep = "_") %>%
select(-all_of(starts_with("conf")))
knitr::kable(out,
#caption ="富余工具变量下的2SLS结果" ,
digits = 4)
eq | vars | estimate | std.error | statistic | p.value |
---|---|---|---|---|---|
eq1.1 | (Intercept) | -1.8692 | 4.2916 | -0.4356 | 0.6632 |
eq1.1 | age | 1.0612 | 0.3009 | 3.5265 | 0.0004 |
eq1.1 | age2 | -1.8772 | 0.5223 | -3.5939 | 0.0003 |
eq1.1 | black | -1.4681 | 0.1153 | -12.7375 | 0.0000 |
eq1.1 | south | -0.4270 | 0.1028 | -4.1550 | 0.0000 |
eq1.1 | urban | 0.8285 | 0.1091 | 7.5937 | 0.0000 |
eq1.1 | public | 0.4694 | 0.1133 | 4.1429 | 0.0000 |
eq1.1 | private | 0.0660 | 0.1376 | 0.4794 | 0.6317 |
eq1.2 | (Intercept) | -4.1308 | 4.2916 | -0.9625 | 0.3359 |
eq1.2 | age | -0.0612 | 0.3009 | -0.2034 | 0.8388 |
eq1.2 | age2 | 1.8772 | 0.5223 | 3.5939 | 0.0003 |
eq1.2 | black | 1.4681 | 0.1153 | 12.7375 | 0.0000 |
eq1.2 | south | 0.4270 | 0.1028 | 4.1550 | 0.0000 |
eq1.2 | urban | -0.8285 | 0.1091 | -7.5937 | 0.0000 |
eq1.2 | public | -0.4694 | 0.1133 | -4.1429 | 0.0000 |
eq1.2 | private | -0.0660 | 0.1376 | -0.4794 | 0.6317 |
eq1.3 | (Intercept) | 6.0994 | 0.9001 | 6.7763 | 0.0000 |
eq1.3 | age | -0.5545 | 0.0631 | -8.7851 | 0.0000 |
eq1.3 | age2 | 1.3135 | 0.1095 | 11.9903 | 0.0000 |
eq1.3 | black | 0.2820 | 0.0242 | 11.6654 | 0.0000 |
eq1.3 | south | 0.1035 | 0.0216 | 4.8040 | 0.0000 |
eq1.3 | urban | -0.1738 | 0.0229 | -7.5955 | 0.0000 |
eq1.3 | public | -0.1033 | 0.0238 | -4.3456 | 0.0000 |
eq1.3 | private | -0.0018 | 0.0289 | -0.0626 | 0.9501 |
eq2 | (Intercept) | 3.7481 | 0.4834 | 7.7541 | 0.0000 |
eq2 | edu | 0.1597 | 0.0409 | 3.9044 | 0.0001 |
eq2 | exp | 0.0470 | 0.0250 | 1.8799 | 0.0602 |
eq2 | exp2 | -0.0323 | 0.1281 | -0.2517 | 0.8013 |
eq2 | black | -0.0640 | 0.0630 | -1.0162 | 0.3096 |
eq2 | south | -0.0857 | 0.0256 | -3.3448 | 0.0008 |
eq2 | urban | 0.0835 | 0.0412 | 2.0239 | 0.0431 |