The lm() function

# An independent variable X
X <- rnorm(100,0,1)
# A Dependent variable Y
Y <- rnorm(100,0,1)+X
# Running the regression
mod <- lm(Y~X)
# Getting the predicted values (hats) from the model
Yhat <- predict(mod,type = "response")
# Making the graph (requires ggplot2)
library(ggplot2)
dat <- data.frame(X, Y, Yhat)
ggplot(dat, aes(x = X, y = Y)) +
geom_segment(aes(xend = X, yend = Yhat),color="red",alpha=0.5) +
geom_point(color="#4F2683")+
geom_smooth(method="lm",se = F,color="#4F2683")+
theme_minimal()
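The red segments are the residuals. As a quick check (a minimal sketch, not part of the original slides), fitted() and resid() return the same pieces that predict() and Y - Yhat give here:
# fitted() gives the same in-sample predictions as predict()
all.equal(unname(fitted(mod)), unname(Yhat))
# the residuals are the vertical red segments drawn above
all.equal(unname(resid(mod)), unname(Y - Yhat))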
The lm() function needs two things: a formula-type object and a data frame.

X1 <- rnorm(1000)
X2 <- rnorm(1000)
X3 <- rnorm(1000)
X4 <- sample(c("A","B","C"), 1000, replace =T)
Y <- rnorm(1000) + .5*X1 - .8* X2 + .2*X1*X2 - .3*X3^2 + ifelse(X4=="A",0.2,ifelse(X4=="B",0.4,0.6))
tmp <- data.frame(Y,X1,X2,X3,X4)
# Formulas generally look like this
Y ~ X1 + X2
# You can add an interaction
Y ~ X1 + X2 + X1*X2
# A Squared term
Y ~ X1 + X2 + X1*X2 + I(X3^2)
# A categorical variable
Y ~ X1 + X2 + X1*X2 + I(X3^2) + factor(X4)
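To see how a formula gets expanded into regressors, you can inspect the design matrix. This is an illustrative check, not part of the original slides:
# Design matrix lm() builds from the last formula above:
# intercept, X1, X2, squared term, dummies for X4 (A is the baseline), and X1:X2
head(model.matrix(Y ~ X1 + X2 + X1*X2 + I(X3^2) + factor(X4), data = tmp))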
mod1 <- lm(Y ~ X1 + X2,data=tmp)
summary(mod1)
##
## Call:
## lm(formula = Y ~ X1 + X2, data = tmp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1452 -0.6923 0.0180 0.6911 3.3754
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.08509 0.03451 2.466 0.0138 *
## X1 0.50269 0.03506 14.337 <2e-16 ***
## X2 -0.75844 0.03571 -21.236 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.091 on 997 degrees of freedom
## Multiple R-squared: 0.3911, Adjusted R-squared: 0.3898
## F-statistic: 320.1 on 2 and 997 DF, p-value: < 2.2e-16
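Once a model is fit, the estimates and their confidence intervals can be pulled out directly (a quick sketch, not shown in the original output):
coef(mod1)     # point estimates
confint(mod1)  # 95% confidence intervals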
mod2 <- lm(Y ~ X1 + X2 + X1*X2,data=tmp)
summary(mod2)
##
## Call:
## lm(formula = Y ~ X1 + X2 + X1 * X2, data = tmp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1208 -0.6730 0.0398 0.7073 3.1520
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07961 0.03392 2.347 0.0191 *
## X1 0.50172 0.03445 14.564 < 2e-16 ***
## X2 -0.74760 0.03513 -21.278 < 2e-16 ***
## X1:X2 0.20987 0.03458 6.069 1.83e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.072 on 996 degrees of freedom
## Multiple R-squared: 0.4128, Adjusted R-squared: 0.411
## F-statistic: 233.4 on 3 and 996 DF, p-value: < 2.2e-16
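With the interaction term, the marginal effect of X1 depends on X2: it equals the X1 coefficient plus the X1:X2 coefficient times X2. A small sketch, not in the slides, with X2 values chosen for illustration:
b <- coef(mod2)
# slope of X1 evaluated at X2 = -1, 0, and 1
b["X1"] + b["X1:X2"] * c(-1, 0, 1)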
mod3 <- lm(Y ~ X1 + X2 + X1*X2 + I(X3^2),data=tmp)
summary(mod3)
##
## Call:
## lm(formula = Y ~ X1 + X2 + X1 * X2 + I(X3^2), data = tmp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2078 -0.6813 -0.0052 0.6556 2.9619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.36635 0.03806 9.625 < 2e-16 ***
## X1 0.50401 0.03178 15.858 < 2e-16 ***
## X2 -0.76260 0.03243 -23.512 < 2e-16 ***
## I(X3^2) -0.29846 0.02255 -13.234 < 2e-16 ***
## X1:X2 0.20914 0.03190 6.555 8.9e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9888 on 995 degrees of freedom
## Multiple R-squared: 0.5007, Adjusted R-squared: 0.4987
## F-statistic: 249.4 on 4 and 995 DF, p-value: < 2.2e-16
mod4 <- lm(Y ~ X1 + X2 + X1*X2 + I(X3^2) + factor(X4),data=tmp)
summary(mod4)
##
## Call:
## lm(formula = Y ~ X1 + X2 + X1 * X2 + I(X3^2) + factor(X4), data = tmp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2156 -0.6708 -0.0086 0.6173 3.1599
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.17870 0.05574 3.206 0.00139 **
## X1 0.49864 0.03126 15.951 < 2e-16 ***
## X2 -0.77483 0.03192 -24.277 < 2e-16 ***
## I(X3^2) -0.30767 0.02223 -13.837 < 2e-16 ***
## factor(X4)B 0.12999 0.07631 1.703 0.08880 .
## factor(X4)C 0.45023 0.07469 6.028 2.34e-09 ***
## X1:X2 0.20251 0.03139 6.452 1.72e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9711 on 993 degrees of freedom
## Multiple R-squared: 0.5193, Adjusted R-squared: 0.5164
## F-statistic: 178.8 on 6 and 993 DF, p-value: < 2.2e-16
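Because mod1 through mod4 are nested, one way to compare them (not shown in the original) is an incremental F test with anova():
# Tests whether each added block of terms improves the fit
anova(mod1, mod2, mod3, mod4)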
Using the wage1 dataset from the wooldridge package, we regress wage on educ. The data are from 1976.

library(wooldridge)
data("wage1")
mod <- lm(wage ~ educ, data = wage1)
summary(mod)
##
## Call:
## lm(formula = wage ~ educ, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3396 -2.1501 -0.9674 1.1921 16.6085
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.90485 0.68497 -1.321 0.187
## educ 0.54136 0.05325 10.167 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.378 on 524 degrees of freedom
## Multiple R-squared: 0.1648, Adjusted R-squared: 0.1632
## F-statistic: 103.4 on 1 and 524 DF, p-value: < 2.2e-16
ggplot(wage1,aes(educ,wage))+
geom_point(color="#4F2683",alpha=0.5)+
geom_smooth(method="lm",color="#4F2683",se=F)+
geom_smooth(method="loess",color="red",se=F)+
theme_minimal()+
labs(x="Years of education",y="Hourly earnings")
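One way to read the educ slope (about 0.54) is that each extra year of education is associated with roughly 54 more cents of hourly wage. A small sketch, with education values chosen here purely for illustration:
# Predicted hourly wage at 12 vs. 16 years of education
predict(mod, newdata = data.frame(educ = c(12, 16)))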
Using the wage1 dataset from wooldridge, we regress wage on tenure. The data are from 1976.

mod <- lm(wage ~ tenure, data = wage1)
summary(mod)
##
## Call:
## lm(formula = wage ~ tenure, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.8107 -2.0703 -0.9469 1.1899 16.6278
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.99093 0.18516 26.955 < 2e-16 ***
## tenure 0.17733 0.02094 8.466 2.56e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.467 on 524 degrees of freedom
## Multiple R-squared: 0.1203, Adjusted R-squared: 0.1187
## F-statistic: 71.68 on 1 and 524 DF, p-value: 2.556e-16
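A plot analogous to the education one can be drawn for tenure. This block is a sketch mirroring the earlier ggplot call and is not in the original output:
ggplot(wage1, aes(tenure, wage)) +
  geom_point(color = "#4F2683", alpha = 0.5) +
  geom_smooth(method = "lm", color = "#4F2683", se = FALSE) +
  theme_minimal() +
  labs(x = "Years with current employer", y = "Hourly earnings")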
Using the wage1 dataset from wooldridge, we regress wage on exper. The data are from 1976.

mod <- lm(wage ~ exper, data = wage1)
summary(mod)
##
## Call:
## lm(formula = wage ~ exper, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.936 -2.458 -1.112 1.077 18.716
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.37331 0.25699 20.908 < 2e-16 ***
## exper 0.03072 0.01181 2.601 0.00955 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.673 on 524 degrees of freedom
## Multiple R-squared: 0.01275, Adjusted R-squared: 0.01086
## F-statistic: 6.766 on 1 and 524 DF, p-value: 0.009555
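The exper effect is small (about 3 cents per additional year of experience). As a quick check not in the slides, a confidence interval makes the precision of that estimate explicit:
# 95% confidence interval for the experience slope
confint(mod, "exper")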
Using the wage1 dataset from wooldridge, we regress wage on educ, tenure, exper, and female. The data are from 1976.

mod <- lm(wage ~ educ + tenure + exper + female, data = wage1)
summary(mod)
##
## Call:
## lm(formula = wage ~ educ + tenure + exper + female, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7675 -1.8080 -0.4229 1.0467 14.0075
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.56794 0.72455 -2.164 0.0309 *
## educ 0.57150 0.04934 11.584 < 2e-16 ***
## tenure 0.14101 0.02116 6.663 6.83e-11 ***
## exper 0.02540 0.01157 2.195 0.0286 *
## female -1.81085 0.26483 -6.838 2.26e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.958 on 521 degrees of freedom
## Multiple R-squared: 0.3635, Adjusted R-squared: 0.3587
## F-statistic: 74.4 on 4 and 521 DF, p-value: < 2.2e-16
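The female coefficient of about -1.81 is the estimated wage gap holding educ, tenure, and exper fixed. A hedged sketch with arbitrary illustrative covariate values:
# Same education, tenure, and experience; only female differs
predict(mod, newdata = data.frame(educ = 12, tenure = 2, exper = 5,
                                  female = c(0, 1)))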
Exercise: using the approval dataset from wooldridge, regress approve on gasprice. gasprice is in cents, and approve is on a scale from 0 to 100% approval. Plot the relationship with geom_smooth(method = "lm").
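A possible solution sketch, assuming the approval data ship with the wooldridge package and contain variables named approve and gasprice:
data("approval")
mod <- lm(approve ~ gasprice, data = approval)
summary(mod)

ggplot(approval, aes(gasprice, approve)) +
  geom_point(color = "#4F2683", alpha = 0.5) +
  geom_smooth(method = "lm", color = "#4F2683", se = FALSE) +
  theme_minimal() +
  labs(x = "Gas price (cents)", y = "Approval (%)")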