Heights of fathers and sons
# Load the father/son table from a tab-separated file and preview it.
father_son <- read_tsv("father_son.tsv")
head(father_son)
## # A tibble: 6 × 2
## Father Son
## <dbl> <dbl>
## 1 65.0 59.8
## 2 63.3 63.2
## 3 65.0 63.3
## 4 65.8 62.8
## 5 61.1 64.3
## 6 63.0 64.2
# Scatter plot of Son against Father with the least-squares line in red.
# Built with ggplot() + geom_point() because qplot() is deprecated in
# current ggplot2 releases; se = FALSE leaves out the standard-error band.
p <- ggplot(data = father_son, mapping = aes(x = Father, y = Son)) +
  geom_point(size = 0.25) +
  geom_smooth(method = "lm", se = FALSE, color = "red")
p
Simple linear regression
# Least-squares fit of Son on Father; the leading 1 spells out the intercept.
model <- lm(
  formula = Son ~ 1 + Father,
  data = father_son
)
# Residual quantiles, coefficient table and overall fit statistics.
summary(model)
##
## Call:
## lm(formula = Son ~ 1 + Father, data = father_son)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.8910 -1.5361 -0.0092 1.6359 8.9894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.89280 1.83289 18.49 <2e-16 ***
## Father 0.51401 0.02706 19.00 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.438 on 1076 degrees of freedom
## Multiple R-squared: 0.2512, Adjusted R-squared: 0.2505
## F-statistic: 360.9 on 1 and 1076 DF, p-value: < 2.2e-16
Confidence and prediction intervals
# Same scatter plot, this time keeping geom_smooth()'s default standard-error
# band around the fitted line. ggplot() + geom_point() replaces the
# deprecated qplot(). `size` is kept on the smooth layer for compatibility
# with older ggplot2; newer releases prefer `linewidth` for lines.
p <- ggplot(data = father_son, mapping = aes(x = Father, y = Son)) +
  geom_point(size = 0.25) +
  geom_smooth(method = "lm", color = "red", size = 0.1)
p
# Per-row fitted value plus lower/upper bounds of the prediction interval.
# The interval type is written in full ("prediction") instead of relying on
# partial matching of "predict", and newdata is passed by name.
preds <- predict(model, newdata = father_son, interval = "prediction")
head(preds)
## fit lwr upr
## 1 67.30318 62.51480 72.09157
## 2 66.42937 61.63746 71.22129
## 3 67.30318 62.51480 72.09157
## 4 67.71439 62.92709 72.50169
## 5 65.29856 60.49955 70.09757
## 6 66.27517 61.48245 71.06789
# Put the fit/lwr/upr columns next to the original Father/Son columns.
df <- cbind(father_son, preds)
head(df)
## Father Son fit lwr upr
## 1 65.0 59.8 67.30318 62.51480 72.09157
## 2 63.3 63.2 66.42937 61.63746 71.22129
## 3 65.0 63.3 67.30318 62.51480 72.09157
## 4 65.8 62.8 67.71439 62.92709 72.50169
## 5 61.1 64.3 65.29856 60.49955 70.09757
## 6 63.0 64.2 66.27517 61.48245 71.06789
# Overlay the prediction interval as a translucent ribbon; x and y are
# inherited from the base plot's mapping, only ymin/ymax are set here.
p + geom_ribbon(
  data = df, aes(ymin = lwr, ymax = upr), alpha = 0.2)
Multiple regression
# Preview the first six rows of mtcars.
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Model 1: mpg regressed on wt alone, with the intercept written explicitly.
model_1 <- lm(
  formula = mpg ~ 1 + wt,
  data = mtcars
)
# Printing the fit shows only the call and the coefficient estimates.
model_1
##
## Call:
## lm(formula = mpg ~ 1 + wt, data = mtcars)
##
## Coefficients:
## (Intercept) wt
## 37.285 -5.344
# The summary adds residual quantiles, standard errors, t-tests and R-squared.
summary(model_1)
##
## Call:
## lm(formula = mpg ~ 1 + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
# Model 2: mpg regressed on both wt and hp, intercept written explicitly.
model_2 <- lm(
  formula = mpg ~ 1 + wt + hp,
  data = mtcars
)
# Printing the fit shows only the call and the coefficient estimates.
model_2
##
## Call:
## lm(formula = mpg ~ 1 + wt + hp, data = mtcars)
##
## Coefficients:
## (Intercept) wt hp
## 37.22727 -3.87783 -0.03177
# The summary adds residual quantiles, standard errors, t-tests and R-squared.
summary(model_2)
##
## Call:
## lm(formula = mpg ~ 1 + wt + hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.941 -1.600 -0.182 1.050 5.854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.22727 1.59879 23.285 < 2e-16 ***
## wt -3.87783 0.63273 -6.129 1.12e-06 ***
## hp -0.03177 0.00903 -3.519 0.00145 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.593 on 29 degrees of freedom
## Multiple R-squared: 0.8268, Adjusted R-squared: 0.8148
## F-statistic: 69.21 on 2 and 29 DF, p-value: 9.109e-12