Heights of fathers and sons

# Load the tab-separated table of paired heights (columns Father and Son)
# and preview its first six rows.
father_son <- read_tsv(file = "father_son.tsv")
head(father_son, n = 6L)
## # A tibble: 6 × 2
##   Father   Son
##    <dbl> <dbl>
## 1   65.0  59.8
## 2   63.3  63.2
## 3   65.0  63.3
## 4   65.8  62.8
## 5   61.1  64.3
## 6   63.0  64.2

# Scatter plot of sons' heights against fathers' heights, overlaid with the
# least-squares line (se = FALSE: no confidence band is drawn).
# Built with ggplot() instead of qplot(), which is deprecated as of
# ggplot2 3.4.0. The x/y aesthetics stay in the plot-level mapping so every
# layer added to `p` inherits them.
p <- ggplot(data = father_son, mapping = aes(x = Father, y = Son)) +
  geom_point(size = 0.25) +
  geom_smooth(method = "lm", se = FALSE, color = "red")
p

Simple linear regression

# Regress the son's height on the father's height (intercept written out
# explicitly as `1 +`), then show the full coefficient table and fit summary.
model <- lm(formula = Son ~ 1 + Father, data = father_son)
summary(object = model)

## 
## Call:
## lm(formula = Son ~ 1 + Father, data = father_son)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.8910 -1.5361 -0.0092  1.6359  8.9894 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 33.89280    1.83289   18.49   <2e-16 ***
## Father       0.51401    0.02706   19.00   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.438 on 1076 degrees of freedom
## Multiple R-squared:  0.2512, Adjusted R-squared:  0.2505 
## F-statistic: 360.9 on 1 and 1076 DF,  p-value: < 2.2e-16

Confidence and prediction intervals

# Same scatter plot as before, but geom_smooth() keeps its default se = TRUE,
# so the confidence band around the fitted line is drawn as well.
# Built with ggplot() instead of qplot(), which is deprecated as of
# ggplot2 3.4.0. The x/y aesthetics stay in the plot-level mapping so layers
# added to `p` later inherit them.
# NOTE(review): on ggplot2 >= 3.4.0 line width is spelled `linewidth`; `size`
# is kept here so the line is still thinned on older ggplot2 versions.
p <- ggplot(data = father_son, mapping = aes(x = Father, y = Son)) +
  geom_point(size = 0.25) +
  geom_smooth(method = "lm", color = "red", size = 0.1)
p


# Prediction intervals for a son's height at each father's height in the
# data: a matrix with columns fit, lwr and upr (predict.lm's default level
# of 0.95 is used).
# The interval type is spelled out in full rather than relying on partial
# matching of "predict" against "prediction", and `newdata` is passed by name.
preds <- predict(model, newdata = father_son, interval = "prediction")
head(preds)
##        fit      lwr      upr
## 1 67.30318 62.51480 72.09157
## 2 66.42937 61.63746 71.22129
## 3 67.30318 62.51480 72.09157
## 4 67.71439 62.92709 72.50169
## 5 65.29856 60.49955 70.09757
## 6 66.27517 61.48245 71.06789

# Place the interval columns (fit, lwr, upr) next to the observed heights so
# a plot layer can read all of them from a single data frame.
df <- cbind(father_son, preds)
head(df, n = 6L)
##   Father  Son      fit      lwr      upr
## 1   65.0 59.8 67.30318 62.51480 72.09157
## 2   63.3 63.2 66.42937 61.63746 71.22129
## 3   65.0 63.3 67.30318 62.51480 72.09157
## 4   65.8 62.8 67.71439 62.92709 72.50169
## 5   61.1 64.3 65.29856 60.49955 70.09757
## 6   63.0 64.2 66.27517 61.48245 71.06789

# Shade the band between the lower and upper prediction limits on top of the
# existing plot; x and y are inherited from the mapping already stored in `p`.
p +
  geom_ribbon(
    mapping = aes(ymin = lwr, ymax = upr),
    data = df,
    alpha = 0.2
  )

Multiple regression

# Preview the first six rows of the built-in mtcars data set.
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

# One-predictor model: fuel economy (mpg) explained by weight (wt) alone.
# Printing the fit shows the call and the estimated coefficients.
model_1 <- lm(formula = mpg ~ 1 + wt, data = mtcars)
print(model_1)
## 
## Call:
## lm(formula = mpg ~ 1 + wt, data = mtcars)
## 
## Coefficients:
## (Intercept)           wt  
##      37.285       -5.344

# Full report for the weight-only model: residual quantiles, coefficient
# table with standard errors and p-values, R-squared and the F-test.
summary(object = model_1)
## 
## Call:
## lm(formula = mpg ~ 1 + wt, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5432 -2.3647 -0.1252  1.4096  6.8727 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  37.2851     1.8776  19.858  < 2e-16 ***
## wt           -5.3445     0.5591  -9.559 1.29e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7446 
## F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10

# Two-predictor model: fuel economy (mpg) explained by weight (wt) and
# horsepower (hp). Printing the fit shows the call and the coefficients.
model_2 <- lm(formula = mpg ~ 1 + wt + hp, data = mtcars)
print(model_2)
## 
## Call:
## lm(formula = mpg ~ 1 + wt + hp, data = mtcars)
## 
## Coefficients:
## (Intercept)           wt           hp  
##    37.22727     -3.87783     -0.03177

# Full report for the weight-plus-horsepower model, for comparison with the
# weight-only fit (coefficient table, R-squared, F-test).
summary(object = model_2)
## 
## Call:
## lm(formula = mpg ~ 1 + wt + hp, data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.941 -1.600 -0.182  1.050  5.854 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 37.22727    1.59879  23.285  < 2e-16 ***
## wt          -3.87783    0.63273  -6.129 1.12e-06 ***
## hp          -0.03177    0.00903  -3.519  0.00145 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.593 on 29 degrees of freedom
## Multiple R-squared:  0.8268, Adjusted R-squared:  0.8148 
## F-statistic: 69.21 on 2 and 29 DF,  p-value: 9.109e-12