Heights of fathers and sons
# Load the father/son table from the tab-separated file, then preview
# its first six rows.
father_son <- read_tsv(file = "father_son.tsv")
head(father_son, n = 6L)
## # A tibble: 6 × 2
## Father Son
## <dbl> <dbl>
## 1 65.0 59.8
## 2 63.3 63.2
## 3 65.0 63.3
## 4 65.8 62.8
## 5 61.1 64.3
## 6 63.0 64.2
# Scatter plot of Son against Father with the least-squares line
# overlaid in red (no confidence band).
# Built with ggplot() rather than qplot(), which is deprecated as of
# ggplot2 3.4.0. The point size is a fixed constant, so it is set on
# the point layer instead of being wrapped in I().
p <- ggplot(father_son, aes(x = Father, y = Son)) +
  geom_point(size = 0.25) +
  # The formula is spelled out so geom_smooth() does not have to pick
  # one and report its choice in a message.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red")
p
Simple linear regression
# Fit Son as a linear function of Father; the `1 +` spells out the
# intercept term. Then print the call and fitted coefficients.
model <- lm(formula = Son ~ 1 + Father, data = father_son)
print(model)
##
## Call:
## lm(formula = Son ~ 1 + Father, data = father_son)
##
## Coefficients:
## (Intercept) Father
## 33.893 0.514
# Father values at which to predict Son. The tibble is built directly
# rather than converting an intermediate data.frame.
newdata <- tibble(Father = c(72, 64, 70))

# One prediction per row of `newdata`, from the fitted model.
predict(model, newdata = newdata)
## 1 2 3
## 70.90123 66.78918 69.87321
## # A tibble: 1 × 2
## Father Son
## <dbl> <dbl>
## 1 71.8 72.6
## # A tibble: 1 × 3
## Father Son pred
## <dbl> <dbl> <dbl>
## 1 71.8 72.6 70.79843
# Attach the fitted value and the residual (observed Son minus fitted
# value) to every row, then preview the widened table.
father_son <- father_son %>%
  mutate(
    pred = predict(model, newdata = .),
    residual = Son - pred
  )
head(father_son, n = 6L)
## # A tibble: 6 × 4
## Father Son pred residual
## <dbl> <dbl> <dbl> <dbl>
## 1 65.0 59.8 67.30318 -7.5031849
## 2 63.3 63.2 66.42937 -3.2293748
## 3 65.0 63.3 67.30318 -4.0031849
## 4 65.8 62.8 67.71439 -4.9143896
## 5 61.1 64.3 65.29856 -0.9985618
## 6 63.0 64.2 66.27517 -2.0751730
# Reprint the fit so its Father coefficient can be compared with the
# slope computed by hand.
print(model)
##
## Call:
## lm(formula = Son ~ 1 + Father, data = father_son)
##
## Coefficients:
## (Intercept) Father
## 33.893 0.514
# Slope by hand: the Father/Son correlation scaled by the ratio of the
# standard deviations, sd(Son) / sd(Father).
r <- with(father_son, cor(Father, Son))
sd_x <- sd(father_son[["Father"]])
sd_y <- sd(father_son[["Son"]])
beta_1 <- (r * sd_y) / sd_x
beta_1
## [1] 0.5140059
# Reprint the fit so its intercept can be compared with the value
# computed by hand.
print(model)
##
## Call:
## lm(formula = Son ~ 1 + Father, data = father_son)
##
## Coefficients:
## (Intercept) Father
## 33.893 0.514
# Intercept by hand: mean(Son) minus the slope times mean(Father).
mu_x <- mean(father_son[["Father"]])
mu_y <- mean(father_son[["Son"]])
beta_0 <- mu_y - (beta_1 * mu_x)
beta_0
## [1] 33.8928
# Histogram of the regression residuals.
# Built with ggplot() rather than qplot(), which is deprecated as of
# ggplot2 3.4.0. `bins = 30` states the default bin count explicitly,
# so stat_bin() no longer has to report that it fell back to it.
ggplot(father_son, aes(x = residual)) +
  geom_histogram(bins = 30)
# Full regression report for the fit: residual quantiles, coefficient
# table with standard errors, and overall fit statistics.
print(summary(model))
## Call:
## lm(formula = Son ~ 1 + Father, data = father_son)
## Residuals:
## Min 1Q Median 3Q Max
## -8.8910 -1.5361 -0.0092 1.6359 8.9894
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.89280 1.83289 18.49 <2e-16 ***
## Father 0.51401 0.02706 19.00 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 2.438 on 1076 degrees of freedom
## Multiple R-squared: 0.2512, Adjusted R-squared: 0.2505
## F-statistic: 360.9 on 1 and 1076 DF, p-value: < 2.2e-16