Heights of fathers and sons
# Read the father/son height table (tab-separated) and preview its first rows.
father_son <- read_tsv(file = "father_son.tsv")
head(father_son)
## # A tibble: 6 × 2
## Father Son
## <dbl> <dbl>
## 1 65.0 59.8
## 2 63.3 63.2
## 3 65.0 63.3
## 4 65.8 62.8
## 5 61.1 64.3
## 6 63.0 64.2
# Scatter plot of sons' heights against their fathers' heights.
qplot(Father, Son,
      data = father_son,
      geom = "point",
      size = I(0.25))
data:image/s3,"s3://crabby-images/d231a/d231a7f84577b25bf6758caa3d601ba5d51ffdf6" alt=""
The standard deviation line
#' Build a ggplot2 layer that draws the SD line of a scatter plot
#'
#' The SD line passes through the point of averages `(mean(x), mean(y))` and
#' changes by one SD of `y` for every SD of `x`. It rises when `x` and `y` are
#' positively associated and falls when they are negatively associated.
#'
#' @param x Numeric vector of horizontal-axis values.
#' @param y Numeric vector of vertical-axis values, same length as `x`.
#' @return The layer returned by `geom_abline()` (dashed, blue), to be added
#'   to a plot with `+`.
sd_line <- function(x, y) {
  # Take the direction from the sign of the covariance so that negatively
  # associated data get a downward-sloping line. Anything else (positive,
  # zero, or NA) keeps the upward slope sd(y) / sd(x).
  direction <- if (isTRUE(cov(x, y) < 0)) -1 else 1
  slope <- direction * sd(y) / sd(x)
  # Force the line through the point of averages.
  intercept <- mean(y) - mean(x) * slope
  geom_abline(slope = slope, intercept = intercept,
              linetype = "dashed", color = "blue", size = 0.8)
}
# Same scatter plot, now with the SD line drawn on top.
qplot(Father, Son, data = father_son, geom = "point", size = I(0.25)) +
  sd_line(x = father_son$Father, y = father_son$Son)
data:image/s3,"s3://crabby-images/194b2/194b26fda05edb1e8e316d8edebaffccbbefd35f" alt=""
# Read the height/weight table (tab-separated) and preview its first rows.
measures <- read_tsv(file = "heights_weights.tsv")
head(measures)
## # A tibble: 6 × 2
## weight height
## <int> <int>
## 1 169 72
## 2 150 70
## 3 167 67
## 4 167 66
## 5 152 73
## 6 156 70
# Weight against height with the SD line overlaid; kept in `p` for reuse.
p <- qplot(height, weight, data = measures, geom = "point", size = I(0.75)) +
  sd_line(x = measures$height, y = measures$weight)
p
data:image/s3,"s3://crabby-images/c3c0d/c3c0de328bd5fa29971a9b48f18f9c941bcf092f" alt=""
# Mark the points one SD below and one SD above the point of averages.
# annotate() draws each marker exactly once; mapping these constants inside
# aes() would recycle them for every row of the plot data and overplot one
# copy per observation.
p +
  annotate("point",
           x = mean(measures$height) + c(-1, 1) * sd(measures$height),
           y = mean(measures$weight) + c(-1, 1) * sd(measures$weight),
           colour = "blue", size = 2)
data:image/s3,"s3://crabby-images/cd42c/cd42c40063b0e61a35512dbd1ca216415c7c7812" alt=""
Correlation
data:image/s3,"s3://crabby-images/b1a68/b1a6830084315420e9976ccb735161d64bd2179b" alt=""
cor(x, y)
## [1] 0.3997
data:image/s3,"s3://crabby-images/acdb5/acdb554c8fcbd4bb33753bda8b30ac5aa2c5011c" alt=""
cor(x, y)
## [1] 0.8995
data:image/s3,"s3://crabby-images/b5f49/b5f49662eeb275169fcf0221cf8cabf6386dbd3f" alt=""
cor(x, y)
## [1] 0.01381
Correlation measures linear association
data:image/s3,"s3://crabby-images/e3bbc/e3bbc53e9ddb64f165754f2d5efae2f292b70030" alt=""
cor(x, y)
## [1] -0.00536
Regression
head(measures)
## # A tibble: 6 × 2
## weight height
## <int> <int>
## 1 169 72
## 2 150 70
## 3 167 67
## 4 167 66
## 5 152 73
## 6 156 70
# Weight against height with the SD line overlaid; kept in `p` for reuse.
p <- qplot(height, weight, data = measures, geom = "point", size = I(0.75)) +
  sd_line(x = measures$height, y = measures$weight)
p
data:image/s3,"s3://crabby-images/c3c0d/c3c0de328bd5fa29971a9b48f18f9c941bcf092f" alt=""
# Add blue markers one SD below and one SD above the point of averages.
# annotate() draws each marker exactly once; mapping these constants inside
# aes() would recycle them for every row of the plot data and overplot one
# copy per observation.
q <- p +
  annotate("point",
           x = mean(measures$height) + c(-1, 1) * sd(measures$height),
           y = mean(measures$weight) + c(-1, 1) * sd(measures$weight),
           colour = "blue", size = 2)
q
data:image/s3,"s3://crabby-images/cd42c/cd42c40063b0e61a35512dbd1ca216415c7c7812" alt=""
Regression line
# Average weight of the people whose height is 72-73 (the "tall" group).
tall <- measures %>%
  filter(height >= 72 & height <= 73)
avg_tall <- mean(tall$weight)

# Average weight of the people whose height is 67-68 (the "short" group).
short <- measures %>%
  filter(height >= 67 & height <= 68)
avg_short <- mean(short$weight)

# Plot each group average in red at one SD above / below the mean height.
# annotate() draws each marker once instead of once per row of the plot data,
# which is what a constant mapped inside aes() would do.
q <- q +
  annotate("point",
           x = mean(measures$height) + sd(measures$height),
           y = avg_tall, colour = "red", size = 2) +
  annotate("point",
           x = mean(measures$height) - sd(measures$height),
           y = avg_short, colour = "red", size = 2)
q
data:image/s3,"s3://crabby-images/40e36/40e369bccba2d6479b7ea22b7378d1f296d7933b" alt=""
# Overlay the least-squares regression line (no confidence band) in red.
q <- q +
  geom_smooth(method = "lm", color = "red", se = FALSE)
q
data:image/s3,"s3://crabby-images/4f862/4f8626e4c200abdd662995009f9d2e19744f6163" alt=""
Regression to the mean
avg_tall
## [1] 181.6
avg_tall - mean(measures$weight)
## [1] 14.64
sd(measures$weight)
## [1] 26.17
(avg_tall - mean(measures$weight)) / sd(measures$weight)
## [1] 0.5596
(avg_short - mean(measures$weight)) / sd(measures$weight)
## [1] -0.5076
cor(measures$height, measures$weight)
## [1] 0.5309
Regression line
# Bin heights to the nearest multiple of 2 so weights can be averaged per bin.
measures <- mutate(measures, rounded_height = round_any(height, 2))
head(measures)
## # A tibble: 6 × 3
## weight height rounded_height
## <int> <int> <dbl>
## 1 169 72 72
## 2 150 70 70
## 3 167 67 68
## 4 167 66 66
## 5 152 73 72
## 6 156 70 70
# Mean weight within each height bin: one row per rounded_height value.
avgs <- measures %>%
  group_by(rounded_height) %>%
  summarise(avg_weight = mean(weight))
head(avgs)
## # A tibble: 6 × 2
## rounded_height avg_weight
## <dbl> <dbl>
## 1 64 136.5
## 2 66 144.0
## 3 68 154.9
## 4 70 162.5
## 5 72 182.4
## 6 74 190.0
# On top of the scatter plot with the SD line, overlay the least-squares
# regression line and the per-bin average weights (red dots).
q <- p +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  geom_point(data = avgs,
             mapping = aes(x = rounded_height, y = avg_weight),
             size = 2, color = "red")
q
data:image/s3,"s3://crabby-images/87793/877936dcb3dbf033f01fc7187810b9db27d05e3c" alt=""
data:image/s3,"s3://crabby-images/21902/219020a37c2104b8532816da8817f7acad7dd0e3" alt=""
# Scatter plot with the SD line plus the least-squares regression line in red.
p + geom_smooth(method = "lm", color = "red", se = FALSE)
data:image/s3,"s3://crabby-images/76e81/76e8172ae21cf731487db8c225cc0252cc368883" alt=""
cor(x, y)
## [1] -0.05958
# Simulate 1000 standard-normal ability values and two test scores, each equal
# to ability plus its own normal noise with SD 0.5, then plot one test against
# the other with the SD line.
ability <- rnorm(n = 1000, mean = 0, sd = 1)
test_1 <- ability + rnorm(n = length(ability), mean = 0, sd = 0.5)
test_2 <- ability + rnorm(n = length(ability), mean = 0, sd = 0.5)
p <- qplot(test_1, test_2, size = I(0.25)) +
  sd_line(x = test_1, y = test_2)
p
data:image/s3,"s3://crabby-images/920c6/920c6a71da5235ce840a12e1d7a4c748a719ab4a" alt=""
# Add the least-squares regression line (no confidence band) in red.
p <- p + geom_smooth(method = "lm", color = "red", se = FALSE)
p
data:image/s3,"s3://crabby-images/e0baf/e0bafcb243f7e72b76ca887a6150954edf585a4e" alt=""