Point estimation

\[X_1, \dots, X_n \overset{\text{iid}}{\sim} \text{Bernoulli}(p)\]

How can we estimate \(p\)?


\[\hat{p} = \frac{1}{n}\sum_{i=1}^n X_i\]


# Draw 10 coin flips; a binomial draw with size = 1 is a single 0/1 trial
# with success probability 0.3.
coin_flips <- rbinom(n = 10, size = 1, prob = 0.3)
coin_flips
##  [1] 1 1 0 0 1 1 1 0 0 0

# The proportion of ones in the sample is the point estimate of p.
p_hat <- mean(coin_flips)
p_hat
## [1] 0.5

# A fresh sample gives a different estimate.
coin_flips <- rbinom(n = 10, size = 1, prob = 0.3)
coin_flips
##  [1] 0 0 0 0 1 1 1 0 0 1
p_hat <- mean(coin_flips)
p_hat
## [1] 0.4

# Sample and estimate in one step; every call yields a new value.
mean(rbinom(n = 10, size = 1, prob = 0.3))
## [1] 0.2
mean(rbinom(n = 10, size = 1, prob = 0.3))
## [1] 0.3
mean(rbinom(n = 10, size = 1, prob = 0.3))
## [1] 0.2
mean(rbinom(n = 10, size = 1, prob = 0.3))
## [1] 0.2

# Repeat the sample-then-estimate step 1000 times and plot the resulting
# estimates on the full [0, 1] range.
p_hat_dist <- replicate(
  n = 1000,
  expr = mean(rbinom(n = 10, size = 1, prob = 0.3))
)
hist(p_hat_dist, xlim = c(0, 1))

Bias of an estimator

# True success probability used to generate the data.
p <- 0.3
# 1000 simulated estimates, each the mean of 10 Bernoulli(p) draws.
p_hat_dist <- replicate(1000, mean(rbinom(10, 1, p)))
hist(p_hat_dist, xlim = c(0, 1))


head(p_hat_dist)
## [1] 0.2 0.3 0.1 0.0 0.5 0.5
# Average of the simulated estimates.
mean(p_hat_dist)
## [1] 0.2965
# Simulated bias: average estimate minus the true value. Subtract `p`
# itself (not a repeated literal) so the result stays correct if `p` is
# changed above.
mean(p_hat_dist) - p
## [1] -0.0035

Consistency

# True success probability shared by all three sample sizes below.
p <- 0.3

# Sample size 10: 10,000 simulated estimates.
p_hat_dist_n_10 <- replicate(
  n = 1e4,
  expr = mean(rbinom(n = 10, size = 1, prob = p))
)
head(p_hat_dist_n_10)
## [1] 0.4 0.2 0.2 0.4 0.2 0.7
# Share of estimates that miss p by at least 0.05.
mean(abs(p_hat_dist_n_10 - p) >= 0.05)
## [1] 0.7269

hist(p_hat_dist_n_10, xlim = c(0, 1))


# Sample size 100: same simulation, ten times more draws per estimate.
p_hat_dist_n_100 <- replicate(
  n = 1e4,
  expr = mean(rbinom(n = 100, size = 1, prob = p))
)
head(p_hat_dist_n_100)
## [1] 0.40 0.31 0.28 0.29 0.30 0.28
# Share of estimates that miss p by at least 0.05.
mean(abs(p_hat_dist_n_100 - p) >= 0.05)
## [1] 0.2236

hist(p_hat_dist_n_100, xlim = c(0, 1))


# Sample size 1000.
p_hat_dist_n_1000 <- replicate(
  n = 1e4,
  expr = mean(rbinom(n = 1e3, size = 1, prob = p))
)
head(p_hat_dist_n_1000)
## [1] 0.313 0.315 0.302 0.304 0.298 0.283
# Share of estimates that miss p by at least 0.05.
mean(abs(p_hat_dist_n_1000 - p) >= 0.05)
## [1] 4e-04

hist(p_hat_dist_n_1000, xlim = c(0, 1))

Standard error

# True success probability.
p <- 0.3
# 10,000 simulated estimates, each from a sample of size 10.
p_hat_dist_n_10 <- replicate(
  n = 1e4,
  expr = mean(rbinom(n = 10, size = 1, prob = p))
)
hist(p_hat_dist_n_10, xlim = c(0, 1))


head(p_hat_dist_n_10)
## [1] 0.4 0.2 0.3 0.1 0.3 0.1
# Standard deviation of the simulated estimates.
sd(p_hat_dist_n_10)
## [1] 0.1451883

(Root) Mean squared error

# Mean squared error: average squared distance between the simulated
# estimates and the true value p.
mean((p_hat_dist_n_10 - p)^2)
## [1] 0.021078
# Root mean squared error: square root of the MSE, which puts the error
# back on the same scale as p.
sqrt(mean((p_hat_dist_n_10 - p)^2))
## [1] 0.1451826