from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

# Empirical Distributions of a Statistic
# Load the table from united.csv and add a leading 'Row' column holding
# each row's 0-based position.
united = Table.read_table('united.csv')
united = united.with_column('Row', np.arange(united.num_rows)).move_to_start('Row')


def sample_median(size):
    """Return the median 'Delay' of one random sample drawn from ``united``.

    Parameters
    ----------
    size : int
        Number of rows to draw.  ``Table.sample`` draws with replacement by
        default, so ``size`` may exceed the number of rows in the table.

    Returns
    -------
    float
        Median of the 'Delay' column of the sampled rows.  The result is
        random: every call draws a fresh sample.
    """
    return np.median(united.sample(size).column('Delay'))


# (Population) Parameter
np.median(united.column('Delay'))

# (Sample) Statistic
sample_median(10)

# (Sample) Statistic
sample_median(100)

# Simulate the sample median many times at three different sample sizes.
# The three draws stay interleaved inside a single loop so the sequence of
# random samples is the same as drawing size 10, 100, 1000 on every pass.
# Results are collected in plain lists and converted to arrays once at the
# end; growing an array with np.append copies the whole array on every call.
num_simulations = 2000
medians_10 = []
medians_100 = []
medians_1000 = []
for _ in range(num_simulations):
    medians_10.append(sample_median(10))
    medians_100.append(sample_median(100))
    medians_1000.append(sample_median(1000))

sample_medians_10 = np.array(medians_10)
sample_medians_100 = np.array(medians_100)
sample_medians_1000 = np.array(medians_1000)

# One column of simulated medians per sample size.
sample_medians = Table().with_columns(
    'Size 10', sample_medians_10,
    'Size 100', sample_medians_100,
    'Size 1000', sample_medians_1000,
)

# Overlaid histograms on shared unit-width bins from -5 to 29.
sample_medians.hist(bins=np.arange(-5, 30))

# Swain vs. Alabama
# Model proportions of the two categories in the population; the first
# entry (.26) is the category counted in the simulation below.
population_proportions = make_array(.26, .74)
population_proportions

# One simulated panel of 100: the proportion of draws in each category.
sample_proportions(100, population_proportions)


def panel_proportion():
    """Return the first-category proportion in one simulated panel of 100.

    Draws 100 times at random from ``population_proportions`` via
    ``sample_proportions`` and returns the resulting proportion for the
    first category (the one with model probability .26) as a Python float.
    """
    return sample_proportions(100, population_proportions).item(0)


panel_proportion()

# Simulate 10,000 panels; multiplying the proportion by 100 turns it into
# a count out of the 100 panel members.
panels = np.array([panel_proportion() * 100 for _ in range(10000)])

# Unit-width bins with edges at x.5, so each bin is centred on a whole count.
Table().with_column(
    'Number of Black Men on Panel of 100', panels
).hist(bins=np.arange(5.5, 40.))
# Plotting details; ignore this code
plots.ylim(-0.002, 0.09)
# Red dot at 8 on the horizontal axis -- presumably the count observed in
# the actual panel (confirm against the accompanying text).
plots.scatter(8, 0, color='red', s=30);

# Mendel and Pea Flowers
## Mendel had 929 plants, of which 709 had purple flowers
observed_purples = 709 / 929
observed_purples

# Model under test: 75% of plants in the first category, 25% in the second.
predicted_proportions = make_array(.75, .25)
sample_proportions(929, predicted_proportions)


def purple_flowers():
    """Return the simulated percent of first-category plants in a sample of 929.

    Draws 929 times at random from ``predicted_proportions`` via
    ``sample_proportions`` and returns the proportion for the first category
    (model probability .75) scaled to a percentage (0-100).
    """
    return sample_proportions(929, predicted_proportions).item(0) * 100


purple_flowers()

# Simulate 10,000 samples of 929 plants under the model.
purples = np.array([purple_flowers() for _ in range(10000)])

Table().with_column('Percent of purple flowers in sample of 929', purples).hist()
# Plotting details; ignore this code
plots.ylim(-0.02, 0.25)
# Red dot: the observed percentage (709 / 929, as a percent).
plots.scatter(observed_purples * 100, 0, color='red', s=30);

# Distance of each simulated percentage from the model's 75%.
Table().with_column('Discrepancy in sample of 929 if the model is true', abs(purples - 75)).hist()
# Plotting details; ignore this code
plots.ylim(-0.02, 0.6)
# Red dot: the observed discrepancy from 75%.
plots.scatter(abs(observed_purples * 100 - 75), 0, color='red', s=30);

abs(observed_purples * 100 - 75)