Skip to article frontmatter · Skip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
# Silence NumPy's VisibleDeprecationWarning for the rest of the notebook.
# The top-level alias np.VisibleDeprecationWarning was removed in NumPy 2.0;
# the class lives in np.exceptions (available since NumPy 1.25), so look
# there first and fall back to the top-level name on older NumPy versions.
warnings.simplefilter(
    action='ignore',
    category=getattr(np, 'exceptions', np).VisibleDeprecationWarning,
)

Lecture 10

Prediction

families = Table.read_table('family_heights.csv')
families
Loading...
# Element-wise mean of the 'father' and 'mother' columns, one value per row.
parent_avgs = (families.column('father') + families.column('mother'))/2
# Gather the three columns the rest of the notebook works with into one table.
heights = Table().with_columns(
    'Parent Average', parent_avgs,
    'Child', families.column('child'), # At adulthood
    'M/F', families.column('child m/f')
)
heights
Loading...
heights.scatter('Parent Average', 'Child')
<Figure size 500x500 with 1 Axes>
heights.scatter('Parent Average', 'Child')
plots.plot([67.5, 67.5], [50, 85], color='red', lw=2)
plots.plot([68.5, 68.5], [50, 85], color='red', lw=2);
<Figure size 500x500 with 1 Axes>
# Keep only the rows whose 'Parent Average' falls in the 67.5–68.5 strip
# (the same bounds as the two red vertical lines drawn in the previous cell).
nearby = heights.where('Parent Average', are.between(67.5, 68.5))
# Mean 'Child' value within that strip; used below as the prediction at 68.
nearby_mean = np.average(nearby.column('Child'))
nearby_mean
67.620000000000005
heights.scatter('Parent Average', 'Child')
plots.plot([67.5, 67.5], [50, 85], color='red', lw=2)
plots.plot([68.5, 68.5], [50, 85], color='red', lw=2)
plots.scatter(68, nearby_mean, color='red', s=50);
<Figure size 500x500 with 1 Axes>
def predict(h):
    """Predict a child's height from the parents' average height h (e.g. 68).

    Selects the rows of the global `heights` table whose 'Parent Average'
    lies in the window from h - 1/2 to h + 1/2 (as matched by are.between)
    and returns the mean of their 'Child' values.
    """
    half_width = 1/2
    window = are.between(h - half_width, h + half_width)
    close_rows = heights.where('Parent Average', window)
    return np.average(close_rows.column('Child'))
predict(68)
67.620000000000005
predict(70)
68.561904761904756
predict(73)
70.099999999999994
# Run predict on every row's 'Parent Average' to get one prediction per row.
predicted_heights = heights.apply(predict, 'Parent Average')
# Store the predictions next to the data they were computed from.
heights = heights.with_column('Prediction', predicted_heights)
heights
Loading...
heights.select('Parent Average', 'Child', 'Prediction').scatter('Parent Average')
<Figure size 500x500 with 1 Axes>

Prediction Accuracy

# Signed error per row: predicted value minus the actual 'Child' value
# (positive means the prediction was too high).
pred_errs = heights.column('Prediction') - heights.column('Child')
heights = heights.with_column('errors',pred_errs)
heights
Loading...
heights.hist('errors')
<Figure size 600x400 with 1 Axes>
heights.hist('errors', group='M/F')
<Figure size 600x400 with 1 Axes>

Discussion Question

def predict_smarter(h, s):
    """Predict a child's height from the parents' average h and the 'M/F' label s.

    Works like a windowed average over the global `heights` table: rows are
    first restricted to those whose 'Parent Average' lies in the window from
    h - 1/2 to h + 1/2 (as matched by are.between), then to those whose 'M/F'
    value matches s. The mean 'Child' value of the remaining rows is returned.
    """
    half_width = 1/2
    window = are.between(h - half_width, h + half_width)
    matches = heights.where('Parent Average', window).where('M/F', s)
    return np.average(matches.column('Child'))
predict_smarter(68, 'female')
64.983516483516482
predict_smarter(68, 'male')
70.172340425531914
heights.apply(predict_smarter, "Parent Average", "M/F")
array([ 73.2 , 69.06666667, 69.06666667, 69.06666667, 73.48888889, 73.48888889, 67.65 , 67.65 , 70.79545455, 65.7 , 70.79545455, 70.79545455, 65.7 , 65.7 , 65.7 , 69.25322581, 69.25322581, 69.25322581, 63.59495798, 63.59495798, 63.59495798, 67.65 , 73.48888889, 73.48888889, 73.48888889, 73.48888889, 67.65 , 67.65 , 66.82 , 66.82 , 66.82 , 66.82 , 65.75882353, 70.17234043, 70.17234043, 64.98351648, 64.98351648, 64.98351648, 64.98351648, 64.98351648, 64.98351648, 64.67788462, 71.00952381, 66.11428571, 71.00952381, 71.00952381, 71.00952381, 71.00952381, 65.75882353, 70.921875 , 70.921875 , 70.921875 , 70.921875 , 70.921875 , 65.30571429, 65.30571429, 65.30571429, 65.30571429, 70.79047619, 70.79047619, 70.79047619, 70.79047619, 65.44318182, 65.44318182, 65.25666667, 65.25666667, 65.25666667, 65.1 , 73.48888889, 73.48888889, 73.48888889, 67.65 , 67.65 , 67.65 , 67.65 , 67.65 , 71.00952381, 66.11428571, 66.11428571, 70.79545455, 70.79545455, 65.7 , 70.56444444, 70.56444444, 70.56444444, 65.25666667, 65.25666667, 65.25666667, 65.25666667, 65.44318182, 64.98351648, 64.98351648, 69.79677419, 69.79677419, 69.79677419, 64.67788462, 64.67788462, 69.79677419, 69.79677419, 64.67788462, 69.79677419, 69.79677419, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 64.98351648, 64.98351648, 64.98351648, 63.88857143, 69.43969466, 69.43969466, 69.43969466, 64.4 , 64.4 , 64.4 , 69.2553719 , 69.2553719 , 69.2553719 , 63.88857143, 63.88857143, 69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 , 63.88857143, 63.5928 , 71.00952381, 71.00952381, 71.00952381, 71.00952381, 66.11428571, 70.921875 , 70.921875 , 70.921875 , 65.30571429, 70.56444444, 65.25666667, 65.25666667, 65.25666667, 70.56444444, 70.56444444, 70.56444444, 65.25666667, 65.25666667, 65.25666667, 70.56444444, 65.25666667, 70.56444444, 70.56444444, 70.56444444, 70.56444444, 65.25666667, 70.81555556, 70.61 , 70.61 , 70.61 , 70.61 , 65.1 , 65.1 , 70.56444444, 65.25666667, 70.61 , 70.61 , 70.17234043, 70.17234043, 64.98351648, 
64.67788462, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 70.61186441, 70.61186441, 70.61186441, 65.1 , 69.79677419, 69.79677419, 69.79677419, 70.17234043, 70.17234043, 70.17234043, 64.98351648, 64.98351648, 64.98351648, 64.98351648, 70.08877551, 64.91382979, 64.39917355, 64.39917355, 69.43969466, 69.43969466, 64.4 , 64.4 , 64.4 , 69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 , 63.88857143, 63.88857143, 63.88857143, 63.88857143, 63.88857143, 69.2553719 , 69.2553719 , 69.2553719 , 63.88857143, 69.01344538, 69.01344538, 63.5928 , 63.5928 , 63.5928 , 69.01344538, 69.01344538, 69.01344538, 63.5928 , 63.5928 , 69.25322581, 63.59495798, 63.59495798, 63.59495798, 63.59495798, 69.01344538, 69.01344538, 69.01344538, 69.01344538, 69.01344538, 63.5928 , 63.5928 , 63.44215686, 68.14047619, 68.14047619, 70.79545455, 70.79545455, 70.79545455, 65.7 , 70.79545455, 70.79545455, 65.7 , 65.7 , 65.7 , 65.7 , 70.921875 , 70.56444444, 70.56444444, 65.25666667, 65.25666667, 65.25666667, 65.25666667, 70.61 , 70.61 , 70.61 , 70.61 , 65.1 , 65.1 , 65.1 , 65.1 , 65.1 , 65.1 , 65.1 , 70.08877551, 70.08877551, 70.08877551, 70.08877551, 64.91382979, 64.91382979, 64.91382979, 64.91382979, 64.91382979, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 64.67788462, 64.67788462, 64.67788462, 69.79677419, 69.79677419, 64.67788462, 64.67788462, 64.67788462, 69.79677419, 69.79677419, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 69.79677419, 69.79677419, 69.79677419, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 69.79677419, 69.79677419, 64.67788462, 69.79677419, 69.79677419, 69.65691057, 69.65691057, 69.65691057, 64.57889908, 64.57889908, 64.57889908, 64.57889908, 69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 , 63.88857143, 69.2553719 , 69.2553719 , 69.2553719 , 63.88857143, 69.49779412, 69.49779412, 64.39917355, 64.39917355, 64.39917355, 69.43969466, 69.43969466, 69.43969466, 69.43969466, 
69.43969466, 69.43969466, 64.4 , 64.4 , 64.67788462, 69.2553719 , 63.88857143, 63.88857143, 63.88857143, 69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 , 63.88857143, 63.88857143, 63.88857143, 69.23050847, 69.23050847, 69.23050847, 63.83267327, 63.83267327, 63.83267327, 63.83267327, 63.83267327, 69.25322581, 69.25322581, 63.59495798, 63.59495798, 69.25322581, 69.25322581, 69.25322581, 63.59495798, 63.59495798, 69.25322581, 69.25322581, 63.59495798, 63.59495798, 69.01344538, 69.01344538, 63.5928 , 63.5928 , 69.01344538, 69.01344538, 63.5928 , 63.5928 , 69.06396396, 69.06396396, 69.06396396, 69.06396396, 69.06396396, 63.50603448, 63.50603448, 63.50603448, 69.01344538, 69.01344538, 69.01344538, 69.01344538, 63.5928 , 63.5928 , 63.5928 , 69.06396396, 69.06396396, 63.50603448, 68.29882353, 68.29882353, 68.00769231, 68.00769231, 63.15238095, 63.15238095, 63.15238095, 63.15238095, 68.2 , 68.2 , 62.57804878, 67.33 , 67.33 , 62.5 , 62.5 , 62.5 , 70.79047619, 70.79047619, 70.79047619, 65.44318182, 65.44318182, 65.44318182, 65.44318182, 65.44318182, 65.44318182, 65.44318182, 64.98351648, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 69.79677419, 69.79677419, 69.79677419, 70.1742268 , 70.1742268 , 70.1742268 , 70.1742268 , 69.79677419, 69.79677419, 69.79677419, 64.67788462, 64.67788462, 64.67788462, 70.08877551, 70.08877551, 70.08877551, 70.08877551, 70.08877551, 70.08877551, 64.91382979, 70.17234043, 70.17234043, 64.98351648, 64.98351648, 70.08877551, 64.91382979, 64.91382979, 64.91382979, 64.91382979, 64.91382979, 70.08877551, 70.08877551, 70.08877551, 70.08877551, 64.91382979, 64.91382979, 64.91382979, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 64.67788462, 69.2553719 , 69.2553719 , 69.2553719 , 63.88857143, 63.88857143, 63.88857143, 63.88857143, 69.2553719 , 69.2553719 , 69.2553719 , 63.88857143, 63.88857143, 63.88857143, 
63.88857143, 69.22459016, 69.22459016, 63.61403509, 63.61403509, 63.50603448, 68.67980769, 63.44215686, 63.44215686, 68.67980769, 68.67980769, 68.67980769, 68.67980769, 68.67980769, 63.44215686, 63.44215686, 69.06396396, 69.06396396, 69.06396396, 63.50603448, 63.50603448, 63.50603448, 63.50603448, 69.06396396, 63.50603448, 63.50603448, 63.42524272, 68.51782178, 68.51782178, 68.51782178, 68.29882353, 68.29882353, 68.29882353, 68.29882353, 63.01 , 68.51782178, 68.51782178, 68.51782178, 63.51975309, 63.51975309, 63.51975309, 63.51975309, 63.51975309, 63.51975309, 63.51975309, 63.51975309, 68.51782178, 68.51782178, 68.51782178, 68.51782178, 63.51975309, 63.51975309, 63.51975309, 63.51975309, 68.29882353, 68.29882353, 63.01 , 63.01 , 68.02222222, 68.02222222, 68.02222222, 63.24347826, 63.24347826, 68.00769231, 68.00769231, 68.00769231, 68.00769231, 68.00769231, 68.00769231, 63.15238095, 63.15238095, 63.15238095, 68.14047619, 62.79148936, 62.79148936, 68.14047619, 68.14047619, 62.79148936, 62.79148936, 67.95581395, 71.00952381, 65.75882353, 70.08877551, 70.08877551, 64.91382979, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 69.79677419, 64.67788462, 69.01344538, 69.01344538, 69.25322581, 63.59495798, 69.25322581, 69.25322581, 69.25322581, 69.25322581, 63.59495798, 63.59495798, 63.59495798, 69.01344538, 69.01344538, 63.5928 , 63.5928 , 69.25322581, 69.25322581, 69.25322581, 69.25322581, 63.59495798, 63.59495798, 63.59495798, 63.59495798, 69.25322581, 69.25322581, 69.25322581, 69.25322581, 69.25322581, 63.59495798, 63.59495798, 63.59495798, 68.67980769, 68.67980769, 68.67980769, 68.67980769, 63.44215686, 63.44215686, 63.44215686, 63.44215686, 63.44215686, 63.44215686, 68.67980769, 68.67980769, 63.44215686, 63.44215686, 68.67980769, 68.67980769, 68.67980769, 68.67980769, 63.44215686, 63.50603448, 68.67980769, 68.67980769, 68.67980769, 63.44215686, 63.44215686, 63.44215686, 63.44215686, 63.44215686, 
63.44215686, 63.44215686, 68.29882353, 68.29882353, 68.29882353, 68.29882353, 68.29882353, 63.01 , 63.01 , 63.01 , 68.67980769, 68.67980769, 68.67980769, 63.44215686, 68.29882353, 68.29882353, 68.29882353, 63.01 , 63.01 , 68.29882353, 68.29882353, 68.29882353, 68.29882353, 68.29882353, 63.01 , 63.01 , 63.01 , 68.29882353, 68.29882353, 68.29882353, 63.01 , 63.01 , 63.01 , 68.67980769, 68.29882353, 68.67047619, 68.67047619, 68.67047619, 68.67047619, 63.42524272, 68.02222222, 68.31975309, 62.99310345, 68.02222222, 68.14047619, 68.14047619, 68.14047619, 62.79148936, 62.79148936, 68.2 , 67.33 , 62.5 , 62.5 , 62.5 , 62.5 , 62.5 , 62.5 , 67.33 , 67.33 , 67.33 , 62.5 , 66.916 , 65.93846154, 65.93846154, 65.93846154, 65.93846154, 65.93846154, 65.93846154, 63.57142857, 63.57142857, 63.57142857, 63.57142857, 69.22459016, 69.22459016, 69.22459016, 63.61403509, 63.61403509, 69.25322581, 69.01344538, 69.01344538, 69.01344538, 69.01344538, 69.01344538, 63.5928 , 63.5928 , 63.5928 , 68.67980769, 68.67980769, 63.44215686, 63.44215686, 63.44215686, 63.44215686, 69.06396396, 69.06396396, 63.50603448, 63.50603448, 63.50603448, 69.06396396, 69.06396396, 63.50603448, 63.50603448, 68.67980769, 63.44215686, 63.44215686, 69.06396396, 69.06396396, 63.50603448, 63.50603448, 63.50603448, 63.50603448, 63.50603448, 63.50603448, 63.50603448, 63.50603448, 63.50603448, 68.29882353, 68.29882353, 68.29882353, 68.29882353, 68.02222222, 68.02222222, 68.02222222, 68.02222222, 68.02222222, 68.02222222, 68.02222222, 63.24347826, 68.00769231, 68.00769231, 63.15238095, 67.95581395, 67.95581395, 67.95581395, 67.95581395, 63.15897436, 67.33 , 69.01344538, 69.01344538, 69.01344538, 69.01344538, 69.01344538, 63.5928 , 63.5928 , 63.5928 , 69.01344538, 69.01344538, 69.01344538, 63.5928 , 63.5928 , 63.5928 , 63.5928 , 63.5928 , 63.5928 , 68.67980769, 68.67980769, 63.44215686, 63.44215686, 63.44215686, 68.67980769, 68.67980769, 63.44215686, 63.44215686, 63.44215686, 63.44215686, 68.51782178, 68.51782178, 
68.51782178, 68.51782178, 63.51975309, 63.51975309, 63.51975309, 63.51975309, 68.51782178, 68.51782178, 68.51782178, 63.51975309, 63.51975309, 68.14047619, 63.15897436, 63.15897436, 67.95581395, 67.95581395, 67.95581395, 63.15897436, 63.15897436, 63.15897436, 68.14047619, 68.14047619, 62.79148936, 62.79148936, 62.79148936, 62.79148936, 62.79148936, 66.916 , 66.575 , 66.575 , 66.575 , 61.61538462, 66.575 , 66.63076923, 66.63076923, 66.63076923, 66.63076923, 66.63076923, 66.63076923, 66.63076923, 60.81818182, 60.81818182, 60.81818182, 60.81818182, 60.81818182, 60.81818182, 60.81818182, 60.81818182, 68.67980769, 68.67980769, 68.67980769, 63.44215686, 63.44215686, 68.29882353, 63.01 , 63.01 , 63.01 , 68.29882353, 68.29882353, 68.29882353, 63.01 , 63.01 , 68.00769231, 68.00769231, 68.00769231, 63.15238095, 63.15238095, 63.15238095, 63.15238095, 63.15238095, 63.15238095, 68.02222222, 63.24347826, 68.00769231, 68.00769231, 68.00769231, 68.00769231, 63.15238095, 63.15238095, 68.14047619, 68.14047619, 62.79148936, 62.79148936, 62.79148936, 62.79148936, 67.33 , 62.5 , 67.33 , 67.33 , 62.5 , 68.2 , 68.2 , 68.2 , 62.57804878, 66.575 , 66.575 , 66.575 , 66.575 , 60.81818182, 67.33 , 67.33 , 62.5 , 62.5 , 62.5 , 62.5 , 62.5 , 67.33 , 67.33 , 62.5 , 62.5 , 62.5 , 62.5 , 62.5 , 65.93846154, 66. , 60. , 62.66666667, 62.66666667, 67.33 , 62.5 , 62.5 , 66.575 , 60.81818182])
# Run predict_smarter on each row, passing that row's 'Parent Average' and
# 'M/F' values as the two arguments.
smarter_predicted_heights = heights.apply(predict_smarter, 'Parent Average', 'M/F')
# Keep the new predictions in the table alongside the earlier 'Prediction' column.
heights = heights.with_column('Smarter Prediction', smarter_predicted_heights)
heights
Loading...
# Signed error of the smarter prediction for each row, computed directly from
# the columns instead of through an undefined `difference` helper (which
# raised NameError). Uses the same convention as the 'errors' column
# (prediction minus actual child height) so the two histograms are comparable.
# NOTE(review): the removed call passed ('Child', 'Smarter Prediction'); if the
# opposite sign was intended, swap the operands.
smarter_pred_errs = heights.column('Smarter Prediction') - heights.column('Child')
heights = heights.with_column('Smarter Errors', smarter_pred_errs)
# One overlaid histogram per 'M/F' group.
heights.hist('Smarter Errors', group='M/F')
<Figure size 600x400 with 1 Axes>

Grouping by One Column

cones = Table.read_table('cones.csv').drop('Color')
cones
Loading...
cones.group('Flavor')
Loading...
cones.group('Flavor', np.average)
Loading...
cones.group('Flavor', np.min)
Loading...
cones.group('Flavor', list)
Loading...

Grouping By One Column: Welcome Survey

survey = Table.read_table('welcome_survey_sp26.csv')
survey.show(3)
Loading...
survey.hist('Extroversion')
<Figure size 600x400 with 1 Axes>
survey.select(1, 2, 3).group('Extroversion', np.average)
Loading...
survey.group('Extroversion', np.average).plot('Extroversion', 'Text Recipients average')
<Figure size 600x600 with 1 Axes>
survey.group("Year")
Loading...
(survey
 .select("Year", "Sleep Hours")
 .group("Year", np.average))
Loading...

Lists

[1, 5, 'hello', 5.0]
[1, 5, 'hello', 5.0, make_array(1,2,3)]

Grouping by Two Columns

Do right-handed people tend to sleep on their left side and left-handed people sleep on their right?

survey.group('Sleep Position')
Loading...
survey.group(['Handedness', 'Sleep Position']).show()
Loading...