from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
# NumPy 2.0 no longer exposes VisibleDeprecationWarning in the top-level
# namespace; it lives in numpy.exceptions (available since NumPy 1.25).
# Prefer the new location and fall back to the old alias on older NumPy.
try:
    VisibleDeprecationWarning = np.exceptions.VisibleDeprecationWarning
except AttributeError:
    VisibleDeprecationWarning = np.VisibleDeprecationWarning
warnings.simplefilter(action='ignore', category=VisibleDeprecationWarning)

# Lecture 10
# Prediction
# Load the family heights data; each cell below was fused with its output
# in the export, so statements and outputs are separated again here.
families = Table.read_table('family_heights.csv')
families  # notebook output: Loading...

# Average of the 'father' and 'mother' columns, row by row.
parent_avgs = (families.column('father') + families.column('mother'))/2

heights = Table().with_columns(
    'Parent Average', parent_avgs,
    'Child', families.column('child'), # At adulthood
    'M/F', families.column('child m/f')
)
heights  # notebook output: Loading...

heights.scatter('Parent Average', 'Child')

# Same scatter plot, with red vertical lines at 67.5 and 68.5 marking the
# band of parent averages used for the prediction at 68.
heights.scatter('Parent Average', 'Child')
plots.plot([67.5, 67.5], [50, 85], color='red', lw=2)
plots.plot([68.5, 68.5], [50, 85], color='red', lw=2);

# Mean child height among rows whose parent average lies in that band.
nearby = heights.where('Parent Average', are.between(67.5, 68.5))
nearby_mean = np.average(nearby.column('Child'))
nearby_mean  # notebook output: 67.620000000000005

# Scatter plot with the band and the band's mean child height as a red dot.
heights.scatter('Parent Average', 'Child')
plots.plot([67.5, 67.5], [50, 85], color='red', lw=2)
plots.plot([68.5, 68.5], [50, 85], color='red', lw=2)
plots.scatter(68, nearby_mean, color='red', s=50);
def predict(h): # h is the average height of the parents, such as 68
    """Predict a child's height from the parents' average height.

    Selects the rows of the global ``heights`` table whose 'Parent Average'
    satisfies ``are.between(h - 1/2, h + 1/2)`` and returns the mean of
    their 'Child' column.
    """
    nearby = heights.where('Parent Average', are.between(h - 1/2, h + 1/2))
    return np.average(nearby.column('Child'))

predict(68)  # notebook output: 67.620000000000005

predict(70)  # notebook output: 68.561904761904756

predict(73)  # notebook output: 70.099999999999994

# Apply the predictor to every row's parent average.
predicted_heights = heights.apply(predict, 'Parent Average')

heights = heights.with_column('Prediction', predicted_heights)
heights  # notebook output: Loading...

heights.select('Parent Average', 'Child', 'Prediction').scatter('Parent Average')

# Prediction Accuracy

# Error of each prediction: predicted height minus the child's actual height.
pred_errs = heights.column('Prediction') - heights.column('Child')
heights = heights.with_column('errors',pred_errs)
heights  # notebook output: Loading...

heights.hist('errors')

# Same histogram, split by the 'M/F' column.
heights.hist('errors', group='M/F')

# Discussion Question
def predict_smarter(h, s):
    """Predict a child's height from the parents' average height and M/F label.

    h is the parents' average height and s is a value of the 'M/F' column
    (the calls below use 'female' and 'male'). Rows of the global
    ``heights`` table are filtered first by
    ``are.between(h - 1/2, h + 1/2)`` on 'Parent Average', then by
    'M/F' equal to s; the mean of the remaining 'Child' values is returned.
    """
    nearby = heights.where('Parent Average', are.between(h - 1/2, h + 1/2))
    nearby_same_mf = nearby.where('M/F', s)
    return np.average(nearby_same_mf.column('Child'))

predict_smarter(68, 'female')  # notebook output: 64.983516483516482

predict_smarter(68, 'male')  # notebook output: 70.172340425531914

heights.apply(predict_smarter, "Parent Average", "M/F")
# notebook output (the array printout continues on the following lines):
# array([ 73.2 , 69.06666667, 69.06666667, 69.06666667,
73.48888889, 73.48888889, 67.65 , 67.65 ,
70.79545455, 65.7 , 70.79545455, 70.79545455,
65.7 , 65.7 , 65.7 , 69.25322581,
69.25322581, 69.25322581, 63.59495798, 63.59495798,
63.59495798, 67.65 , 73.48888889, 73.48888889,
73.48888889, 73.48888889, 67.65 , 67.65 ,
66.82 , 66.82 , 66.82 , 66.82 ,
65.75882353, 70.17234043, 70.17234043, 64.98351648,
64.98351648, 64.98351648, 64.98351648, 64.98351648,
64.98351648, 64.67788462, 71.00952381, 66.11428571,
71.00952381, 71.00952381, 71.00952381, 71.00952381,
65.75882353, 70.921875 , 70.921875 , 70.921875 ,
70.921875 , 70.921875 , 65.30571429, 65.30571429,
65.30571429, 65.30571429, 70.79047619, 70.79047619,
70.79047619, 70.79047619, 65.44318182, 65.44318182,
65.25666667, 65.25666667, 65.25666667, 65.1 ,
73.48888889, 73.48888889, 73.48888889, 67.65 ,
67.65 , 67.65 , 67.65 , 67.65 ,
71.00952381, 66.11428571, 66.11428571, 70.79545455,
70.79545455, 65.7 , 70.56444444, 70.56444444,
70.56444444, 65.25666667, 65.25666667, 65.25666667,
65.25666667, 65.44318182, 64.98351648, 64.98351648,
69.79677419, 69.79677419, 69.79677419, 64.67788462,
64.67788462, 69.79677419, 69.79677419, 64.67788462,
69.79677419, 69.79677419, 64.67788462, 64.67788462,
64.67788462, 64.67788462, 64.98351648, 64.98351648,
64.98351648, 63.88857143, 69.43969466, 69.43969466,
69.43969466, 64.4 , 64.4 , 64.4 ,
69.2553719 , 69.2553719 , 69.2553719 , 63.88857143,
63.88857143, 69.2553719 , 69.2553719 , 69.2553719 ,
69.2553719 , 63.88857143, 63.5928 , 71.00952381,
71.00952381, 71.00952381, 71.00952381, 66.11428571,
70.921875 , 70.921875 , 70.921875 , 65.30571429,
70.56444444, 65.25666667, 65.25666667, 65.25666667,
70.56444444, 70.56444444, 70.56444444, 65.25666667,
65.25666667, 65.25666667, 70.56444444, 65.25666667,
70.56444444, 70.56444444, 70.56444444, 70.56444444,
65.25666667, 70.81555556, 70.61 , 70.61 ,
70.61 , 70.61 , 65.1 , 65.1 ,
70.56444444, 65.25666667, 70.61 , 70.61 ,
70.17234043, 70.17234043, 64.98351648, 64.67788462,
64.67788462, 64.67788462, 64.67788462, 64.67788462,
64.67788462, 64.67788462, 64.67788462, 70.61186441,
70.61186441, 70.61186441, 65.1 , 69.79677419,
69.79677419, 69.79677419, 70.17234043, 70.17234043,
70.17234043, 64.98351648, 64.98351648, 64.98351648,
64.98351648, 70.08877551, 64.91382979, 64.39917355,
64.39917355, 69.43969466, 69.43969466, 64.4 ,
64.4 , 64.4 , 69.2553719 , 69.2553719 ,
69.2553719 , 69.2553719 , 63.88857143, 63.88857143,
63.88857143, 63.88857143, 63.88857143, 69.2553719 ,
69.2553719 , 69.2553719 , 63.88857143, 69.01344538,
69.01344538, 63.5928 , 63.5928 , 63.5928 ,
69.01344538, 69.01344538, 69.01344538, 63.5928 ,
63.5928 , 69.25322581, 63.59495798, 63.59495798,
63.59495798, 63.59495798, 69.01344538, 69.01344538,
69.01344538, 69.01344538, 69.01344538, 63.5928 ,
63.5928 , 63.44215686, 68.14047619, 68.14047619,
70.79545455, 70.79545455, 70.79545455, 65.7 ,
70.79545455, 70.79545455, 65.7 , 65.7 ,
65.7 , 65.7 , 70.921875 , 70.56444444,
70.56444444, 65.25666667, 65.25666667, 65.25666667,
65.25666667, 70.61 , 70.61 , 70.61 ,
70.61 , 65.1 , 65.1 , 65.1 ,
65.1 , 65.1 , 65.1 , 65.1 ,
70.08877551, 70.08877551, 70.08877551, 70.08877551,
64.91382979, 64.91382979, 64.91382979, 64.91382979,
64.91382979, 69.79677419, 69.79677419, 69.79677419,
69.79677419, 69.79677419, 64.67788462, 64.67788462,
64.67788462, 69.79677419, 69.79677419, 64.67788462,
64.67788462, 64.67788462, 69.79677419, 69.79677419,
64.67788462, 64.67788462, 64.67788462, 64.67788462,
69.79677419, 69.79677419, 69.79677419, 64.67788462,
64.67788462, 64.67788462, 64.67788462, 69.79677419,
69.79677419, 64.67788462, 69.79677419, 69.79677419,
69.65691057, 69.65691057, 69.65691057, 64.57889908,
64.57889908, 64.57889908, 64.57889908, 69.2553719 ,
69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 ,
69.2553719 , 63.88857143, 69.2553719 , 69.2553719 ,
69.2553719 , 63.88857143, 69.49779412, 69.49779412,
64.39917355, 64.39917355, 64.39917355, 69.43969466,
69.43969466, 69.43969466, 69.43969466, 69.43969466,
69.43969466, 64.4 , 64.4 , 64.67788462,
69.2553719 , 63.88857143, 63.88857143, 63.88857143,
69.2553719 , 69.2553719 , 69.2553719 , 69.2553719 ,
69.2553719 , 69.2553719 , 63.88857143, 63.88857143,
63.88857143, 69.23050847, 69.23050847, 69.23050847,
63.83267327, 63.83267327, 63.83267327, 63.83267327,
63.83267327, 69.25322581, 69.25322581, 63.59495798,
63.59495798, 69.25322581, 69.25322581, 69.25322581,
63.59495798, 63.59495798, 69.25322581, 69.25322581,
63.59495798, 63.59495798, 69.01344538, 69.01344538,
63.5928 , 63.5928 , 69.01344538, 69.01344538,
63.5928 , 63.5928 , 69.06396396, 69.06396396,
69.06396396, 69.06396396, 69.06396396, 63.50603448,
63.50603448, 63.50603448, 69.01344538, 69.01344538,
69.01344538, 69.01344538, 63.5928 , 63.5928 ,
63.5928 , 69.06396396, 69.06396396, 63.50603448,
68.29882353, 68.29882353, 68.00769231, 68.00769231,
63.15238095, 63.15238095, 63.15238095, 63.15238095,
68.2 , 68.2 , 62.57804878, 67.33 ,
67.33 , 62.5 , 62.5 , 62.5 ,
70.79047619, 70.79047619, 70.79047619, 65.44318182,
65.44318182, 65.44318182, 65.44318182, 65.44318182,
65.44318182, 65.44318182, 64.98351648, 69.79677419,
69.79677419, 69.79677419, 69.79677419, 64.67788462,
64.67788462, 64.67788462, 64.67788462, 69.79677419,
69.79677419, 69.79677419, 70.1742268 , 70.1742268 ,
70.1742268 , 70.1742268 , 69.79677419, 69.79677419,
69.79677419, 64.67788462, 64.67788462, 64.67788462,
70.08877551, 70.08877551, 70.08877551, 70.08877551,
70.08877551, 70.08877551, 64.91382979, 70.17234043,
70.17234043, 64.98351648, 64.98351648, 70.08877551,
64.91382979, 64.91382979, 64.91382979, 64.91382979,
64.91382979, 70.08877551, 70.08877551, 70.08877551,
70.08877551, 64.91382979, 64.91382979, 64.91382979,
69.79677419, 69.79677419, 69.79677419, 69.79677419,
64.67788462, 64.67788462, 64.67788462, 64.67788462,
64.67788462, 69.2553719 , 69.2553719 , 69.2553719 ,
63.88857143, 63.88857143, 63.88857143, 63.88857143,
69.2553719 , 69.2553719 , 69.2553719 , 63.88857143,
63.88857143, 63.88857143, 63.88857143, 69.22459016,
69.22459016, 63.61403509, 63.61403509, 63.50603448,
68.67980769, 63.44215686, 63.44215686, 68.67980769,
68.67980769, 68.67980769, 68.67980769, 68.67980769,
63.44215686, 63.44215686, 69.06396396, 69.06396396,
69.06396396, 63.50603448, 63.50603448, 63.50603448,
63.50603448, 69.06396396, 63.50603448, 63.50603448,
63.42524272, 68.51782178, 68.51782178, 68.51782178,
68.29882353, 68.29882353, 68.29882353, 68.29882353,
63.01 , 68.51782178, 68.51782178, 68.51782178,
63.51975309, 63.51975309, 63.51975309, 63.51975309,
63.51975309, 63.51975309, 63.51975309, 63.51975309,
68.51782178, 68.51782178, 68.51782178, 68.51782178,
63.51975309, 63.51975309, 63.51975309, 63.51975309,
68.29882353, 68.29882353, 63.01 , 63.01 ,
68.02222222, 68.02222222, 68.02222222, 63.24347826,
63.24347826, 68.00769231, 68.00769231, 68.00769231,
68.00769231, 68.00769231, 68.00769231, 63.15238095,
63.15238095, 63.15238095, 68.14047619, 62.79148936,
62.79148936, 68.14047619, 68.14047619, 62.79148936,
62.79148936, 67.95581395, 71.00952381, 65.75882353,
70.08877551, 70.08877551, 64.91382979, 69.79677419,
69.79677419, 69.79677419, 69.79677419, 69.79677419,
69.79677419, 69.79677419, 69.79677419, 69.79677419,
69.79677419, 64.67788462, 69.01344538, 69.01344538,
69.25322581, 63.59495798, 69.25322581, 69.25322581,
69.25322581, 69.25322581, 63.59495798, 63.59495798,
63.59495798, 69.01344538, 69.01344538, 63.5928 ,
63.5928 , 69.25322581, 69.25322581, 69.25322581,
69.25322581, 63.59495798, 63.59495798, 63.59495798,
63.59495798, 69.25322581, 69.25322581, 69.25322581,
69.25322581, 69.25322581, 63.59495798, 63.59495798,
63.59495798, 68.67980769, 68.67980769, 68.67980769,
68.67980769, 63.44215686, 63.44215686, 63.44215686,
63.44215686, 63.44215686, 63.44215686, 68.67980769,
68.67980769, 63.44215686, 63.44215686, 68.67980769,
68.67980769, 68.67980769, 68.67980769, 63.44215686,
63.50603448, 68.67980769, 68.67980769, 68.67980769,
63.44215686, 63.44215686, 63.44215686, 63.44215686,
63.44215686, 63.44215686, 63.44215686, 68.29882353,
68.29882353, 68.29882353, 68.29882353, 68.29882353,
63.01 , 63.01 , 63.01 , 68.67980769,
68.67980769, 68.67980769, 63.44215686, 68.29882353,
68.29882353, 68.29882353, 63.01 , 63.01 ,
68.29882353, 68.29882353, 68.29882353, 68.29882353,
68.29882353, 63.01 , 63.01 , 63.01 ,
68.29882353, 68.29882353, 68.29882353, 63.01 ,
63.01 , 63.01 , 68.67980769, 68.29882353,
68.67047619, 68.67047619, 68.67047619, 68.67047619,
63.42524272, 68.02222222, 68.31975309, 62.99310345,
68.02222222, 68.14047619, 68.14047619, 68.14047619,
62.79148936, 62.79148936, 68.2 , 67.33 ,
62.5 , 62.5 , 62.5 , 62.5 ,
62.5 , 62.5 , 67.33 , 67.33 ,
67.33 , 62.5 , 66.916 , 65.93846154,
65.93846154, 65.93846154, 65.93846154, 65.93846154,
65.93846154, 63.57142857, 63.57142857, 63.57142857,
63.57142857, 69.22459016, 69.22459016, 69.22459016,
63.61403509, 63.61403509, 69.25322581, 69.01344538,
69.01344538, 69.01344538, 69.01344538, 69.01344538,
63.5928 , 63.5928 , 63.5928 , 68.67980769,
68.67980769, 63.44215686, 63.44215686, 63.44215686,
63.44215686, 69.06396396, 69.06396396, 63.50603448,
63.50603448, 63.50603448, 69.06396396, 69.06396396,
63.50603448, 63.50603448, 68.67980769, 63.44215686,
63.44215686, 69.06396396, 69.06396396, 63.50603448,
63.50603448, 63.50603448, 63.50603448, 63.50603448,
63.50603448, 63.50603448, 63.50603448, 63.50603448,
68.29882353, 68.29882353, 68.29882353, 68.29882353,
68.02222222, 68.02222222, 68.02222222, 68.02222222,
68.02222222, 68.02222222, 68.02222222, 63.24347826,
68.00769231, 68.00769231, 63.15238095, 67.95581395,
67.95581395, 67.95581395, 67.95581395, 63.15897436,
67.33 , 69.01344538, 69.01344538, 69.01344538,
69.01344538, 69.01344538, 63.5928 , 63.5928 ,
63.5928 , 69.01344538, 69.01344538, 69.01344538,
63.5928 , 63.5928 , 63.5928 , 63.5928 ,
63.5928 , 63.5928 , 68.67980769, 68.67980769,
63.44215686, 63.44215686, 63.44215686, 68.67980769,
68.67980769, 63.44215686, 63.44215686, 63.44215686,
63.44215686, 68.51782178, 68.51782178, 68.51782178,
68.51782178, 63.51975309, 63.51975309, 63.51975309,
63.51975309, 68.51782178, 68.51782178, 68.51782178,
63.51975309, 63.51975309, 68.14047619, 63.15897436,
63.15897436, 67.95581395, 67.95581395, 67.95581395,
63.15897436, 63.15897436, 63.15897436, 68.14047619,
68.14047619, 62.79148936, 62.79148936, 62.79148936,
62.79148936, 62.79148936, 66.916 , 66.575 ,
66.575 , 66.575 , 61.61538462, 66.575 ,
66.63076923, 66.63076923, 66.63076923, 66.63076923,
66.63076923, 66.63076923, 66.63076923, 60.81818182,
60.81818182, 60.81818182, 60.81818182, 60.81818182,
60.81818182, 60.81818182, 60.81818182, 68.67980769,
68.67980769, 68.67980769, 63.44215686, 63.44215686,
68.29882353, 63.01 , 63.01 , 63.01 ,
68.29882353, 68.29882353, 68.29882353, 63.01 ,
63.01 , 68.00769231, 68.00769231, 68.00769231,
63.15238095, 63.15238095, 63.15238095, 63.15238095,
63.15238095, 63.15238095, 68.02222222, 63.24347826,
68.00769231, 68.00769231, 68.00769231, 68.00769231,
63.15238095, 63.15238095, 68.14047619, 68.14047619,
62.79148936, 62.79148936, 62.79148936, 62.79148936,
67.33 , 62.5 , 67.33 , 67.33 ,
62.5 , 68.2 , 68.2 , 68.2 ,
62.57804878, 66.575 , 66.575 , 66.575 ,
66.575 , 60.81818182, 67.33 , 67.33 ,
62.5 , 62.5 , 62.5 , 62.5 ,
62.5 , 67.33 , 67.33 , 62.5 ,
62.5 , 62.5 , 62.5 , 62.5 ,
65.93846154, 66. , 60. , 62.66666667,
62.66666667, 67.33 , 62.5 , 62.5 ,
# 66.575 , 60.81818182])   <- last row of the array printout above

smarter_predicted_heights = heights.apply(predict_smarter, 'Parent Average', 'M/F')
heights = heights.with_column('Smarter Prediction', smarter_predicted_heights)
heights  # notebook output: Loading...

# The original cell called heights.apply(difference, 'Child', 'Smarter Prediction'),
# but `difference` is not defined anywhere in this notebook (NameError).
# Compute the errors by column arithmetic instead, as prediction minus
# actual, the same convention used for the 'errors' column.
# NOTE(review): the intended sign of `difference` is unknown; the original
# argument order ('Child' first) may have meant actual minus prediction - confirm.
smarter_pred_errs = heights.column('Smarter Prediction') - heights.column('Child')
heights = heights.with_column('Smarter Errors', smarter_pred_errs)

heights.hist('Smarter Errors', group='M/F')
# Grouping by One Column

cones = Table.read_table('cones.csv').drop('Color')
cones  # notebook output: Loading...

# With no second argument, group is called with only the column label.
cones.group('Flavor')  # notebook output: Loading...

# A second argument is passed to group alongside the column label.
cones.group('Flavor', np.average)  # notebook output: Loading...

cones.group('Flavor', np.min)  # notebook output: Loading...

cones.group('Flavor', list)  # notebook output: Loading...

# Grouping By One Column: Welcome Survey

survey = Table.read_table('welcome_survey_sp26.csv')
survey.show(3)  # notebook output: Loading...

survey.hist('Extroversion')

survey.select(1, 2, 3).group('Extroversion', np.average)  # notebook output: Loading...

survey.group('Extroversion', np.average).plot('Extroversion', 'Text Recipients average')

survey.group("Year")  # notebook output: Loading...

(survey
 .select("Year", "Sleep Hours")
 .group("Year", np.average))  # notebook output: Loading...

# Lists

# These were two separate cells; fused together they parsed as a list
# subscripted by a tuple, which raises TypeError.
[1, 5, 'hello', 5.0]

[1, 5, 'hello', 5.0, make_array(1,2,3)]

# Grouping by Two Columns
# Do right-handed people tend to sleep on their left side and left-handed people sleep on their right?

survey.group('Sleep Position')  # notebook output: Loading...

survey.group(['Handedness', 'Sleep Position']).show()  # notebook output: Loading...