from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
# warnings.simplefilter(action='ignore', category=np.VisibleDeprecationWarning)Appending Arrays¶
first = np.arange(4)
second = np.arange(10, 17)firstarray([0, 1, 2, 3])np.append(first, 6)array([0, 1, 2, 3, 6])firstarray([0, 1, 2, 3])# how do we change `first`?
first = np.append(first, 6)np.append(first, second)array([ 0, 1, 2, 3, 6, 10, 11, 12, 13, 14, 15, 16])firstarray([0, 1, 2, 3, 6])secondarray([10, 11, 12, 13, 14, 15, 16])Comparison¶
3 > 1Truetype(3 > 1)boolTrueTruetrue---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[26], line 1
----> 1 true
NameError: name 'true' is not defined(We see that capitalization matters.)
3 = 3 Cell In[27], line 1
3 = 3
^
SyntaxError: cannot assign to literal here. Maybe you meant '==' instead of '='?
3 == 3.0True10 != 2Truex = 14
y = 3x > 15False12 < xTruex < 20True12 < x < 20True10 < x-y < 13Truex > 13 and y < 3.14159True(The comparison 12 < x < 20 is equivalent to 12 < x and x < 20.)
Comparisons with arrays¶
pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')pets == 'cat'array([ True, True, False, True, False, False], dtype=bool)1 + 1 + 0 + 1 + 0 + 03sum(make_array(True, True, False, True, False, False))3sum(pets == 'cat')3np.count_nonzero(pets == 'cat')3xs = np.arange(20, 31)xsarray([20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])xs > 28array([False, False, False, False, False, False, False, False, False,
True, True], dtype=bool)sum(xs > 28)2Conditional Statements¶
age = 20if age >= 18:
print('You can legally vote.')
else:
if age >= 16:
print("You can't vote just yet, but you can pre-register!")
else:
print("You can't vote just yet.")You can't vote just yet, but you can pre-register!
if age >= 18:
print('You can legally vote.')
if age >= 16:
print("You can't vote just yet, but you can pre-register!")
else:
print("You can't vote just yet.")You can legally vote.
You can't vote just yet, but you can pre-register!
def vote(age):
    """Return a message describing the voting status of someone aged `age`.

    Ages 18 and up may vote, ages 16-17 may pre-register, and anyone
    younger gets the plain "not yet" message.
    """
    if age >= 18:
        return 'You can legally vote.'
    elif age >= 16:
        return "You can't vote just yet, but you can pre-register!"
    else:
        return "You can't vote just yet."


vote(3)   # "You can't vote just yet."
vote(17)  # "You can't vote just yet, but you can pre-register!"
vote(25)  # 'You can legally vote.'


def rights(age):
    """Return an array naming the voting rights held at `age`.

    The two checks are independent `if` statements (not `if`/`elif`), so
    an age of 18 or more collects both entries.
    """
    my_rights = make_array()
    if age >= 16:
        my_rights = np.append(my_rights, 'register to vote')
    if age >= 18:
        my_rights = np.append(my_rights, 'vote')
    return my_rights


# Nothing is appended below 16, so the empty array comes back unchanged.
rights(3)   # array([], dtype=float64)
rights(17)  # array(['register to vote'], dtype='<U32')
rights(25)  # array(['register to vote', 'vote'], dtype='<U32')

trip = Table().read_table('trip.csv').sort('Zip Code')
trip.show(5)Loading...
def trip_kind(start, end):
    """Classify a trip by comparing its start and end values.

    Returns 'round trip' when `start` equals `end`, otherwise 'one way'.
    """
    if start == end:
        return 'round trip'
    else:
        return 'one way'
# trip.show(5)
kinds = trip.apply(trip_kind, 'Start Station', 'End Station')
with_kinds = trip.with_column('Trip Kind', kinds)
with_kinds.show(5)Loading...
# recall pivot! what will we see?
with_kinds.where('Duration', are.below(600)).pivot('Trip Kind', 'Start Station')Loading...
Simulation¶
Let’s play a game: we each roll a die.
If my number is bigger: you pay me a dollar.
If they’re the same: we do nothing.
If your number is bigger: I pay you a dollar.
Steps:
1. Find a way to simulate two dice rolls.
2. Compute how much money we win/lose based on the result.
3. Do steps 1 and 2 10,000 times.
Conditional Statements¶
# Work in progress
def one_round(my_roll, your_roll):
    """Draft version: only handles the case where my roll is larger.

    Every other case falls off the end of the function and returns None.
    """
    if my_roll > your_roll:
        return 1


one_round(4, 3)  # 1
one_round(2, 6)  # (no output shown: the draft has no branch for this case)


def one_round(my_roll, your_roll):
    """Return my winnings in dollars for one round of the dice game.

    1 if my roll is larger, -1 if your roll is larger, 0 if they are equal.
    """
    if my_roll > your_roll:
        return 1
    elif your_roll > my_roll:
        return -1
    elif your_roll == my_roll:
        return 0


one_round(1, 1)    # 0
one_round(6, 5)    # 1
one_round(-7, -1)  # -1

# Random Selection
mornings = make_array('wake up', 'sleep in')np.random.choice(mornings)'wake up'np.random.choice(mornings)'sleep in'np.random.choice(mornings)'wake up'We can also pass an argument that specifies how many times to make a random choice:
np.random.choice(mornings, 7)array(['wake up', 'wake up', 'wake up', 'sleep in', 'wake up', 'sleep in',
'wake up'],
dtype='<U8')sum(np.random.choice(mornings, 7) == 'wake up')5sum(np.random.choice(mornings, 7) == 'sleep in')4^ Why don’t these (always) sum to 7?
morning_week = np.random.choice(mornings, 7)
morning_weekarray(['sleep in', 'wake up', 'wake up', 'sleep in', 'wake up', 'wake up',
'wake up'],
dtype='<U8')sum(morning_week == 'wake up')5sum(morning_week == 'sleep in')2Simulating the roll of a die¶
die_faces = np.arange(1, 7)
np.random.choice(die_faces)  # e.g. 4 (random; varies per run)


def simulate_one_round():
    """Roll two dice and return the result of `one_round` for that pair.

    Each roll is a separate random draw from the module-level `die_faces`
    array; the first draw is passed as my roll, the second as yours.
    """
    my_roll = np.random.choice(die_faces)
    your_roll = np.random.choice(die_faces)
    return one_round(my_roll, your_roll)


simulate_one_round()  # e.g. -1 (random; varies per run)

# Repeated Betting
results = make_array()results = np.append(results, simulate_one_round())
resultsarray([-1., 1.])results = np.append(results, simulate_one_round())
resultsarray([-1., 1., 1., -1., -1., 1., -1., -1., 1., 1., 1., 1., 0.,
1., 1., 1., 1., -1., 0., 0., -1., 0., 1., -1., 1., 1.,
-1., 1., 0., 1.])For Statements¶
for pet in make_array('cat', 'dog', 'rabbit'):
print('I love my ' + pet)I love my cat
I love my dog
I love my rabbit
pet = make_array('cat', 'dog', 'rabbit').item(0)
print('I love my ' + pet)
pet = make_array('cat', 'dog', 'rabbit').item(1)
print('I love my ' + pet)
pet = make_array('cat', 'dog', 'rabbit').item(2)
print('I love my ' + pet)I love my cat
I love my dog
I love my rabbit
np.arange(10)array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])for i in np.arange(10):
print('GO BEARSSSS')GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
(see slides)
game_outcomes = make_array()
for i in np.arange(5):
game_outcomes = np.append(game_outcomes, simulate_one_round())
game_outcomesarray([ 1., -1., 1., -1., 0.])game_outcomes = make_array()
for i in np.arange(10000):
game_outcomes = np.append(game_outcomes, simulate_one_round())
game_outcomesarray([-1., 1., 1., ..., -1., 1., 0.])len(game_outcomes)10000results = Table().with_column('My winnings', game_outcomes)resultsLoading...
results.group('My winnings').barh('My winnings')
sum(game_outcomes)Another example: simulating heads in 100 coin tosses¶
coin = make_array('heads', 'tails')sum(np.random.choice(coin, 100) == 'heads')51# Simulate one outcome
def num_heads():
    """Simulate 100 coin tosses and return how many came up 'heads'.

    Draws 100 values at random from the module-level `coin` array and
    counts the entries equal to 'heads'.
    """
    return sum(np.random.choice(coin, 100) == 'heads')


# Decide how many times you want to repeat the experiment
repetitions = 10000# Simulate that many outcomes
outcomes = make_array()
for i in np.arange(repetitions):
outcomes = np.append(outcomes, num_heads())heads = Table().with_column('Heads', outcomes)
heads.hist(bins = np.arange(29.5, 70.6))
datascience library¶
trip.show(3)Loading...
trip.where('Duration', are.above(1000))
# .column('Duration')Loading...
big_trip_durations = make_array()for duration in trip.column('Duration'):
if duration > 1000:
big_trip_durations = np.append(big_trip_durations, duration)
np.mean(big_trip_durations)5259.1915219611847np.mean(
trip.where('Duration', are.above(1000)).column('Duration')
)5259.1915219611847Optional: Advanced where¶
ages = make_array(16, 22, 18, 15, 19, 15, 16, 21)
age = Table().with_column('Age', ages)ageLoading...
age.where('Age', are.above_or_equal_to(18))Loading...
voter = ages >= 18voterarray([False, True, True, False, True, False, False, True], dtype=bool)age.where(voter)Loading...
is_voter = are.above_or_equal_to(18)
type(is_voter)  # datascience.predicates._combinable
is_voter(22)    # True
is_voter(3)     # False

# Three ways to get the same boolean array:
age.apply(is_voter, 'Age')  # array([False, True, True, False, True, False, False, True], dtype=bool)
ages >= 18                  # array([False, True, True, False, True, False, False, True], dtype=bool)
voter                       # array([False, True, True, False, True, False, False, True], dtype=bool)


def my_voter_function(x):
    """Return whether `x` is at least 18."""
    return x >= 18


# Recorded output is just "Loading..." (presumably a placeholder for the rendered table).
age.where('Age', are.above_or_equal_to(18))
age.where(voter)Loading...