from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
# warnings.simplefilter(action='ignore', category=np.VisibleDeprecationWarning)

Appending Arrays¶

first = np.arange(4)
second = np.arange(10, 17)

first

array([0, 1, 2, 3])

np.append(first, 6)

array([0, 1, 2, 3, 6])

first

array([0, 1, 2, 3])

# how do we change `first`?
first = np.append(first, 6)

np.append(first, second)

array([ 0, 1, 2, 3, 6, 10, 11, 12, 13, 14, 15, 16])

first

array([0, 1, 2, 3, 6])

second

array([10, 11, 12, 13, 14, 15, 16])

Comparison¶

3 > 1

True

type(3 > 1)

bool

True

True

true

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[26], line 1
----> 1 true

NameError: name 'true' is not defined

(We see that capitalization matters.)

3 = 3

  Cell In[27], line 1
    3 = 3
    ^
SyntaxError: cannot assign to literal here. Maybe you meant '==' instead of '='?

3 == 3.0

True

10 != 2

True

x = 14
y = 3

x > 15

False

12 < x

True

x < 20

True

12 < x < 20

True

10 < x-y < 13

True

x > 13 and y < 3.14159

True

(The comparison 12 < x < 20 is equivalent to 12 < x and x < 20.)

Comparisons with arrays¶

pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')

pets == 'cat'

array([ True, True, False, True, False, False], dtype=bool)

1 + 1 + 0 + 1 + 0 + 0

3

sum(make_array(True, True, False, True, False, False))

3

sum(pets == 'cat')

3

np.count_nonzero(pets == 'cat')

3

xs = np.arange(20, 31)

xs

array([20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])

xs > 28

array([False, False, False, False, False, False, False, False, False,
        True,  True], dtype=bool)

sum(xs > 28)

2

Conditional Statements¶

age = 20

# Nested form: the inner if/else is reached only when the outer test
# (age >= 18) fails, so exactly one of the three messages is printed.
if age >= 18:
    print('You can legally vote.')
else:
    # Only ages below 18 get here.
    if age >= 16:
        print("You can't vote just yet, but you can pre-register!")
    else:
        print("You can't vote just yet.")

You can legally vote.

# Caution: the first `if` has no `else`, and the second `if` begins a new,
# independent statement. Both tests are evaluated, so an age of 18 or more
# prints the "legally vote" line and then the "pre-register" line as well.
if age >= 18:
    print('You can legally vote.')
if age >= 16:
    print("You can't vote just yet, but you can pre-register!")
else:
    print("You can't vote just yet.")

You can legally vote.
You can't vote just yet, but you can pre-register!

def vote(age):
    """Return a message describing the voting status for `age`.

    The thresholds are checked from highest to lowest: 18 and over can
    vote, 16 and over can pre-register, and anything else falls through
    to the final message.
    """
    if age >= 18:
        return 'You can legally vote.'
    if age >= 16:
        return "You can't vote just yet, but you can pre-register!"
    return "You can't vote just yet."

vote(3)

"You can't vote just yet."

vote(17)

"You can't vote just yet, but you can pre-register!"

vote(25)

'You can legally vote.'

def rights(age):
    """Return an array naming every right available at `age`.

    Unlike an if/elif chain, each threshold is tested on its own, so an
    age that passes several tests collects several entries. When no test
    passes, the empty array from make_array() is returned unchanged.
    """
    granted = make_array()
    # (minimum age, right) pairs, checked in this order.
    for minimum_age, right in ((16, 'register to vote'), (18, 'vote')):
        if age >= minimum_age:
            # np.append builds a new array, so rebind the name each time.
            granted = np.append(granted, right)
    return granted

rights(3)

array([], dtype=float64)

rights(17)

array(['register to vote'],
      dtype='<U32')

rights(25)

array(['register to vote', 'vote'],
      dtype='<U32')

# read_table is a Table classmethod, so call it on the class itself rather
# than on a throwaway empty Table() instance. The rows are then sorted by
# the 'Zip Code' column before previewing the first five.
trip = Table.read_table('trip.csv').sort('Zip Code')
trip.show(5)

def trip_kind(start, end):
    """Label a trip 'round trip' when `start` equals `end`, else 'one way'."""
    return 'round trip' if start == end else 'one way'

# trip.show(5)

kinds = trip.apply(trip_kind, 'Start Station', 'End Station')
with_kinds = trip.with_column('Trip Kind', kinds)
with_kinds.show(5)

# recall pivot! what will we see?
with_kinds.where('Duration', are.below(600)).pivot('Trip Kind', 'Start Station')

Simulation¶

Let’s play a game: we each roll a die.

If my number is bigger: you pay me a dollar.

If they’re the same: we do nothing.

If your number is bigger: I pay you a dollar.

Steps:

1. Find a way to simulate two dice rolls.
2. Compute how much money we win/lose based on the result.
3. Do steps 1 and 2 10,000 times.

Conditional Statements¶

# Work in progress
def one_round(my_roll, your_roll):
    """First draft of the betting rule: only the winning case is handled.

    Returns 1 when `my_roll` is greater than `your_roll`; every other
    case returns None for now.
    """
    return 1 if my_roll > your_roll else None

one_round(4, 3)

1

one_round(2, 6)

def one_round(my_roll, your_roll):
    """Return my winnings for one round of the dice game.

    1 if `my_roll` is larger, -1 if `your_roll` is larger, and 0 if the
    two are equal. If none of the three comparisons holds, the result
    is None.
    """
    winnings = None
    if my_roll > your_roll:
        winnings = 1
    elif your_roll > my_roll:
        winnings = -1
    elif your_roll == my_roll:
        winnings = 0
    return winnings

one_round(1, 1)

0

one_round(6, 5)

1

one_round(-7, -1)

-1

Random Selection¶

mornings = make_array('wake up', 'sleep in')

np.random.choice(mornings)

'wake up'

np.random.choice(mornings)

'sleep in'

np.random.choice(mornings)

'wake up'

We can also pass an argument that specifies how many times to make a random choice:

np.random.choice(mornings, 7)

array(['wake up', 'wake up', 'wake up', 'sleep in', 'wake up', 'sleep in',
       'wake up'],
      dtype='<U8')

sum(np.random.choice(mornings, 7) == 'wake up')

5

sum(np.random.choice(mornings, 7) == 'sleep in')

4

^ Why don’t these (always) sum to 7?

morning_week = np.random.choice(mornings, 7)
morning_week

array(['sleep in', 'wake up', 'wake up', 'sleep in', 'wake up', 'wake up',
       'wake up'],
      dtype='<U8')

sum(morning_week == 'wake up')

5

sum(morning_week == 'sleep in')

2

Simulating the roll of a die¶

die_faces = np.arange(1, 7)

np.random.choice(die_faces)

4

def simulate_one_round():
    """Roll a die for each player and return the result of one_round.

    Each roll is a separate np.random.choice draw from die_faces; my roll
    is drawn first, then yours.
    """
    mine = np.random.choice(die_faces)
    yours = np.random.choice(die_faces)
    return one_round(my_roll=mine, your_roll=yours)

simulate_one_round()

-1

Repeated Betting¶

results = make_array()

results = np.append(results, simulate_one_round())
results

array([-1., 1.])

results = np.append(results, simulate_one_round())
results

array([-1.,  1.,  1., -1., -1.,  1., -1., -1.,  1.,  1.,  1.,  1.,  0.,
        1.,  1.,  1.,  1., -1.,  0.,  0., -1.,  0.,  1., -1.,  1.,  1.,
       -1.,  1.,  0.,  1.])

`For` Statements¶

for pet in make_array('cat', 'dog', 'rabbit'):
    print('I love my ' + pet)

I love my cat
I love my dog
I love my rabbit

# The same work as the for loop above, written out by hand: bind `pet` to
# each item of the array in turn, then run the loop body once per binding.
pet = make_array('cat', 'dog', 'rabbit').item(0)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(1)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(2)
print('I love my ' + pet)

I love my cat
I love my dog
I love my rabbit

np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

for i in np.arange(10):
    print('GO BEARSSSS')

GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS
GO BEARSSSS

(see slides)

# Start from an empty array and grow it by one simulated round per pass.
game_outcomes = make_array()

# The loop variable `i` is unused; the loop just repeats the body 5 times.
for i in np.arange(5):
    # np.append returns a new array, so the name must be reassigned.
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

array([ 1., -1., 1., -1., 0.])

# Reset the collection array, then repeat the simulated round 10,000 times.
game_outcomes = make_array()

for i in np.arange(10000):
    # np.append returns a new array, so the name must be reassigned.
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

array([-1., 1., 1., ..., -1., 1., 0.])

len(game_outcomes)

10000

results = Table().with_column('My winnings', game_outcomes)

results

results.group('My winnings').barh('My winnings')

sum(game_outcomes)

Another example: simulating heads in 100 coin tosses¶

coin = make_array('heads', 'tails')

sum(np.random.choice(coin, 100) == 'heads')

51

# Simulate one outcome

def num_heads():
    """Simulate 100 random draws from `coin` and count the 'heads'."""
    tosses = np.random.choice(coin, 100)
    is_heads = tosses == 'heads'
    # Summing the boolean array counts its True entries.
    return sum(is_heads)

# Decide how many times you want to repeat the experiment

repetitions = 10000

# Simulate that many outcomes

# Collection array: one entry (a heads count) is added per repetition.
outcomes = make_array()

for i in np.arange(repetitions):
    # np.append returns a new array, so the name must be reassigned.
    outcomes = np.append(outcomes, num_heads())

heads = Table().with_column('Heads', outcomes)
heads.hist(bins = np.arange(29.5, 70.6))

`datascience` library¶

trip.show(3)

trip.where('Duration', are.above(1000))
# .column('Duration')

# Loop-based version of filtering: collect every duration above 1000 into
# a new array, then take the mean of what was collected.
big_trip_durations = make_array()

for duration in trip.column('Duration'):
    # Strictly greater than 1000; a duration of exactly 1000 is skipped.
    if duration > 1000:
        big_trip_durations = np.append(big_trip_durations, duration)
np.mean(big_trip_durations)

5259.1915219611847

np.mean(
    trip.where('Duration', are.above(1000)).column('Duration')
)

5259.1915219611847

Optional: Advanced `where`¶

ages = make_array(16, 22, 18, 15, 19, 15, 16, 21)
age = Table().with_column('Age', ages)

age

age.where('Age', are.above_or_equal_to(18))

voter = ages >= 18

voter

array([False, True, True, False, True, False, False, True], dtype=bool)

age.where(voter)

is_voter = are.above_or_equal_to(18)

type(is_voter)

datascience.predicates._combinable

is_voter(22)

True

is_voter(3)

False

age.apply(is_voter, 'Age')

array([False, True, True, False, True, False, False, True], dtype=bool)

ages >= 18

array([False, True, True, False, True, False, False, True], dtype=bool)

voter

array([False, True, True, False, True, False, False, True], dtype=bool)

def my_voter_function(x):
    """Return the result of comparing `x` against the voting age, x >= 18."""
    meets_voting_age = x >= 18
    return meets_voting_age

age.where('Age', are.above_or_equal_to(18))

age.where(voter)

Appending Arrays

Appending Arrays¶

Comparison¶

Comparisons with arrays¶

Conditional Statements¶

Simulation¶

Conditional Statements¶

Random Selection¶

Simulating the roll of a die¶

Repeated Betting¶

For Statements¶

Another example: simulating heads in 100 coin tosses¶

datascience library¶

Optional: Advanced where¶

`For` Statements¶

`datascience` library¶

Optional: Advanced `where`¶