Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
warnings.simplefilter(action='ignore',category=np.VisibleDeprecationWarning)

Lecture 11

Rows from lists

Table().with_columns('Numbers', [1, 2, 3])
drinks = Table(['Drink', 'Cafe', 'Price'])
drinks
drinks = drinks.with_rows([
    ['Milk Tea', 'Asha', 5.5],
    ['Espresso', 'Strada',  1.75],
    ['Latte',    'Strada',  3.25],
    ['Espresso', "FSM",   2]
])
drinks

Cross-classification: grouping by two columns

survey = Table.read_table('welcome_survey_sp26.csv')
survey.show(3)
survey.group(['Handedness', 'Sleep Position']).show()
survey.group(['Handedness', 'Sleep Position'], np.average).show()

Pivot Tables

survey.group('Handedness')
survey.pivot('Sleep Position', 'Handedness')
survey.pivot('Sleep Position', 
             'Handedness', 
             values='Sleep Hours', 
             collect=np.average)

Discussion Questions

# From the CORGIS Dataset Project
# By Austin Cory Bart acbart@vt.edu
# Version 2.0.0, created 3/22/2016
# https://corgis-edu.github.io/corgis/csv/skyscrapers/

sky = Table.read_table('skyscrapers.csv')
sky = (sky.with_column('age', 2022 - sky.column('completed'))
          .drop('completed'))
sky.show(3)
# 1. For each city, what’s the height of the tallest building for each material?

sky.pivot('material', 'city', 'height', max)
# 2. For each city, what’s the height difference between the tallest 
#    steel building and the tallest concrete building?










WARNING: ANSWERS BELOW

sky.select('material', 'city', 'height').group(['city', 'material'], collect=max)
tallest = sky.pivot('material', 'city', values='height', collect=max)
tallest.show()
diffs = Table().with_columns(
    'city', tallest.column('city'),
    'difference', abs(tallest.column('steel') - tallest.column('concrete'))
)
diffs
diffs.sort('difference', descending=True)
# 3. Generate a table of the names of the oldest buildings for each 
#    material for each city:

# Hint: You can use sort to find the name of the oldest building in the dataset
sky.sort('age', descending=True).column('name').item(0)









WARNING: ANSWERS BELOW

def first(s):
    "Return the first element in an array."
    return s.item(0)

(sky
 .sort('age', descending=True)
 .pivot('material', 'city', 'name', first)
)#.where('city', 'San Francisco')

Joins

drinks
discounts = Table().with_columns(
    'Coupon % off', make_array(10, 25, 5),
    'Location', make_array('Asha', 'Strada', 'Asha')
)
discounts
combined = drinks.join('Cafe', discounts, 'Location')
combined
discounted_frac = 1 - combined.column('Coupon % off') / 100
combined.with_column(
    'Discounted Price', 
    combined.column('Price') * discounted_frac
)
drinks.join('Cafe', drinks, 'Cafe')