from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
import warnings
warnings.simplefilter(action='ignore',category=np.VisibleDeprecationWarning)Lecture 11¶
Rows from lists¶
Table().with_columns('Numbers', [1, 2, 3])drinks = Table(['Drink', 'Cafe', 'Price'])
drinksdrinks = drinks.with_rows([
['Milk Tea', 'Asha', 5.5],
['Espresso', 'Strada', 1.75],
['Latte', 'Strada', 3.25],
['Espresso', "FSM", 2]
])
drinksCross-classification: grouping by two columns¶
survey = Table.read_table('welcome_survey_sp26.csv')
survey.show(3)survey.group(['Handedness', 'Sleep Position']).show()survey.group(['Handedness', 'Sleep Position'], np.average).show()Pivot Tables¶
survey.group('Handedness')survey.pivot('Sleep Position', 'Handedness')survey.pivot('Sleep Position',
'Handedness',
values='Sleep Hours',
collect=np.average)Discussion Questions¶
# From the CORGIS Dataset Project
# By Austin Cory Bart acbart@vt.edu
# Version 2.0.0, created 3/22/2016
# https://corgis-edu.github.io/corgis/csv/skyscrapers/
sky = Table.read_table('skyscrapers.csv')
sky = (sky.with_column('age', 2022 - sky.column('completed'))
.drop('completed'))
sky.show(3)# 1. For each city, what’s the height of the tallest building for each material?
sky.pivot('material', 'city', 'height', max)# 2. For each city, what’s the height difference between the tallest
# steel building and the tallest concrete building?
WARNING: ANSWERS BELOW
sky.select('material', 'city', 'height').group(['city', 'material'], collect=max)tallest = sky.pivot('material', 'city', values='height', collect=max)
tallest.show()diffs = Table().with_columns(
'city', tallest.column('city'),
'difference', abs(tallest.column('steel') - tallest.column('concrete'))
)
diffsdiffs.sort('difference', descending=True)# 3. Generate a table of the names of the oldest buildings for each
# material for each city:
# Hint: You can use sort to find the name of the oldest building in the dataset
sky.sort('age', descending=True).column('name').item(0)
WARNING: ANSWERS BELOW
def first(s):
"Return the first element in an array."
return s.item(0)
(sky
.sort('age', descending=True)
.pivot('material', 'city', 'name', first)
)#.where('city', 'San Francisco')Joins¶
drinksdiscounts = Table().with_columns(
'Coupon % off', make_array(10, 25, 5),
'Location', make_array('Asha', 'Strada', 'Asha')
)
discountscombined = drinks.join('Cafe', discounts, 'Location')
combineddiscounted_frac = 1 - combined.column('Coupon % off') / 100
combined.with_column(
'Discounted Price',
combined.column('Price') * discounted_frac
)drinks.join('Cafe', drinks, 'Cafe')