from datascience import *
import numpy as np
Tables¶
A table is given a name (such as nba) in our programming environment so that we can perform operations on the whole table.
# From https://github.com/erikgregorywebb/datasets/blob/master/nba-salaries.csv
nba = Table.read_table('nba_salaries.csv')
nba
Loading...
Manipulating the table creates a new table, but doesn’t change/lose the original.
nba.select('name')
Loading...
nba.drop('rank', 'position').sort('name')
Loading...
nba.sort('name')
Loading...
'nba'
Because each operation creates a new table, multiple operations can be applied.
nba.where('position', 'PG')
Loading...
nba.where('name', "Shaquille O'Neal")
Loading...
The result of applying operations is a table, which can also be given a name (e.g., point_guards) in our programming environment.
point_guards = nba.where('position', 'PG').drop('rank', 'position')
point_guards
Loading...
point_guards.where('season', 2020).sort('name').sort('team')
Loading...
nba
Sorting can be performed in descending order as well.
point_guards.where('season', 2020).sort('salary').show(10)
Loading...
point_guards.where('season', 2020).sort('salary', descending=True).show(10)
Loading...
Numbers¶
30
30
10 * 3 # int
30
10 / 3 # float
3.3333333333333335
10 / 2
5.0
10 ** 3
1000
10 ** 0.5
3.1622776601683795
1234567 ** 89
139574185988226216352411677967079384654819840770263154148901015711434587555916221871855053939370772085656915237689603767083349991793713346471091572628494106076146419389211087459209170823939332677859093756734470172351895079430772488109105692648444480028380122417140932569432824316430008740958025053716735421344729410520681559715479472541631321425013768978692403188995380725532383634472359562908542469547971146457062422356845701571795811721146126639734809904577967012608961528997012178537266420111446716001493476975934877110973383995456863730247
10 / 3
3.3333333333333335
75892745.215489247589274985712
75892745.21548925
75892745.215489247589274985712 - 75892745.21548925
0.0
(13 ** 0.5) ** 2
12.999999999999998
3.605551275463989 * 3.605551275463989
12.999999999999998
int(10 / 5)
2
int(10 / 4)
2
float(3)
3.0
6 / 4
1.5
6 / 4000
0.0015
6 / 400000000000000000000000000000000000000000000000000000000
1.5e-56
400000000000000000000000000000000000000000000000000000000 * 1.5e-56
6.0
1.5e-56
1.5e-56
x = 5
x
5
x + 1
6
2x
Cell In[64], line 1
2x
^
SyntaxError: invalid decimal literal
2 * x
10
Strings¶
'Flavor'
'Flavor'
flavor = 2
flavor
2
# The line below causes a name error
# Flavor
"Flavor"
'Flavor'
'Don't always use single quotes'
Cell In[72], line 1
'Don't always use single quotes'
^
SyntaxError: unterminated string literal (detected at line 1)
"Don't always use single quotes"
"Don't always use single quotes"
'straw' 'berry' # concatenation
'strawberry'
'straw' + 'berry' # concatenation
'strawberry'
'Chris' + 'Paul' # spaces aren't added for you
'ChrisPaul'
'Chris' + ' ' + 'Paul'
'Chris Paul'
x = 'straw'
y = 'berry'
x + y
'strawberry'
x y
Cell In[80], line 1
x y
^
SyntaxError: invalid syntax
'ha' * 100'hahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahahaha''lo' * 5.5---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[82], line 1
----> 1 'lo' * 5.5
TypeError: can't multiply sequence by non-int of type 'float'
'ha' + 10
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[83], line 1
----> 1 'ha' + 10
TypeError: can only concatenate str (not "int") to str
'ha' + str(10)
'ha10'
'ha' + '10'
'ha10'
'3' + 5
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[87], line 1
----> 1 '3' + 5
TypeError: can only concatenate str (not "int") to str
int('3') + 5
8
int('3.0')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[90], line 1
----> 1 int('3.0')
ValueError: invalid literal for int() with base 10: '3.0'
'3.0'
'3.0'
float('3.0')
3.0
int(3.0)
3
int(float('3.0'))
3
3
3
dot_oh = '.0'
float('3' + dot_oh)
3.0
float('3' + dot_oh) * 7
21.0
Types¶
type(10)
int
a = 10
a
10
type(a)
int
type(4.5)
float
type('abc')
str
type(nba)
datascience.tables.Table
type(Table.read_table('nba_salaries.csv'))
datascience.tables.Table
type(True)
bool
type(abs(-5))
int
type(abs)
builtin_function_or_method
Arrays¶
first_four = make_array(1, 2, 3, 4)
first_four
array([1, 2, 3, 4])
array([1, 2, 3, 4])
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[116], line 1
----> 1 array([1, 2, 3, 4])
NameError: name 'array' is not defined
from numpy import array
array([1, 2, 3, 4])
array([1, 2, 3, 4])
first_four
array([1, 2, 3, 4])
first_four * 2
array([2, 4, 6, 8])
first_four ** 2
array([ 1, 4, 9, 16])
(first_four + 1) ** 2
array([ 4, 9, 16, 25])
first_four # array is unchanged, just like when we call show/select/drop on Table
array([1, 2, 3, 4])
next_four = make_array(5, 6, 7, 8)
next_four
array([5, 6, 7, 8])
first_four + next_four
array([ 6, 8, 10, 12])
only_three = make_array(5, 6, 7)
# This line will cause an error
first_four + only_three
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[129], line 2
1 # This line will cause an error
----> 2 first_four + only_three
ValueError: operands could not be broadcast together with shapes (4,) (3,)
str_array = make_array('ha', 'he', 'ho')
str_array
array(['ha', 'he', 'ho'],
dtype='<U2')
str_array * 4
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
Cell In[132], line 1
----> 1 str_array * 4
UFuncTypeError: ufunc 'multiply' did not contain a loop with signature matching types (dtype('<U2'), dtype('int64')) -> None
next_four
array([5, 6, 7, 8])
next_four.item(0)
5
next_four.item(4)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[138], line 1
----> 1 next_four.item(4)
IndexError: index 4 is out of bounds for axis 0 with size 4
sum(next_four)
np.average(next_four)
len(next_four)