Source code for datascience.formats
"""String formatting for table entries."""
__all__ = ['default_formatter', 'Formatter', 'NumberFormatter',
'CurrencyFormatter', 'DateFormatter', 'PercentFormatter',
'DistributionFormatter']
import numpy as np
from datetime import datetime, timezone
[docs]
class Formatter:
"""String formatter that truncates long values."""
min_width = 4
max_width = 60
etc = ' ...'
def __init__(self, min_width=None, max_width=None, etc=None):
if min_width is not None:
self.min_width = min_width
if max_width is not None:
self.max_width = max_width
if etc is not None:
self.etc = etc
[docs]
def format_column(self, label, column):
"""Return a formatting function that pads & truncates values."""
if len(column) == 0:
val_width = 0
else:
val_width = max(len(self.format_value(v)) for v in column)
val_width = min(val_width, self.max_width)
width = max(val_width, len(str(label)), self.min_width, len(self.etc))
def pad(value, label=False):
if label:
raw = value
else:
raw = self.format_value(value)
if len(raw) > width:
prefix = raw[:width-len(self.etc)] + self.etc
else:
prefix = raw
return prefix.ljust(width)
return pad
[docs]
@staticmethod
def format_value(value):
"""Pretty-print an arbitrary value."""
if isinstance(value, (bool, np.bool_)):
return str(value)
elif isinstance(value, (int, np.integer)):
return '{:d}'.format(value)
elif isinstance(value, (float, np.floating)):
return '{:g}'.format(value)
else:
return str(value)
[docs]
def convert_column(self, values):
"""Convert each value using the convert_value method."""
return list(map(self.convert_value, values))
[docs]
@staticmethod
def convert_value(value):
"""Identity conversion (override to convert values)."""
return value
@property
def converts_values(self):
"""Whether this Formatter also converts values."""
return self.convert_value is not Formatter.convert_value or \
self.convert_column is not Formatter.convert_column
default_formatter = Formatter()
class FunctionFormatter(Formatter):
"""Format values using a function."""
def __init__(self, fn):
self.format_value = lambda v: str(fn(v))
[docs]
class NumberFormatter(Formatter):
"""Format numbers that may have delimiters."""
def __init__(self, decimals=2, decimal_point='.', separator=',', int_to_float=False, *args, **vargs):
super().__init__(*args, **vargs)
self.decimals = decimals
self.decimal_point = decimal_point
self.separator = separator
self.int_to_float = int_to_float
[docs]
def convert_value(self, value):
"""Convert string 93,000.00 to float 93000.0."""
if isinstance(value, str):
value = value.replace(self.separator, '')
if self.decimal_point not in value:
return int(value)
else:
return float(value.replace(self.decimal_point, '.'))
elif self.int_to_float:
return float(value)
else:
return value
[docs]
def format_value(self, value):
if isinstance(value, (int, np.integer)):
return ('{:' + self.separator + 'd}').format(value)
else:
return ('{:' + self.separator + '.' + str(self.decimals) + 'f}').format(value)
[docs]
class CurrencyFormatter(NumberFormatter):
"""Format currency and convert to float."""
def __init__(self, symbol="$", *args, **vargs):
super().__init__(*args, **vargs)
assert isinstance(symbol, str)
self.symbol = symbol
[docs]
def convert_value(self, value):
"""Convert value to float. If value is a string, ensure that the first
character is the same as symbol ie. the value is in the currency this
formatter is representing.
"""
if isinstance(value, str):
assert value.startswith(self.symbol), "Currency does not start with " + self.symbol
value = value.lstrip(self.symbol)
return super().convert_value(value)
[docs]
def format_value(self, value):
"""Format currency."""
return self.symbol + super().format_value(value)
[docs]
class DateFormatter(Formatter):
"""Format date & time and convert to UNIX timestamp."""
def __init__(self, format="%Y-%m-%d %H:%M:%S.%f", *args, **vargs):
super().__init__(*args, **vargs)
assert isinstance(format, str)
self.format = format
[docs]
def convert_value(self, value):
"""Convert 2015-08-03 to a Unix timestamp int."""
return datetime.strptime(value, self.format).timestamp()
[docs]
def format_value(self, value):
"""Format timestamp as a string."""
return datetime.fromtimestamp(value).strftime(self.format)
[docs]
class PercentFormatter(Formatter):
"""Format a number as a percentage."""
def __init__(self, decimals=2, *args, **vargs):
super().__init__(*args, **vargs)
assert isinstance(decimals, int)
self.decimals = decimals
[docs]
def format_value(self, value):
"""Format number as percentage."""
return ('{:.' + str(self.decimals) + '%}').format(value)
[docs]
class DistributionFormatter(PercentFormatter):
"""Normalize a column and format as percentages."""
[docs]
def convert_column(self, values):
"""Normalize values."""
assert all(values >= 0), 'Cannot normalize a column with negatives'
total = sum(values)
if total > 0:
return values / total
else:
return values