"""
Parse and format Google Sheet number formats.
https://developers.google.com/sheets/api/guides/formats#number_format_tokens
"""
# pylint: disable=c-extension-no-member, broad-exception-raised
import math
import operator
import re
from itertools import zip_longest
from typing import Any, Dict, Iterator, List, Tuple, Union, cast
from shillelagh.adapters.api.gsheets.parsing.base import (
LITERAL,
InvalidValue,
Token,
tokenize,
)
[docs]
def adjust_value(value: Union[int, float], tokens: List[Token]) -> Union[int, float]:
"""
Adjust value applying percent, scientific notation, and comma multiplier.
"""
for token in tokens:
if token.__class__.__name__ == "PERCENT":
value *= 100
elif token.__class__.__name__ == "COMMA":
value /= 1000 ** len(token.token)
elif token.__class__.__name__ == "E":
while value > 10:
value /= 10
while value < 1:
value *= 10
return value
[docs]
class DIGITS(Token):
"""
One or more digits in the number.
Each token renders digits differently:
- 0: if the digit is an insignificant 0, it is rendered as 0.
- #: if the digit is an insignificant 0, it is not rendered.
- ?: if the digit is an insignificant 0, it is rendered as a space.
If there's a comma between tokens it renders the entire number with grouping
separators (grouping by the thousands)
"""
regex = r"(0|#|\?)+(,(0|#|\?){3})?"
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
match = re.match(r"\s*(\+|-)?\d+(,\d+)*", value)
if not match:
raise InvalidValue(value)
digits = match.group().replace(",", "")
if is_fractional(self, tokens):
return (
{
"operation": lambda number: math.copysign(
abs(number) + get_fraction(digits),
number,
),
},
value[len(match.group()) :],
)
return (
{"operation": lambda number: number + int(digits)},
value[len(match.group()) :],
)
[docs]
class PERIOD(Token):
"""
The decimal point in the number.
The first period represents the decimal point in the number. Subsequent
periods are rendered as literals. If you include a decimal point in the
format, it will always be rendered, even for whole numbers.
"""
regex = r"\."
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
match = re.match(r"\.", value)
if not match:
raise InvalidValue(value)
return {}, value[1:]
[docs]
class PERCENT(Token):
"""
Literal percent sign.
Appears as a literal but also causes existing numbers to be multiplied
by 100 before being rendered, in order to make percentages more readable.
"""
regex = "%"
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
match = re.match("%", value)
if not match:
raise InvalidValue(value)
return {"operation": lambda number: number / 100.0}, value[1:]
[docs]
class COMMA(Token):
"""
Multiplier.
When appearing after the digit characters the comma scales the digits by one thousand
per comma.
"""
regex = ",+"
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
size = len(self.token)
return {"operation": lambda number: number * 1000**size}, value
[docs]
class E(Token): # pylint: disable=invalid-name
"""
Scientific format.
Renders the number in scientific format, with the formatting to the left of the E
used for the non-exponent portion and the formatting to the right of the E used for
the exponent portion. E+ will show a + sign for positive exponents. E- will only
show a sign for negative exponents. If lowercase is used, the output e is
lowercased as well.
"""
regex = r"(E|e)(-|\+)((0|#|\?)+)"
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
match = re.match(r"(E|e)((\+|-)?\d+)", value)
if not match:
raise InvalidValue(value)
exponent = int(match.group(2))
size = len(match.group())
return {"operation": lambda number: number * 10**exponent}, value[size:]
[docs]
class FRACTION(Token):
"""
Fractional format.
If it appears between two digit characters (0, # or ?), then it treats those digit
groups as a fractional format. For example, the number format 0 #/# renders the
number 23.25 as 23 1/4. The denominator can also be a literal integer, in which case
it will enforce that integer as the denominator. The number format 0 #/8 displays
the number 23.25 as 23 2/8. The fraction part is not rendered at all if the
numerator would become 0. The number 23.1 with the number format 0 #/3 renders as
just 23 (because the 0.1 rounded to 0/3). / is not compatible with scientific format
or a format with a decimal point in it.
"""
regex = r"(?:((?:0|#|\?)+))/(?:((?:0|#|\?)+)|(\d+))"
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
match = re.match(r"\s*(\d+)/(\d+)", value)
if not match:
# fractions with numerator 0 are not shown
return {}, value
numerator = int(match.group(1))
denominator = int(match.group(2))
size = len(match.group())
return {"operation": lambda number: number + (numerator / denominator)}, value[
size:
]
[docs]
class STAR(Token):
"""
This is included for compatibility with Excel number formats. It is currently ignored.
"""
regex = r"\*"
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
return {}, value
[docs]
class UNDERSCORE(Token):
"""
Skips the next character and renders a space.
This is used to line up number formats where the negative value is surrounded
by parenthesis.
"""
regex = "_."
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
if value[:1] != " ":
raise InvalidValue(value[:1])
return {}, value[1:]
[docs]
class AT(Token):
"""
Inserts the raw text for the cell, if the cell has text input.
Not compatible with any of the other special characters and won’t display for
numeric values (which are displayed as general format).
"""
regex = "@"
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
i = tokens.index(self)
if i == len(tokens) - 1:
# last token, consume everything
return {"operation": lambda number: value}, ""
# LL(1) to see how much can be consumed
sibling = tokens[i + 1]
j = 0
for j in range(len(value)):
try:
sibling.parse(value[j:], tokens)
break
except InvalidValue:
continue
else:
raise InvalidValue(value)
return {"operation": lambda number: value[:j]}, value[j:]
[docs]
class COLOR(Token):
"""
Causes any value that is rendered by this sub-format to appear with the given text color.
Valid values for Color are Black, Blue, Cyan, Green, Magenta, Red, White or Yellow.
Valid values for the "#" in Color# are 0 - 56 (this color palette shows a list of the
colors that correspond to each number). Number format colors will override any
user-entered colors on the cell, but will not override colors set by conditional
formatting.
"""
regex = r"\[(Black|Blue|Cyan|Green|Magenta|Red|White|Yellow|Color\d{1,2})\]"
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
return {}, value
[docs]
class CONDITION(Token):
"""
Replaces the default comparison section with another conditional expression.
Replaces the default positive, negative, or zero comparison section with another
conditional expression. For example, [<100]”Low”;[>1000]”High”;000 will render the
word “Low” for values below 100, “High” for values above 1000 and a three digit
number (with leading 0s) for anything in between. Conditions can only be applied to
the first two sub-formats and if a number matches more than one, it will use the
first one it matches. If there is a third format, it will be used for "everything
else", otherwise if a number doesn’t match either format, it will be rendered as all
"#"s filling up the cell width. The fourth format is always used for text, if it
exists.
"""
regex = r"\[(>|>=|<|<=|=)\d*(\.\d*)?\]"
[docs]
def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
return {}, value
[docs]
def get_fraction(number: str) -> float:
"""
Return the fraction associated with a fractional part.
>>> get_fraction("9")
0.9
>>> get_fraction("123")
0.123
>>> get_fraction("1")
0.1
>>> get_fraction("001")
0.001
"""
if int(number) < 0:
raise Exception("Number should be a positive integer")
return cast(float, int(number) / (10 ** len(number)))
[docs]
def parse_number_pattern(value: str, pattern: str) -> float:
"""
Parse a value using a given pattern.
"""
formats = pattern.split(";")
i = -1
for i, format_ in enumerate(formats):
try:
number = parse_number_format(value, format_)
break
except InvalidValue:
# weird edge case
if value.startswith("-"):
try:
number = -parse_number_pattern(value[1:], format_)
break
except InvalidValue:
pass
else:
try:
return int(value)
except ValueError:
try:
return float(value)
except ValueError as ex:
raise InvalidValue(
f"Unable to parse value {value} with pattern {pattern}",
) from ex
# is negative?
if i == 1 and (len(formats) > 2 or (len(formats) == 2 and "@" not in formats[1])):
return -number
return number
[docs]
def fix_periods(tokens: Iterator[Token]) -> Iterator[Token]:
"""
Convert periods into literals after the first one.
"""
seen_period = False
for token in tokens:
if token.__class__.__name__ == "PERIOD":
yield LITERAL(".") if seen_period else token
seen_period = True
else:
yield token
[docs]
def has_condition(pattern: str) -> bool:
"""
Return true if the pattern has condition instructions.
"""
return bool(re.match(r"\[(>|>=|<|<=|=)\d*(\.\d*)?\]", pattern))
[docs]
def condition_matches(value: Union[int, float], format_: str) -> bool:
"""
Return true if the value matches the condition in the pattern.
"""
match = re.search(r"\[(>|>=|<|<=|=)(\d*(?:\.\d*)?)\]", format_)
if not match:
return True
operators = {
">": operator.gt,
">=": operator.ge,
"<": operator.lt,
"<=": operator.le,
"=": operator.eq,
}
comparison, threshold = match.groups()
op = operators[comparison] # pylint: disable=invalid-name
return bool(op(float(value), float(threshold)))
[docs]
def is_fractional(token: Token, tokens: List[Token]) -> bool:
"""
Return true if the token is after the period.
"""
seen_self = False
for sibling in tokens:
if sibling is token:
seen_self = True
elif isinstance(sibling, PERIOD) and not seen_self:
return True
return False