Source code for shillelagh.adapters.api.gsheets.parsing.date

"""
Parse and format Google Sheet date/time patterns.

https://developers.google.com/sheets/api/guides/formats?hl=en#date_and_time_format_patterns
"""

# pylint: disable=invalid-name, fixme, broad-exception-raised

import calendar
import re
from collections import defaultdict
from datetime import date, datetime, time, timedelta
from enum import Enum
from typing import Any, Dict, List, Tuple, Type, TypeVar, Union

from shillelagh.adapters.api.gsheets.parsing.base import LITERAL, Token, tokenize

DateTime = TypeVar("DateTime", datetime, date, time, timedelta)


class Meridiem(Enum):
    """
    Which half of a 12-hour clock a reading belongs to.

    ``AM`` is ante meridiem and ``PM`` is post meridiem; each member's value
    is its own upper-case name.
    """

    AM = "AM"
    PM = "PM"
class H(Token):
    """
    Hour of the day.

    Switches between 12 and 24 hour format depending on whether an am/pm
    indicator is present in the string.
    """

    regex = "h(?!h)"

    def format(self, value: Union[datetime, time], tokens: List[Token]) -> str:
        """
        Render the hour of ``value`` without a leading zero.

        The 12-hour clock is used when the pattern contains an ``AP``/``AMPM``
        token, or a literal containing "AM" or "PM"; otherwise the 24-hour
        value is returned unchanged.
        """
        hour = value.hour

        has_meridiem_token = any(
            token.__class__.__name__ in {"AP", "AMPM"} for token in tokens
        )
        # the 5th example in https://developers.google.com/sheets/api/guides/formats?hl=en
        # has a "PM" literal that switches to 12 hour format
        has_meridiem_literal = any(
            token.__class__.__name__ == "LITERAL"
            and ("AM" in token.token or "PM" in token.token)
            for token in tokens
        )

        if has_meridiem_token or has_meridiem_literal:
            # On a 12-hour clock both midnight (0) and noon (12) are shown as
            # "12"; a plain ``% 12`` would render midnight as "0", which does
            # not round-trip with the "12 AM" -> 0 rule used when parsing.
            hour = hour % 12 or 12

        return str(hour)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume an hour from the start of ``value``.

        Two digits are consumed only when together they form a valid hour
        (0-23); otherwise only the first digit is taken and the rest is left
        for the following tokens.

        Returns a ``{"hour": int}`` mapping and the unconsumed remainder.
        Raises ``Exception`` when ``value`` does not start with a digit.
        """
        match = re.match(r"\d{1,2}", value)
        if not match:
            raise Exception(f"Cannot parse value: {value}")
        digits = match.group()
        size = len(digits) if 0 <= int(digits) < 24 else 1
        return {"hour": int(value[:size])}, value[size:]
class HHPlus(H):
    """
    Hour of the day, zero-padded to two digits.

    Follows the same 12/24 hour switching as the single ``h`` token.
    """

    regex = "hh+"

    def format(self, value: Union[datetime, time], tokens: List[Token]) -> str:
        """
        Render the hour via the parent token and left-pad it to two digits.
        """
        unpadded = super().format(value, tokens)
        return unpadded.zfill(2)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read exactly two characters as the hour and return the remainder.
        """
        head, tail = value[:2], value[2:]
        return {"hour": int(head)}, tail
class M(Token):
    """
    If the previous non-literal token was hours or the subsequent one is
    seconds, then it represents minutes in the hour (no leading 0).
    Otherwise, it represents the month of the year as a number (no leading 0).
    """

    regex = "m(?!m)"

    def _is_minute(self, tokens: List[Token]) -> bool:
        """
        Return true if the token represents minutes, false if months.

        Raises ``Exception`` if this token is not in ``tokens`` (identity
        comparison).
        """
        is_minute = False

        i = -1
        for i, token in enumerate(tokens):
            if token is self:
                break
        else:
            raise Exception("Token is not present in list of tokens")

        # Nearest non-literal token to the left: minutes if it is an hour.
        for token in reversed(tokens[:i]):
            if token.__class__.__name__ == "LITERAL":
                continue
            if token.__class__.__name__ in {"H", "HHPlus"}:
                is_minute = True
            break

        # Nearest non-literal token to the right: minutes if it is seconds.
        for token in tokens[i + 1 :]:
            if token.__class__.__name__ == "LITERAL":
                continue
            if token.__class__.__name__ in {"S", "SS"}:
                is_minute = True
            break

        return is_minute

    def format(self, value: Union[date, datetime, time], tokens: List[Token]) -> str:
        """
        Render the minute or the month of ``value``, without a leading zero.

        Raises ``Exception`` when the token resolves to a month but ``value``
        carries no date part.
        """
        if self._is_minute(tokens) and isinstance(value, (datetime, time)):
            return str(value.minute)

        if isinstance(value, (datetime, date)):
            return str(value.month)

        raise Exception(f"Cannot format value: {value}")

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume a minute or a month number from the start of ``value``.

        Two digits are consumed only when together they form a valid value
        for the resolved unit; otherwise only the first digit is taken.

        Returns a ``{"minute": int}`` or ``{"month": int}`` mapping and the
        unconsumed remainder. Raises ``Exception`` when ``value`` does not
        start with a digit.
        """
        match = re.match(r"\d{1,2}", value)
        if not match:
            raise Exception(f"Cannot parse value: {value}")
        digits = match.group()

        # The valid range depends on what the token means. A single shared
        # bound of 1-24 used to truncate minutes 25-59 to their first digit
        # and to accept months 13-24.
        if self._is_minute(tokens):
            key, lower, upper = "minute", 0, 59
        else:
            key, lower, upper = "month", 1, 12

        size = len(digits) if lower <= int(digits) <= upper else 1
        return {key: int(value[:size])}, value[size:]
class MM(M):
    """
    Two-digit minute or month.

    Uses the same minute/month resolution as the single ``m`` token, but is
    always zero-padded to two digits.
    """

    regex = "mm(?!m)"

    def format(self, value: Union[date, datetime, time], tokens: List[Token]) -> str:
        """
        Render the minute/month via the parent token, left-padded to two digits.
        """
        unpadded = super().format(value, tokens)
        return unpadded.zfill(2)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read exactly two characters as a minute or a month, depending on the
        surrounding tokens, and return the remainder.
        """
        field = "minute" if self._is_minute(tokens) else "month"
        head, tail = value[:2], value[2:]
        return {field: int(head)}, tail
class MMM(Token):
    """
    Abbreviated month name, three letters long (e.g., "Feb").
    """

    regex = "mmm(?!m)"

    def format(self, value: Union[date, datetime, time], tokens: List[Token]) -> str:
        """
        Render the abbreviated month name using ``strftime("%b")``.
        """
        abbreviation = value.strftime("%b")
        return abbreviation

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read the first three characters as an abbreviated month name.

        Returns a ``{"month": int}`` mapping and the unconsumed remainder.
        """
        head, tail = value[:3], value[3:]
        parsed = datetime.strptime(head, "%b")
        return {"month": parsed.month}, tail
class MMMM(MMM):
    """
    Month spelled out in full.

    Matches exactly four ``m`` characters, or six or more.
    """

    regex = "(mmmm(?!m))|(m{6,})"

    def format(self, value: Union[date, datetime, time], tokens: List[Token]) -> str:
        """
        Render the full month name using ``strftime("%B")``.
        """
        full_name = value.strftime("%B")
        return full_name

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read a full month name from the start of ``value``.

        The length to consume is the length of the first piece found between
        word boundaries; that many leading characters of ``value`` are then
        parsed with ``"%B"``.
        """
        pieces = re.split(r"\b", value, 2)
        size = len(pieces[1])
        head, tail = value[:size], value[size:]
        parsed = datetime.strptime(head, "%B")
        return {"month": parsed.month}, tail
class MMMMM(MMM):
    """
    Initial letter of the month name (e.g., "J" for June).
    """

    regex = "mmmmm"

    def format(self, value: Union[date, datetime, time], tokens: List[Token]) -> str:
        """
        Render the first character of the full month name.
        """
        full_name = value.strftime("%B")
        return full_name[0]

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Resolve a single month initial to a month number.

        Raises ``Exception`` when no month starts with the letter, or when
        more than one does and the letter is therefore ambiguous.
        """
        letter = value[0]

        # Every month whose name starts with the requested letter.
        candidates = [
            number
            for number in range(1, 13)
            if calendar.month_name[number][0] == letter
        ]

        if not candidates:
            raise Exception(f"Unable to find month letter: {letter}")
        if len(candidates) > 1:
            raise Exception(f"Unable to parse month letter unambiguously: {letter}")

        return {"month": candidates[0]}, value[1:]
class S(Token):
    """
    Seconds within the minute, written without zero padding.
    """

    regex = "s(?!s)"

    def format(self, value: Union[datetime, time], tokens: List[Token]) -> str:
        """
        Render the seconds of ``value`` as a plain integer string.
        """
        seconds = value.second
        return str(seconds)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume the seconds from the start of ``value``.

        Two digits are taken only when they form a number in 0-61; otherwise
        just the first digit is consumed. Raises ``Exception`` when ``value``
        does not start with a digit.
        """
        found = re.match(r"\d{1,2}", value)
        if found is None:
            raise Exception(f"Cannot parse value: {value}")

        digits = found.group()
        size = len(digits)
        # leap seconds can be 60 or even 61
        if not 0 <= int(digits) <= 61:
            size = 1

        return {"second": int(value[:size])}, value[size:]
class SS(S):
    """
    Seconds within the minute, zero-padded to two digits.
    """

    regex = "ss"

    def format(self, value: Union[datetime, time], tokens: List[Token]) -> str:
        """
        Render the seconds via the parent token, left-padded to two digits.
        """
        unpadded = super().format(value, tokens)
        return unpadded.zfill(2)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read exactly two characters as the seconds and return the remainder.
        """
        head, tail = value[:2], value[2:]
        return {"second": int(head)}, tail
class DurationToken(Token):  # pylint: disable=abstract-method
    """
    Base class for duration tokens.

    In duration patterns often only the first token carries the square
    bracket annotation:

        - [h]:mm:ss
        - [ss].000

    Annotating the later tokens too appears to be accepted as well:

        - [hh]:[mm]:[ss].000

    For that reason each subclass declares two regexes in ``regexes``: the
    first is used while no duration token has been seen in the history, the
    second once one has.
    """

    is_duration = True

    regexes: Tuple[str, str]

    @classmethod
    def match(
        cls,
        pattern: str,
        history: List[Token],
    ) -> bool:
        """
        Tell whether ``pattern`` starts with this token, given the tokens
        already consumed.
        """
        seen_duration = any(isinstance(token, DurationToken) for token in history)
        regex = cls.regexes[1] if seen_duration else cls.regexes[0]
        return re.match(regex, pattern) is not None

    @classmethod
    def consume(
        cls,
        pattern: str,
        history: List[Token],
    ) -> Tuple[Token, str]:
        """
        Build a token from the start of ``pattern`` and return it together
        with the rest of the pattern.

        Raises ``Exception`` when the applicable regex does not match.
        """
        seen_duration = any(isinstance(token, DurationToken) for token in history)
        regex = cls.regexes[1] if seen_duration else cls.regexes[0]

        found = re.match(regex, pattern)
        if found is None:
            # pylint: disable=broad-exception-raised
            raise Exception("Token could not find match")

        text = found.group()
        remainder = pattern[len(text) :]
        return cls(text), remainder
class HPlusDuration(DurationToken):
    """
    Number of elapsed hours in a time duration.

    Number of letters indicates minimum number of digits (adds leading 0s).
    """

    regexes = (r"\[h+\]", r"(h+)|(\[h+\])")

    def format(self, value: timedelta, tokens: List[Token]) -> str:
        """
        Render the whole hours contained in ``value``, zero-padded to the
        number of ``h`` letters in the token.
        """
        # Count the letters rather than subtracting 2 for the brackets: the
        # follow-up regex also accepts an unbracketed token (e.g. "hh"), for
        # which ``len(token) - 2`` would yield a width of 0.
        width = self.token.count("h")
        return str(int(value.total_seconds() // 3600)).zfill(width)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume all leading digits of ``value`` as a number of hours.

        Returns a ``{"hours": int}`` mapping and the unconsumed remainder.
        Raises ``Exception`` when ``value`` does not start with a digit.
        """
        match = re.match(r"\d+", value)
        if not match:
            raise Exception(f"Cannot parse value: {value}")
        size = len(match.group())
        return {"hours": int(value[:size])}, value[size:]
class MPlusDuration(DurationToken):
    """
    Number of elapsed minutes in a time duration.

    Number of letters indicates minimum number of digits (adds leading 0s).
    """

    regexes = (r"\[m+\]", r"(m+)|(\[m+\])")

    def format(self, value: timedelta, tokens: List[Token]) -> str:
        """
        Render the whole minutes contained in ``value``, zero-padded to the
        number of ``m`` letters in the token.

        When the pattern also has an hours duration token, only the minutes
        left over after removing whole hours are rendered.
        """
        seconds = value.total_seconds()

        if any(token.__class__.__name__ == "HPlusDuration" for token in tokens):
            # ignore hours
            seconds %= 3600

        # Count the letters rather than subtracting 2 for the brackets: in a
        # pattern such as "[h]:mm:ss" the token is the bare "mm", for which
        # ``len(token) - 2`` would yield a width of 0 and drop the padding.
        width = self.token.count("m")
        return str(int(seconds // 60)).zfill(width)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume all leading digits of ``value`` as a number of minutes.

        Returns a ``{"minutes": int}`` mapping and the unconsumed remainder.
        Raises ``Exception`` when ``value`` does not start with a digit.
        """
        match = re.match(r"\d+", value)
        if not match:
            raise Exception(f"Cannot parse value: {value}")
        size = len(match.group())
        return {"minutes": int(value[:size])}, value[size:]
class SPlusDuration(DurationToken):
    """
    Number of elapsed seconds in a time duration.

    Number of letters indicates minimum number of digits (adds leading 0s).
    """

    regexes = (r"\[s+\]", r"(s+)|(\[s+\])")

    def format(self, value: timedelta, tokens: List[Token]) -> str:
        """
        Render the whole seconds contained in ``value``, zero-padded to the
        number of ``s`` letters in the token.

        Whole hours and/or whole minutes are removed first when the pattern
        contains the corresponding duration tokens.
        """
        seconds = value.total_seconds()
        present = {token.__class__.__name__ for token in tokens}

        if "HPlusDuration" in present:
            # ignore hours
            seconds %= 3600

        if "MPlusDuration" in present:
            # ignore minutes
            seconds %= 60

        # Count the letters rather than subtracting 2 for the brackets: in a
        # pattern such as "[h]:mm:ss" the token is the bare "ss", for which
        # ``len(token) - 2`` would yield a width of 0 and drop the padding.
        width = self.token.count("s")
        return str(int(seconds)).zfill(width)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume all leading digits of ``value`` as a number of seconds.

        Returns a ``{"seconds": int}`` mapping and the unconsumed remainder.
        Raises ``Exception`` when ``value`` does not start with a digit.
        """
        match = re.match(r"\d+", value)
        if not match:
            raise Exception(f"Cannot parse value: {value}")
        size = len(match.group())
        return {"seconds": int(value[:size])}, value[size:]
class D(Token):
    """
    Day of the month, written without zero padding.
    """

    regex = "d(?!d)"

    def format(self, value: Union[date, datetime], tokens: List[Token]) -> str:
        """
        Render the day of the month as a plain integer string.
        """
        day_of_month = value.day
        return str(day_of_month)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume the day of the month from the start of ``value``.

        Two digits are taken only when they form a number in 1-31; otherwise
        just the first digit is consumed. Raises ``Exception`` when ``value``
        does not start with a digit.
        """
        found = re.match(r"\d{1,2}", value)
        if found is None:
            raise Exception(f"Cannot parse value: {value}")

        digits = found.group()
        size = len(digits)
        if not 1 <= int(digits) <= 31:
            size = 1

        return {"day": int(value[:size])}, value[size:]
class DD(D):
    """
    Day of the month, zero-padded to two digits.
    """

    regex = "dd(?!d)"

    def format(self, value: Union[date, datetime], tokens: List[Token]) -> str:
        """
        Render the day of the month using ``strftime("%d")``.
        """
        padded_day = value.strftime("%d")
        return padded_day

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read exactly two characters as the day and return the remainder.
        """
        head, tail = value[:2], value[2:]
        return {"day": int(head)}, tail
class DDD(D):
    """
    Abbreviated weekday name, three letters long (e.g., "Mon").
    """

    regex = "ddd(?!d)"

    def format(self, value: Union[date, datetime], tokens: List[Token]) -> str:
        """
        Render the abbreviated weekday name using ``strftime("%a")``.
        """
        abbreviation = value.strftime("%a")
        return abbreviation

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read the first three characters as an abbreviated weekday name.

        Returns a ``{"weekday": int}`` mapping, using the numbering of
        ``datetime.weekday()``, and the unconsumed remainder.
        """
        head, tail = value[:3], value[3:]
        parsed = datetime.strptime(head, "%a")
        return {"weekday": parsed.weekday()}, tail
class DDDDPlus(D):
    """
    Weekday spelled out in full.
    """

    regex = "d{4,}"

    def format(self, value: Union[date, datetime], tokens: List[Token]) -> str:
        """
        Render the full weekday name using ``strftime("%A")``.
        """
        full_name = value.strftime("%A")
        return full_name

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read a full weekday name from the start of ``value``.

        The length to consume is the length of the first piece found between
        word boundaries; that many leading characters of ``value`` are then
        parsed with ``"%A"``.
        """
        pieces = re.split(r"\b", value, 2)
        size = len(pieces[1])
        head, tail = value[:size], value[size:]
        parsed = datetime.strptime(head, "%A")
        return {"weekday": parsed.weekday()}, tail
class YY(Token):
    """
    Year written with two digits.
    """

    regex = "y{1,2}(?!y)"

    def format(self, value: Union[date, datetime], tokens: List[Token]) -> str:
        """
        Render the year without century using ``strftime("%y")``.
        """
        short_year = value.strftime("%y")
        return short_year

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read exactly two characters as a year in the 2000s.
        """
        head, tail = value[:2], value[2:]
        # assume 00 == 2000
        return {"year": 2000 + int(head)}, tail
class YYYY(Token):
    """
    Year written with four digits.
    """

    regex = "y{3,}"

    def format(self, value: Union[date, datetime], tokens: List[Token]) -> str:
        """
        Render the year with century using ``strftime("%Y")``.
        """
        full_year = value.strftime("%Y")
        return full_year

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Read exactly four characters as the year and return the remainder.
        """
        head, tail = value[:4], value[4:]
        return {"year": int(head)}, tail
class ZERO(Token):
    """
    Tenths of seconds. You can increase the precision to 2 digits with 00 or 3
    digits (milliseconds) with 000.
    """

    regex = "0{1,3}(?!0)"

    def format(
        self,
        value: Union[datetime, time, timedelta],
        tokens: List[Token],
    ) -> str:
        """
        Render the sub-second part of ``value`` with as many digits as the
        token has zeros.

        The microseconds are rounded to the requested precision and the
        result is capped so that it never needs more digits than requested.
        """
        precision = len(self.token)
        us = value.microseconds if isinstance(value, timedelta) else value.microsecond

        # Integer arithmetic on purpose: going through floats loses digits
        # (``int(0.29 * 100)`` is 28, not 29).
        divisor = 10 ** (6 - precision)
        scaled = (us + divisor // 2) // divisor

        # Rounding up must not spill into an extra digit (e.g. 999999us at
        # precision 3 would otherwise render as "1000"); the seconds token is
        # formatted independently and cannot absorb the carry.
        scaled = min(scaled, 10**precision - 1)

        return str(scaled).zfill(precision)

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume as many characters as the token has zeros and interpret them
        as a decimal fraction of a second.

        Returns a ``{"microsecond": int}`` mapping and the unconsumed
        remainder.
        """
        size = len(self.token)
        # adjust precision: right-pad to 6 digits so the number is expressed
        # in microseconds, even when fewer than ``size`` characters remain
        digits = value[:size].ljust(6, "0")
        microsecond = int(digits)

        return {"microsecond": microsecond}, value[size:]
class AP(Token):
    """
    Single-letter meridiem marker: "a" for AM and "p" for PM.

    Its presence also switches the hour tokens to the 12-hour clock. The
    output is upper-cased when the token itself is written as "A/P".
    """

    regex = "(a/p)|(A/P)"

    def format(self, value: Union[datetime, time], tokens: List[Token]) -> str:
        """
        Render "a"/"p" (or "A"/"P") according to the hour of ``value``.
        """
        letter = "p" if value.hour >= 12 else "a"
        return letter.upper() if self.token == "A/P" else letter

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume one character; "p" or "P" means PM, anything else AM.

        Returns a ``{"meridiem": Meridiem}`` mapping and the remainder.
        """
        head, tail = value[:1], value[1:]
        meridiem = Meridiem.AM if head.upper() != "P" else Meridiem.PM
        return {"meridiem": meridiem}, tail
class AMPM(AP):
    """
    Two-letter meridiem marker, always rendered in capitals ("AM" or "PM").
    """

    regex = "am/pm"

    def format(self, value: Union[datetime, time], tokens: List[Token]) -> str:
        """
        Render "AM" before noon and "PM" from noon onwards.
        """
        return "PM" if value.hour >= 12 else "AM"

    def parse(self, value: str, tokens: List[Token]) -> Tuple[Dict[str, Any], str]:
        """
        Consume two characters; "pm" in any letter case means PM, anything
        else AM.

        Returns a ``{"meridiem": Meridiem}`` mapping and the remainder.
        """
        head, tail = value[:2], value[2:]
        meridiem = Meridiem.AM if head.upper() != "PM" else Meridiem.PM
        return {"meridiem": meridiem}, tail
def infer_column_type(pattern: str) -> str:
    """
    Work out which date-related type a format pattern describes.

    GSheets reports ``datetime`` for timestamps, but also for time of day and
    for durations, so the pattern has to be tokenized to tell them apart.

    Returns ``"duration"`` if any duration token is present, ``"datetime"``
    if any day or year token is present, and ``"timeofday"`` otherwise.
    """
    token_classes = [
        # durations should come first because they need to be modified
        # after the first capture
        HPlusDuration,
        MPlusDuration,
        SPlusDuration,
        # then the rest
        H,
        HHPlus,
        M,
        MM,
        MMM,
        MMMM,
        MMMMM,
        S,
        SS,
        D,
        DD,
        DDD,
        DDDDPlus,
        YY,
        YYYY,
        AP,
        AMPM,
        ZERO,
        LITERAL,
    ]
    parsed = list(tokenize(pattern, token_classes))

    for token in parsed:
        if isinstance(token, DurationToken):
            return "duration"

    date_markers = (D, DD, DDD, DDDDPlus, YY, YYYY)
    for token in parsed:
        if isinstance(token, date_markers):
            return "datetime"

    return "timeofday"
def parse_date_time_pattern(
    value: str,
    pattern: str,
    class_: Type[DateTime],
) -> DateTime:
    """
    Turn a formatted string into a ``class_`` instance by following a pattern.

    Each token of the pattern consumes its share of ``value`` and contributes
    keyword arguments, which are finally passed to ``class_``.

    See https://developers.google.com/sheets/api/guides/formats?hl=en.

    Raises ``Exception("Unsupported format")`` when ``class_`` rejects the
    collected keyword arguments with a ``TypeError``.
    """
    token_classes = [
        # durations should come first because they need to be modified
        # after the first capture
        HPlusDuration,
        MPlusDuration,
        SPlusDuration,
        # then the rest
        H,
        HHPlus,
        M,
        MM,
        MMM,
        MMMM,
        MMMMM,
        S,
        SS,
        D,
        DD,
        DDD,
        DDDDPlus,
        YY,
        YYYY,
        AP,
        AMPM,
        ZERO,
        LITERAL,
    ]

    kwargs: Dict[str, Any] = {}
    tokens = list(tokenize(pattern, token_classes))
    remaining = value
    for token in tokens:
        consumed, remaining = token.parse(remaining, tokens)
        kwargs.update(**consumed)

    # add PM offset
    if "hour" in kwargs:
        meridiem = kwargs.pop("meridiem", None)
        hour = kwargs["hour"]
        if meridiem == Meridiem.PM:
            if hour != 12:
                kwargs["hour"] = hour + 12
        elif meridiem == Meridiem.AM and hour == 12:
            kwargs["hour"] = hour - 12

    # we can't really do anything with ``weekday``
    kwargs.pop("weekday", None)

    # ``timedelta`` spells the keyword in the plural
    if class_ is timedelta and "microsecond" in kwargs:
        kwargs["microseconds"] = kwargs.pop("microsecond")

    try:
        return class_(**kwargs)
    except TypeError as ex:
        raise Exception("Unsupported format") from ex
def format_date_time_pattern(value: DateTime, pattern: str) -> str:
    """
    Render a date/time related object as text by following a pattern.

    The pattern is tokenized, each token formats its own part of ``value``,
    and the parts are concatenated in order.

    See https://developers.google.com/sheets/api/guides/formats?hl=en.
    """
    token_classes = [
        # durations should come first because they need to be modified
        # after the first capture
        HPlusDuration,
        MPlusDuration,
        SPlusDuration,
        # then the rest
        H,
        HHPlus,
        M,
        MM,
        MMM,
        MMMM,
        MMMMM,
        S,
        SS,
        D,
        DD,
        DDD,
        DDDDPlus,
        YY,
        YYYY,
        AP,
        AMPM,
        ZERO,
        LITERAL,
    ]

    tokens = list(tokenize(pattern, token_classes))
    return "".join(token.format(value, tokens) for token in tokens)