Updated script so that it can be controlled by the Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@ -0,0 +1,12 @@
# ruff: noqa: TCH004
from typing import TYPE_CHECKING
if TYPE_CHECKING:
# import modules that have public classes/functions:
from pandas.tseries import (
frequencies,
offsets,
)
# and mark only those modules as public
# NOTE: both names are imported only under ``TYPE_CHECKING`` above, so they are
# declared public here without being bound by this module at runtime.
__all__ = ["frequencies", "offsets"]

View File

@ -0,0 +1,10 @@
"""
Timeseries API
"""
from pandas._libs.tslibs.parsing import guess_datetime_format
from pandas.tseries import offsets
from pandas.tseries.frequencies import infer_freq
# Public names of the timeseries API module: exactly the three objects
# imported above.
__all__ = ["infer_freq", "offsets", "guess_datetime_format"]

View File

@ -0,0 +1,602 @@
from __future__ import annotations
from typing import TYPE_CHECKING
import numpy as np
from pandas._libs import lib
from pandas._libs.algos import unique_deltas
from pandas._libs.tslibs import (
Timestamp,
get_unit_from_dtype,
periods_per_day,
tz_convert_from_utc,
)
from pandas._libs.tslibs.ccalendar import (
DAYS,
MONTH_ALIASES,
MONTH_NUMBERS,
MONTHS,
int_to_weekday,
)
from pandas._libs.tslibs.dtypes import (
OFFSET_TO_PERIOD_FREQSTR,
freq_to_period_freqstr,
)
from pandas._libs.tslibs.fields import (
build_field_sarray,
month_position_check,
)
from pandas._libs.tslibs.offsets import (
DateOffset,
Day,
to_offset,
)
from pandas._libs.tslibs.parsing import get_rule_month
from pandas.util._decorators import cache_readonly
from pandas.core.dtypes.common import is_numeric_dtype
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
PeriodDtype,
)
from pandas.core.dtypes.generic import (
ABCIndex,
ABCSeries,
)
from pandas.core.algorithms import unique
if TYPE_CHECKING:
from pandas._typing import npt
from pandas import (
DatetimeIndex,
Series,
TimedeltaIndex,
)
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
# --------------------------------------------------------------------
# Offset related functions
_need_suffix = ["QS", "BQE", "BQS", "YS", "BYE", "BYS"]
for _prefix in _need_suffix:
for _m in MONTHS:
key = f"{_prefix}-{_m}"
OFFSET_TO_PERIOD_FREQSTR[key] = OFFSET_TO_PERIOD_FREQSTR[_prefix]
for _prefix in ["Y", "Q"]:
for _m in MONTHS:
_alias = f"{_prefix}-{_m}"
OFFSET_TO_PERIOD_FREQSTR[_alias] = _alias
for _d in DAYS:
OFFSET_TO_PERIOD_FREQSTR[f"W-{_d}"] = f"W-{_d}"
def get_period_alias(offset_str: str) -> str | None:
"""
Alias to closest period strings BQ->Q etc.
"""
return OFFSET_TO_PERIOD_FREQSTR.get(offset_str, None)
# ---------------------------------------------------------------------
# Period codes
def infer_freq(
index: DatetimeIndex | TimedeltaIndex | Series | DatetimeLikeArrayMixin,
) -> str | None:
"""
Infer the most likely frequency given the input index.
Parameters
----------
index : DatetimeIndex, TimedeltaIndex, Series or array-like
If passed a Series will use the values of the series (NOT THE INDEX).
Returns
-------
str or None
None if no discernible frequency.
Raises
------
TypeError
If the index is not datetime-like.
ValueError
If there are fewer than three values.
Examples
--------
>>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30)
>>> pd.infer_freq(idx)
'D'
"""
from pandas.core.api import DatetimeIndex
if isinstance(index, ABCSeries):
values = index._values
if not (
lib.is_np_dtype(values.dtype, "mM")
or isinstance(values.dtype, DatetimeTZDtype)
or values.dtype == object
):
raise TypeError(
"cannot infer freq from a non-convertible dtype "
f"on a Series of {index.dtype}"
)
index = values
inferer: _FrequencyInferer
if not hasattr(index, "dtype"):
pass
elif isinstance(index.dtype, PeriodDtype):
raise TypeError(
"PeriodIndex given. Check the `freq` attribute "
"instead of using infer_freq."
)
elif lib.is_np_dtype(index.dtype, "m"):
# Allow TimedeltaIndex and TimedeltaArray
inferer = _TimedeltaFrequencyInferer(index)
return inferer.get_freq()
elif is_numeric_dtype(index.dtype):
raise TypeError(
f"cannot infer freq from a non-convertible index of dtype {index.dtype}"
)
if not isinstance(index, DatetimeIndex):
index = DatetimeIndex(index)
inferer = _FrequencyInferer(index)
return inferer.get_freq()
class _FrequencyInferer:
    """
    Infer a frequency string from the spacing of a datetime-like index.

    The instance keeps the original index together with its int64 values
    (shifted to local wall time for tz-aware data) and lazily computes the
    delta and calendar-field summaries the individual rules rely on.
    """

    def __init__(self, index) -> None:
        self.index = index
        self.i8values = index.asi8

        # For get_unit_from_dtype we need the dtype to the underlying ndarray,
        # which for tz-aware is not the same as index.dtype
        if isinstance(index, ABCIndex):
            # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
            # ndarray[Any, Any]]" has no attribute "_ndarray"
            backing = index._data._ndarray  # type: ignore[union-attr]
        else:
            # otherwise we have DTA/TDA
            backing = index._ndarray
        self._creso = get_unit_from_dtype(backing.dtype)

        # This moves the values, which are implicitly in UTC, to the
        # timezone so they are in local time
        tz = getattr(index, "tz", None)
        if tz is not None:
            self.i8values = tz_convert_from_utc(self.i8values, tz, reso=self._creso)

        if len(index) < 3:
            raise ValueError("Need at least 3 dates to infer frequency")

        self.is_monotonic = (
            self.index._is_monotonic_increasing or self.index._is_monotonic_decreasing
        )

    @cache_readonly
    def deltas(self) -> npt.NDArray[np.int64]:
        # Unique differences between consecutive (local-time) values.
        return unique_deltas(self.i8values)

    @cache_readonly
    def deltas_asi8(self) -> npt.NDArray[np.int64]:
        # NB: we cannot use self.i8values here because we may have converted
        # the tz in __init__
        return unique_deltas(self.index.asi8)

    @cache_readonly
    def is_unique(self) -> bool:
        # True when every consecutive local-time gap is the same.
        return len(self.deltas) == 1

    @cache_readonly
    def is_unique_asi8(self) -> bool:
        # Same check on the unconverted values.
        return len(self.deltas_asi8) == 1

    def get_freq(self) -> str | None:
        """
        Find the appropriate frequency string to describe the inferred
        frequency of self.i8values

        Returns
        -------
        str or None
        """
        if not (self.is_monotonic and self.index._is_unique):
            return None

        smallest = self.deltas[0]
        ppd = periods_per_day(self._creso)
        if smallest and _is_multiple(smallest, ppd):
            return self._infer_daily_rule()

        # Business hourly, maybe. 17: one day / 65: one weekend
        if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
            return "bh"

        # Possibly intraday frequency. Here we use the
        # original .asi8 values as the modified values
        # will not work around DST transitions. See #8772
        if not self.is_unique_asi8:
            return None

        step = self.deltas_asi8[0]
        pph = ppd // 24
        ppm = pph // 60
        pps = ppm // 60
        # Coarsest unit first; the first unit that divides the step wins.
        intraday_units = (
            ("h", pph),
            ("min", ppm),
            ("s", pps),
            ("ms", pps // 1000),
            ("us", pps // 1_000_000),
        )
        for alias, per_unit in intraday_units:
            if _is_multiple(step, per_unit):
                return _maybe_add_count(alias, step / per_unit)
        # Nanoseconds
        return _maybe_add_count("ns", step)

    @cache_readonly
    def day_deltas(self) -> list[int]:
        # Unique deltas expressed in days (true division, so values are floats).
        per_day = periods_per_day(self._creso)
        return [delta / per_day for delta in self.deltas]

    @cache_readonly
    def hour_deltas(self) -> list[int]:
        # Unique deltas expressed in hours (true division, so values are floats).
        per_hour = periods_per_day(self._creso) // 24
        return [delta / per_hour for delta in self.deltas]

    @cache_readonly
    def fields(self) -> np.ndarray:  # structured array of fields
        return build_field_sarray(self.i8values, reso=self._creso)

    @cache_readonly
    def rep_stamp(self) -> Timestamp:
        # Representative timestamp: the first (local-time) value.
        return Timestamp(self.i8values[0], unit=self.index.unit)

    def month_position_check(self) -> str | None:
        # Delegates to the module-level helper of the same name.
        return month_position_check(self.fields, self.index.dayofweek)

    @cache_readonly
    def mdiffs(self) -> npt.NDArray[np.int64]:
        # Unique gaps measured in whole months.
        month_ordinals = self.fields["Y"] * 12 + self.fields["M"]
        return unique_deltas(month_ordinals.astype("i8"))

    @cache_readonly
    def ydiffs(self) -> npt.NDArray[np.int64]:
        # Unique gaps measured in whole years.
        return unique_deltas(self.fields["Y"].astype("i8"))

    def _infer_daily_rule(self) -> str | None:
        # Rules are tried from the coarsest (annual) to the finest.
        annual_rule = self._get_annual_rule()
        if annual_rule:
            month = MONTH_ALIASES[self.rep_stamp.month]
            return _maybe_add_count(f"{annual_rule}-{month}", self.ydiffs[0])

        quarterly_rule = self._get_quarterly_rule()
        if quarterly_rule:
            nquarters = self.mdiffs[0] / 3
            mod_dict = {0: 12, 2: 11, 1: 10}
            month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
            return _maybe_add_count(f"{quarterly_rule}-{month}", nquarters)

        monthly_rule = self._get_monthly_rule()
        if monthly_rule:
            return _maybe_add_count(monthly_rule, self.mdiffs[0])

        if self.is_unique:
            return self._get_daily_rule()

        if self._is_business_daily():
            return "B"

        wom_rule = self._get_wom_rule()
        return wom_rule if wom_rule else None

    def _get_daily_rule(self) -> str | None:
        days = self.deltas[0] / periods_per_day(self._creso)
        if days % 7 != 0:
            return _maybe_add_count("D", days)
        # Weekly
        weekday_name = int_to_weekday[self.rep_stamp.weekday()]
        return _maybe_add_count(f"W-{weekday_name}", days / 7)

    def _get_annual_rule(self) -> str | None:
        if len(self.ydiffs) > 1:
            return None
        if len(unique(self.fields["M"])) > 1:
            return None

        pos_check = self.month_position_check()
        if pos_check is None:
            return None
        annual_aliases = {"cs": "YS", "bs": "BYS", "ce": "YE", "be": "BYE"}
        return annual_aliases.get(pos_check)

    def _get_quarterly_rule(self) -> str | None:
        if len(self.mdiffs) > 1:
            return None
        if self.mdiffs[0] % 3 != 0:
            return None

        pos_check = self.month_position_check()
        if pos_check is None:
            return None
        quarterly_aliases = {"cs": "QS", "bs": "BQS", "ce": "QE", "be": "BQE"}
        return quarterly_aliases.get(pos_check)

    def _get_monthly_rule(self) -> str | None:
        if len(self.mdiffs) > 1:
            return None

        pos_check = self.month_position_check()
        if pos_check is None:
            return None
        monthly_aliases = {"cs": "MS", "bs": "BMS", "ce": "ME", "be": "BME"}
        return monthly_aliases.get(pos_check)

    def _is_business_daily(self) -> bool:
        # quick check: cannot be business daily
        if self.day_deltas != [1, 3]:
            return False

        # probably business daily, but need to confirm
        first_weekday = self.index[0].weekday()
        ppd = periods_per_day(self._creso)
        shifts = np.floor_divide(np.diff(self.i8values), ppd)
        weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)

        # A 3-day step must land on Monday; a 1-day step on Tuesday-Friday.
        lands_on_monday = (weekdays == 0) & (shifts == 3)
        lands_midweek = (weekdays > 0) & (weekdays <= 4) & (shifts == 1)
        return bool(np.all(lands_on_monday | lands_midweek))

    def _get_wom_rule(self) -> str | None:
        weekdays = unique(self.index.weekday)
        if len(weekdays) > 1:
            return None

        week_of_months = unique((self.index.day - 1) // 7)
        # Only attempt to infer up to WOM-4. See #9425
        week_of_months = week_of_months[week_of_months < 4]
        if len(week_of_months) != 1:
            return None

        # get which week
        week = week_of_months[0] + 1
        wd = int_to_weekday[weekdays[0]]
        return f"WOM-{week}{wd}"
class _TimedeltaFrequencyInferer(_FrequencyInferer):
    """
    Frequency inferer for timedelta data.

    Only the plain daily/weekly rule is tried for day-multiple steps; the
    calendar-based rules of the parent class are skipped.
    """

    def _infer_daily_rule(self):
        if not self.is_unique:
            return None
        return self._get_daily_rule()
def _is_multiple(us, mult: int) -> bool:
    """Return whether ``us`` is evenly divisible by ``mult``."""
    remainder = us % mult
    return remainder == 0
def _maybe_add_count(base: str, count: float) -> str:
    """
    Prefix ``base`` with ``count`` unless the count is exactly 1.

    ``count`` must be integral (e.g. ``2.0``); this is checked with an
    assertion before it is formatted as an int.
    """
    if count == 1:
        return base
    assert count == int(count)
    return f"{int(count)}{base}"
# ----------------------------------------------------------------------
# Frequency comparison
def is_subperiod(source, target) -> bool:
"""
Returns True if downsampling is possible between source and target
frequencies
Parameters
----------
source : str or DateOffset
Frequency converting from
target : str or DateOffset
Frequency converting to
Returns
-------
bool
"""
if target is None or source is None:
return False
source = _maybe_coerce_freq(source)
target = _maybe_coerce_freq(target)
if _is_annual(target):
if _is_quarterly(source):
return _quarter_months_conform(
get_rule_month(source), get_rule_month(target)
)
return source in {"D", "C", "B", "M", "h", "min", "s", "ms", "us", "ns"}
elif _is_quarterly(target):
return source in {"D", "C", "B", "M", "h", "min", "s", "ms", "us", "ns"}
elif _is_monthly(target):
return source in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
elif _is_weekly(target):
return source in {target, "D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
elif target == "B":
return source in {"B", "h", "min", "s", "ms", "us", "ns"}
elif target == "C":
return source in {"C", "h", "min", "s", "ms", "us", "ns"}
elif target == "D":
return source in {"D", "h", "min", "s", "ms", "us", "ns"}
elif target == "h":
return source in {"h", "min", "s", "ms", "us", "ns"}
elif target == "min":
return source in {"min", "s", "ms", "us", "ns"}
elif target == "s":
return source in {"s", "ms", "us", "ns"}
elif target == "ms":
return source in {"ms", "us", "ns"}
elif target == "us":
return source in {"us", "ns"}
elif target == "ns":
return source in {"ns"}
else:
return False
def is_superperiod(source, target) -> bool:
"""
Returns True if upsampling is possible between source and target
frequencies
Parameters
----------
source : str or DateOffset
Frequency converting from
target : str or DateOffset
Frequency converting to
Returns
-------
bool
"""
if target is None or source is None:
return False
source = _maybe_coerce_freq(source)
target = _maybe_coerce_freq(target)
if _is_annual(source):
if _is_annual(target):
return get_rule_month(source) == get_rule_month(target)
if _is_quarterly(target):
smonth = get_rule_month(source)
tmonth = get_rule_month(target)
return _quarter_months_conform(smonth, tmonth)
return target in {"D", "C", "B", "M", "h", "min", "s", "ms", "us", "ns"}
elif _is_quarterly(source):
return target in {"D", "C", "B", "M", "h", "min", "s", "ms", "us", "ns"}
elif _is_monthly(source):
return target in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
elif _is_weekly(source):
return target in {source, "D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
elif source == "B":
return target in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
elif source == "C":
return target in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
elif source == "D":
return target in {"D", "C", "B", "h", "min", "s", "ms", "us", "ns"}
elif source == "h":
return target in {"h", "min", "s", "ms", "us", "ns"}
elif source == "min":
return target in {"min", "s", "ms", "us", "ns"}
elif source == "s":
return target in {"s", "ms", "us", "ns"}
elif source == "ms":
return target in {"ms", "us", "ns"}
elif source == "us":
return target in {"us", "ns"}
elif source == "ns":
return target in {"ns"}
else:
return False
def _maybe_coerce_freq(code) -> str:
    """
    Normalize a frequency to the rule code used by the comparison helpers.

    A DateOffset is first converted to its period frequency string. The
    lower-case sub-daily codes are returned unchanged; every other code is
    upper-cased.

    Parameters
    ----------
    code : str or DateOffset
        Frequency to normalize; must not be None.

    Returns
    -------
    str
    """
    assert code is not None
    if isinstance(code, DateOffset):
        code = freq_to_period_freqstr(1, code.name)
    return code if code in {"h", "min", "s", "ms", "us", "ns"} else code.upper()
def _quarter_months_conform(source: str, target: str) -> bool:
    """
    Return whether two month aliases sit at the same position within a
    quarter, i.e. their month numbers are congruent modulo 3.
    """
    offset = MONTH_NUMBERS[source] - MONTH_NUMBERS[target]
    return offset % 3 == 0
def _is_annual(rule: str) -> bool:
    """Return whether ``rule`` is "Y" or a "Y-<suffix>" code (case-insensitive)."""
    upper = rule.upper()
    if upper == "Y":
        return True
    return upper.startswith("Y-")
def _is_quarterly(rule: str) -> bool:
    """
    Return whether ``rule`` is "Q", or starts with "Q-" or "BQ"
    (case-insensitive).
    """
    upper = rule.upper()
    if upper == "Q":
        return True
    return upper.startswith(("Q-", "BQ"))
def _is_monthly(rule: str) -> bool:
    """Return whether ``rule`` is "M" or "BM" (case-insensitive)."""
    upper = rule.upper()
    return upper == "M" or upper == "BM"
def _is_weekly(rule: str) -> bool:
    """Return whether ``rule`` is "W" or a "W-<suffix>" code (case-insensitive)."""
    upper = rule.upper()
    if upper == "W":
        return True
    return upper.startswith("W-")
# Public names of this module. ``Day`` and ``to_offset`` are re-exported from
# ``pandas._libs.tslibs.offsets``; the remaining names are defined in this file.
__all__ = [
"Day",
"get_period_alias",
"infer_freq",
"is_subperiod",
"is_superperiod",
"to_offset",
]

View File

@ -0,0 +1,634 @@
from __future__ import annotations
from datetime import (
datetime,
timedelta,
)
import warnings
from dateutil.relativedelta import (
FR,
MO,
SA,
SU,
TH,
TU,
WE,
)
import numpy as np
from pandas.errors import PerformanceWarning
from pandas import (
DateOffset,
DatetimeIndex,
Series,
Timestamp,
concat,
date_range,
)
from pandas.tseries.offsets import (
Day,
Easter,
)
def next_monday(dt: datetime) -> datetime:
    """
    Move a weekend holiday to the following Monday.

    Saturday is shifted by two days and Sunday by one day; any other day is
    returned unchanged.
    """
    shift = {5: 2, 6: 1}.get(dt.weekday())
    if shift is None:
        return dt
    return dt + timedelta(shift)
def next_monday_or_tuesday(dt: datetime) -> datetime:
    """
    Observance for the second of two adjacent holidays.

    Saturday moves to the following Monday; Sunday and Monday move to the
    following Tuesday (Monday is already taken by the adjacent holiday on the
    day before). Any other day is returned unchanged.
    """
    # weekday -> days to add (Sat/Sun: +2, Mon: +1)
    shift = {5: 2, 6: 2, 0: 1}.get(dt.weekday())
    return dt if shift is None else dt + timedelta(shift)
def previous_friday(dt: datetime) -> datetime:
    """
    Move a Saturday or Sunday holiday back to the preceding Friday.

    Weekdays are returned unchanged.
    """
    # Friday is weekday 4, so this is 1 for Saturday and 2 for Sunday.
    days_past_friday = dt.weekday() - 4
    if days_past_friday > 0:
        return dt - timedelta(days_past_friday)
    return dt
def sunday_to_monday(dt: datetime) -> datetime:
    """
    Move a Sunday holiday to the following Monday; leave other days as is.
    """
    return dt + timedelta(1) if dt.weekday() == 6 else dt
def weekend_to_monday(dt: datetime) -> datetime:
    """
    Move a Saturday or Sunday holiday to the following Monday.

    Needed for holidays such as Christmas observation in Europe.
    """
    weekday = dt.weekday()
    if weekday < 5:
        return dt
    # Monday of the next week is 7 days after this week's Monday (weekday 0).
    return dt + timedelta(7 - weekday)
def nearest_workday(dt: datetime) -> datetime:
    """
    Move a weekend holiday to the closest weekday.

    Saturday moves back to Friday, Sunday moves forward to Monday, and
    weekdays are returned unchanged.
    """
    # weekday -> signed day shift
    shift = {5: -1, 6: 1}.get(dt.weekday())
    if shift is None:
        return dt
    return dt + timedelta(shift)
def next_workday(dt: datetime) -> datetime:
    """
    Return the first weekday strictly after ``dt``; used for observances.
    """
    one_day = timedelta(days=1)
    candidate = dt + one_day
    # Mon-Fri are 0-4
    while candidate.weekday() >= 5:
        candidate = candidate + one_day
    return candidate
def previous_workday(dt: datetime) -> datetime:
    """
    Return the last weekday strictly before ``dt``; used for observances.
    """
    one_day = timedelta(days=1)
    candidate = dt - one_day
    # Mon-Fri are 0-4
    while candidate.weekday() >= 5:
        candidate = candidate - one_day
    return candidate
def before_nearest_workday(dt: datetime) -> datetime:
    """
    Return the workday immediately before the nearest workday of ``dt``.
    """
    observed = nearest_workday(dt)
    return previous_workday(observed)
def after_nearest_workday(dt: datetime) -> datetime:
    """
    Return the workday immediately after the nearest workday of ``dt``.

    Needed for Boxing day or multiple holidays in a series.
    """
    observed = nearest_workday(dt)
    return next_workday(observed)
class Holiday:
    """
    A single holiday rule.

    A holiday is anchored on a month/day (optionally a fixed year), may be
    adjusted by an offset or an observance function, and may be limited to a
    start/end date range and to particular days of the week.
    """

    start_date: Timestamp | None
    end_date: Timestamp | None
    days_of_week: tuple[int, ...] | None

    def __init__(
        self,
        name: str,
        year=None,
        month=None,
        day=None,
        offset=None,
        observance=None,
        start_date=None,
        end_date=None,
        days_of_week=None,
    ) -> None:
        """
        Parameters
        ----------
        name : str
            Name of the holiday.
        year, month, day : int, optional
            Anchor date of the holiday. When ``year`` is given the holiday
            is a single fixed date.
        offset : array of pandas.tseries.offsets or
                class from pandas.tseries.offsets
            computes offset from date
        observance: function
            computes when holiday is given a pandas Timestamp
        start_date, end_date : datetime-like, optional
            Bounds of the period in which the holiday applies; each is
            converted to a Timestamp when given.
        days_of_week:
            provide a tuple of days e.g  (0,1,2,3,) for Monday Through Thursday
            Monday=0,..,Sunday=6

        Examples
        --------
        >>> from dateutil.relativedelta import MO

        >>> USMemorialDay = pd.tseries.holiday.Holiday(
        ...     "Memorial Day", month=5, day=31, offset=pd.DateOffset(weekday=MO(-1))
        ... )
        >>> USMemorialDay
        Holiday: Memorial Day (month=5, day=31, offset=<DateOffset: weekday=MO(-1)>)

        >>> USLaborDay = pd.tseries.holiday.Holiday(
        ...     "Labor Day", month=9, day=1, offset=pd.DateOffset(weekday=MO(1))
        ... )
        >>> USLaborDay
        Holiday: Labor Day (month=9, day=1, offset=<DateOffset: weekday=MO(+1)>)

        >>> July3rd = pd.tseries.holiday.Holiday("July 3rd", month=7, day=3)
        >>> July3rd
        Holiday: July 3rd (month=7, day=3, )

        >>> NewYears = pd.tseries.holiday.Holiday(
        ...     "New Years Day", month=1, day=1,
        ...      observance=pd.tseries.holiday.nearest_workday
        ... )
        >>> NewYears  # doctest: +SKIP
        Holiday: New Years Day (
            month=1, day=1, observance=<function nearest_workday at 0x66545e9bc440>
        )

        >>> July3rd = pd.tseries.holiday.Holiday(
        ...     "July 3rd", month=7, day=3,
        ...     days_of_week=(0, 1, 2, 3)
        ... )
        >>> July3rd
        Holiday: July 3rd (month=7, day=3, )
        """
        # An offset and an observance are mutually exclusive adjustments.
        if not (offset is None or observance is None):
            raise NotImplementedError("Cannot use both offset and observance.")

        self.name = name
        self.year = year
        self.month = month
        self.day = day
        self.offset = offset
        self.start_date = None if start_date is None else Timestamp(start_date)
        self.end_date = None if end_date is None else Timestamp(end_date)
        self.observance = observance
        # Only an exact tuple (or None) is accepted for days_of_week.
        assert days_of_week is None or type(days_of_week) is tuple
        self.days_of_week = days_of_week

    def __repr__(self) -> str:
        pieces = []
        if self.year is not None:
            pieces.append(f"year={self.year}, ")
        pieces.append(f"month={self.month}, day={self.day}, ")
        if self.offset is not None:
            pieces.append(f"offset={self.offset}")
        if self.observance is not None:
            pieces.append(f"observance={self.observance}")
        return f"Holiday: {self.name} ({''.join(pieces)})"

    def dates(
        self, start_date, end_date, return_name: bool = False
    ) -> Series | DatetimeIndex:
        """
        Calculate holidays observed between start date and end date

        Parameters
        ----------
        start_date : starting date, datetime-like, optional
        end_date : ending date, datetime-like, optional
        return_name : bool, optional, default=False
            If True, return a series that has dates and holiday names.
            False will only return dates.

        Returns
        -------
        Series or DatetimeIndex
            Series if return_name is True
        """
        start_date = Timestamp(start_date)
        end_date = Timestamp(end_date)

        if self.year is not None:
            # A fixed-year holiday is returned as a single date without
            # being filtered against the requested range.
            fixed = DatetimeIndex(
                [Timestamp(datetime(self.year, self.month, self.day))]
            )
            return Series(self.name, index=fixed) if return_name else fixed

        holiday_dates = self._apply_rule(self._reference_dates(start_date, end_date))

        if self.days_of_week is not None:
            on_allowed_weekday = np.isin(
                # error: "DatetimeIndex" has no attribute "dayofweek"
                holiday_dates.dayofweek,  # type: ignore[attr-defined]
                self.days_of_week,
            ).ravel()
            holiday_dates = holiday_dates[on_allowed_weekday]

        # Narrow the requested window to the holiday's own validity period.
        lower = start_date
        if self.start_date is not None:
            lower = max(self.start_date.tz_localize(start_date.tz), start_date)
        upper = end_date
        if self.end_date is not None:
            upper = min(self.end_date.tz_localize(end_date.tz), end_date)

        in_window = (holiday_dates >= lower) & (holiday_dates <= upper)
        holiday_dates = holiday_dates[in_window]

        if return_name:
            return Series(self.name, index=holiday_dates)
        return holiday_dates

    def _reference_dates(
        self, start_date: Timestamp, end_date: Timestamp
    ) -> DatetimeIndex:
        """
        Get reference dates for the holiday.

        Return reference dates for the holiday also returning the year
        prior to the start_date and year following the end_date.  This ensures
        that any offsets to be applied will yield the holidays within
        the passed in dates.
        """
        if self.start_date is not None:
            start_date = self.start_date.tz_localize(start_date.tz)

        if self.end_date is not None:
            end_date = self.end_date.tz_localize(start_date.tz)

        first_anchor = Timestamp(datetime(start_date.year - 1, self.month, self.day))
        last_anchor = Timestamp(datetime(end_date.year + 1, self.month, self.day))

        # Don't process unnecessary holidays
        return date_range(
            start=first_anchor,
            end=last_anchor,
            freq=DateOffset(years=1),
            tz=start_date.tz,
        )

    def _apply_rule(self, dates: DatetimeIndex) -> DatetimeIndex:
        """
        Apply the given offset/observance to a DatetimeIndex of dates.

        Parameters
        ----------
        dates : DatetimeIndex
            Dates to apply the given offset/observance rule

        Returns
        -------
        Dates with rules applied
        """
        if dates.empty:
            return dates.copy()

        if self.observance is not None:

            def _observe(day):
                return self.observance(day)

            return dates.map(_observe)

        if self.offset is not None:
            offsets = self.offset if isinstance(self.offset, list) else [self.offset]
            for offset in offsets:
                # if we are adding a non-vectorized value
                # ignore the PerformanceWarnings:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", PerformanceWarning)
                    dates += offset
        return dates
# Registry of calendar classes keyed by calendar name; filled by ``register``.
holiday_calendars = {}


def register(cls) -> None:
    """
    Store ``cls`` in ``holiday_calendars``.

    The key is the class's ``name`` attribute when it has one, otherwise
    its ``__name__``.
    """
    if hasattr(cls, "name"):
        registry_key = cls.name
    else:
        registry_key = cls.__name__
    holiday_calendars[registry_key] = cls
def get_calendar(name: str):
    """
    Return an instance of a calendar based on its name.

    Parameters
    ----------
    name : str
        Calendar name to return an instance of

    Raises
    ------
    KeyError
        If no calendar has been registered under ``name``.
    """
    calendar_class = holiday_calendars[name]
    return calendar_class()
class HolidayCalendarMetaClass(type):
    """Metaclass that registers every class it creates via ``register``."""

    def __new__(cls, clsname: str, bases, attrs):
        new_class = super().__new__(cls, clsname, bases, attrs)
        # Registration happens at class-definition time.
        register(new_class)
        return new_class
class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass):
    """
    Abstract interface to create holidays following certain rules.

    Subclasses normally define ``rules`` as a class attribute. Computed
    holidays are cached per instance in ``_cache`` as a
    ``(start, end, holidays)`` tuple.
    """

    rules: list[Holiday] = []
    start_date = Timestamp(datetime(1970, 1, 1))
    end_date = Timestamp(datetime(2200, 12, 31))
    _cache = None

    def __init__(self, name: str = "", rules=None) -> None:
        """
        Initializes holiday object with a given set a rules.  Normally
        classes just have the rules defined within them.

        Parameters
        ----------
        name : str
            Name of the holiday calendar, defaults to class name
        rules : array of Holiday objects
            A set of rules used to create the holidays.
        """
        super().__init__()
        if not name:
            name = type(self).__name__
        self.name = name

        if rules is not None:
            self.rules = rules

    def rule_from_name(self, name: str):
        """Return the first rule whose name equals ``name``, or None."""
        for rule in self.rules:
            if rule.name == name:
                return rule

        return None

    def holidays(self, start=None, end=None, return_name: bool = False):
        """
        Return the holidays between ``start`` and ``end``.

        Parameters
        ----------
        start : starting date, datetime-like, optional
            Defaults to ``AbstractHolidayCalendar.start_date``.
        end : ending date, datetime-like, optional
            Defaults to ``AbstractHolidayCalendar.end_date``.
        return_name : bool, optional
            If True, return a series that has dates and holiday names.
            False will only return a DatetimeIndex of dates.

        Returns
        -------
            DatetimeIndex of holidays

        Raises
        ------
        Exception
            If ``rules`` is None.
        """
        if self.rules is None:
            raise Exception(
                f"Holiday Calendar {self.name} does not have any rules specified"
            )

        if start is None:
            start = AbstractHolidayCalendar.start_date

        if end is None:
            end = AbstractHolidayCalendar.end_date

        start = Timestamp(start)
        end = Timestamp(end)

        # If we don't have a cache or the dates are outside the prior cache, we
        # get them again
        if self._cache is None or start < self._cache[0] or end > self._cache[1]:
            pre_holidays = [
                rule.dates(start, end, return_name=True) for rule in self.rules
            ]
            if pre_holidays:
                # error: Argument 1 to "concat" has incompatible type
                # "List[Union[Series, DatetimeIndex]]"; expected
                # "Union[Iterable[DataFrame], Mapping[<nothing>, DataFrame]]"
                holidays = concat(pre_holidays)  # type: ignore[arg-type]
            else:
                # error: Incompatible types in assignment (expression has type
                # "Series", variable has type "DataFrame")
                holidays = Series(
                    index=DatetimeIndex([]), dtype=object
                )  # type: ignore[assignment]

            self._cache = (start, end, holidays.sort_index())

        holidays = self._cache[2]
        holidays = holidays[start:end]

        if return_name:
            return holidays
        else:
            return holidays.index

    @staticmethod
    def merge_class(base, other):
        """
        Merge holiday calendars together. The base calendar
        will take precedence to other. The merge will be done
        based on each holiday's name.

        Parameters
        ----------
        base : AbstractHolidayCalendar
          instance/subclass or array of Holiday objects
        other : AbstractHolidayCalendar
          instance/subclass or array of Holiday objects

        Returns
        -------
        list of Holiday
            Holidays from ``other`` first, with same-named entries replaced
            by the ones from ``base``, followed by holidays only in ``base``.
        """
        # Accept a calendar (anything with ``.rules``), a list of holidays or
        # a single holiday for both arguments.
        try:
            other = other.rules
        except AttributeError:
            pass

        if not isinstance(other, list):
            other = [other]
        other_holidays = {holiday.name: holiday for holiday in other}

        try:
            base = base.rules
        except AttributeError:
            pass

        if not isinstance(base, list):
            base = [base]
        base_holidays = {holiday.name: holiday for holiday in base}

        other_holidays.update(base_holidays)
        return list(other_holidays.values())

    def merge(self, other, inplace: bool = False):
        """
        Merge holiday calendars together.  The caller's class
        rules take precedence.  The merge will be done
        based on each holiday's name.

        Parameters
        ----------
        other : holiday calendar
        inplace : bool (default=False)
            If True set rule_table to holidays, else return array of Holidays

        Returns
        -------
        list of Holiday or None
            The merged rules when ``inplace`` is False; None otherwise.
        """
        holidays = self.merge_class(self, other)
        if inplace:
            self.rules = holidays
            # The cached holidays were computed from the previous rules; drop
            # them so the next ``holidays()`` call recomputes from the merged
            # rule set instead of returning stale results.
            self._cache = None
        else:
            return holidays
# Pre-built holiday rules. Each one anchors on a month/day and then applies an
# offset; the weekday arguments (MO, TH) come from dateutil.relativedelta, where
# e.g. MO(-1) selects the last Monday on or before the anchor and MO(3) the
# third Monday on or after it.

# Anchor May 31, then step back to a Monday.
USMemorialDay = Holiday(
"Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1))
)
# Anchor September 1, then the first Monday on or after it.
USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1)))
# Anchor October 1, then the second Monday on or after it.
USColumbusDay = Holiday(
"Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2))
)
# Anchor November 1, then the fourth Thursday on or after it.
USThanksgivingDay = Holiday(
"Thanksgiving Day", month=11, day=1, offset=DateOffset(weekday=TH(4))
)
# Anchor January 1, then the third Monday; only applies from 1986-01-01 on.
USMartinLutherKingJr = Holiday(
"Birthday of Martin Luther King, Jr.",
start_date=datetime(1986, 1, 1),
month=1,
day=1,
offset=DateOffset(weekday=MO(3)),
)
# Anchor February 1, then the third Monday on or after it.
USPresidentsDay = Holiday(
"Washington's Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3))
)
# Both rules anchor on January 1 and apply two offsets in order: first the
# Easter offset, then a fixed day shift (-2 days / +1 day).
GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])
EasterMonday = Holiday("Easter Monday", month=1, day=1, offset=[Easter(), Day(1)])
class USFederalHolidayCalendar(AbstractHolidayCalendar):
"""
US Federal Government Holiday Calendar based on rules specified by:
https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/
"""
rules = [
Holiday("New Year's Day", month=1, day=1, observance=nearest_workday),
USMartinLutherKingJr,
USPresidentsDay,
USMemorialDay,
Holiday(
"Juneteenth National Independence Day",
month=6,
day=19,
start_date="2021-06-18",
observance=nearest_workday,
),
Holiday("Independence Day", month=7, day=4, observance=nearest_workday),
USLaborDay,
USColumbusDay,
Holiday("Veterans Day", month=11, day=11, observance=nearest_workday),
USThanksgivingDay,
Holiday("Christmas Day", month=12, day=25, observance=nearest_workday),
]
def HolidayCalendarFactory(name: str, base, other, base_class=AbstractHolidayCalendar):
    """
    Build a new calendar class from the merged rules of two calendars.

    Parameters
    ----------
    name : str
        Name of the new class; also stored as its ``name`` attribute.
    base, other : calendar or list of Holiday
        Rule sources passed to ``AbstractHolidayCalendar.merge_class``.
    base_class : type, default AbstractHolidayCalendar
        Class the new calendar class derives from.

    Returns
    -------
    type
        The newly created calendar class.
    """
    namespace = {
        "rules": AbstractHolidayCalendar.merge_class(base, other),
        "name": name,
    }
    return type(name, (base_class,), namespace)
# Public names of this module: the observance helpers, the calendar registry
# functions, ``HolidayCalendarFactory`` and the weekday constants re-exported
# from dateutil.relativedelta.
# NOTE(review): ``Holiday``, ``AbstractHolidayCalendar``,
# ``USFederalHolidayCalendar`` and the pre-built holiday rules defined in this
# file are not listed here; confirm whether that omission is intentional.
__all__ = [
"after_nearest_workday",
"before_nearest_workday",
"FR",
"get_calendar",
"HolidayCalendarFactory",
"MO",
"nearest_workday",
"next_monday",
"next_monday_or_tuesday",
"next_workday",
"previous_friday",
"previous_workday",
"register",
"SA",
"SU",
"sunday_to_monday",
"TH",
"TU",
"WE",
"weekend_to_monday",
]

View File

@ -0,0 +1,91 @@
from __future__ import annotations
from pandas._libs.tslibs.offsets import (
FY5253,
BaseOffset,
BDay,
BMonthBegin,
BMonthEnd,
BQuarterBegin,
BQuarterEnd,
BusinessDay,
BusinessHour,
BusinessMonthBegin,
BusinessMonthEnd,
BYearBegin,
BYearEnd,
CBMonthBegin,
CBMonthEnd,
CDay,
CustomBusinessDay,
CustomBusinessHour,
CustomBusinessMonthBegin,
CustomBusinessMonthEnd,
DateOffset,
Day,
Easter,
FY5253Quarter,
Hour,
LastWeekOfMonth,
Micro,
Milli,
Minute,
MonthBegin,
MonthEnd,
Nano,
QuarterBegin,
QuarterEnd,
Second,
SemiMonthBegin,
SemiMonthEnd,
Tick,
Week,
WeekOfMonth,
YearBegin,
YearEnd,
)
# Public names of this module: every offset class imported above from
# ``pandas._libs.tslibs.offsets`` is re-exported here.
__all__ = [
"Day",
"BaseOffset",
"BusinessDay",
"BusinessMonthBegin",
"BusinessMonthEnd",
"BDay",
"CustomBusinessDay",
"CustomBusinessMonthBegin",
"CustomBusinessMonthEnd",
"CDay",
"CBMonthEnd",
"CBMonthBegin",
"MonthBegin",
"BMonthBegin",
"MonthEnd",
"BMonthEnd",
"SemiMonthEnd",
"SemiMonthBegin",
"BusinessHour",
"CustomBusinessHour",
"YearBegin",
"BYearBegin",
"YearEnd",
"BYearEnd",
"QuarterBegin",
"BQuarterBegin",
"QuarterEnd",
"BQuarterEnd",
"LastWeekOfMonth",
"FY5253Quarter",
"FY5253",
"Week",
"WeekOfMonth",
"Easter",
"Tick",
"Hour",
"Minute",
"Second",
"Milli",
"Micro",
"Nano",
"DateOffset",
]