Updated script that can be controlled by a Node.js web app
23 lib/python3.13/site-packages/pandas/core/window/__init__.py Normal file
@@ -0,0 +1,23 @@
from pandas.core.window.ewm import (
    ExponentialMovingWindow,
    ExponentialMovingWindowGroupby,
)
from pandas.core.window.expanding import (
    Expanding,
    ExpandingGroupby,
)
from pandas.core.window.rolling import (
    Rolling,
    RollingGroupby,
    Window,
)

__all__ = [
    "Expanding",
    "ExpandingGroupby",
    "ExponentialMovingWindow",
    "ExponentialMovingWindowGroupby",
    "Rolling",
    "RollingGroupby",
    "Window",
]
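For orientation, a minimal sketch (not part of the commit) of how these exported classes are normally reached through the public pandas accessors rather than imported from this private module:

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
print(s.rolling(window=2).mean().tolist())  # Rolling: [nan, 1.5, 2.5, 3.5]
print(s.expanding().sum().tolist())         # Expanding: [1.0, 3.0, 6.0, 10.0]
print(s.ewm(com=0.5).mean().iloc[-1])       # ExponentialMovingWindow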
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
169 lib/python3.13/site-packages/pandas/core/window/common.py Normal file
@@ -0,0 +1,169 @@
"""Common utility functions for rolling operations"""
from __future__ import annotations

from collections import defaultdict
from typing import cast

import numpy as np

from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)

from pandas.core.indexes.api import MultiIndex


def flex_binary_moment(arg1, arg2, f, pairwise: bool = False):
    if isinstance(arg1, ABCSeries) and isinstance(arg2, ABCSeries):
        X, Y = prep_binary(arg1, arg2)
        return f(X, Y)

    elif isinstance(arg1, ABCDataFrame):
        from pandas import DataFrame

        def dataframe_from_int_dict(data, frame_template) -> DataFrame:
            result = DataFrame(data, index=frame_template.index)
            if len(result.columns) > 0:
                result.columns = frame_template.columns[result.columns]
            else:
                result.columns = frame_template.columns.copy()
            return result

        results = {}
        if isinstance(arg2, ABCDataFrame):
            if pairwise is False:
                if arg1 is arg2:
                    # special case in order to handle duplicate column names
                    for i in range(len(arg1.columns)):
                        results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                    return dataframe_from_int_dict(results, arg1)
                else:
                    if not arg1.columns.is_unique:
                        raise ValueError("'arg1' columns are not unique")
                    if not arg2.columns.is_unique:
                        raise ValueError("'arg2' columns are not unique")
                    X, Y = arg1.align(arg2, join="outer")
                    X, Y = prep_binary(X, Y)
                    res_columns = arg1.columns.union(arg2.columns)
                    for col in res_columns:
                        if col in X and col in Y:
                            results[col] = f(X[col], Y[col])
                    return DataFrame(results, index=X.index, columns=res_columns)
            elif pairwise is True:
                results = defaultdict(dict)
                for i in range(len(arg1.columns)):
                    for j in range(len(arg2.columns)):
                        if j < i and arg2 is arg1:
                            # Symmetric case
                            results[i][j] = results[j][i]
                        else:
                            results[i][j] = f(
                                *prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
                            )

                from pandas import concat

                result_index = arg1.index.union(arg2.index)
                if len(result_index):
                    # construct result frame
                    result = concat(
                        [
                            concat(
                                [results[i][j] for j in range(len(arg2.columns))],
                                ignore_index=True,
                            )
                            for i in range(len(arg1.columns))
                        ],
                        ignore_index=True,
                        axis=1,
                    )
                    result.columns = arg1.columns

                    # set the index and reorder
                    if arg2.columns.nlevels > 1:
                        # mypy needs to know columns is a MultiIndex, Index doesn't
                        # have levels attribute
                        arg2.columns = cast(MultiIndex, arg2.columns)
                        # GH 21157: Equivalent to MultiIndex.from_product(
                        #  [result_index], <unique combinations of arg2.columns.levels>,
                        # )
                        # A normal MultiIndex.from_product will produce too many
                        # combinations.
                        result_level = np.tile(
                            result_index, len(result) // len(result_index)
                        )
                        arg2_levels = (
                            np.repeat(
                                arg2.columns.get_level_values(i),
                                len(result) // len(arg2.columns),
                            )
                            for i in range(arg2.columns.nlevels)
                        )
                        result_names = list(arg2.columns.names) + [result_index.name]
                        result.index = MultiIndex.from_arrays(
                            [*arg2_levels, result_level], names=result_names
                        )
                        # GH 34440
                        num_levels = len(result.index.levels)
                        new_order = [num_levels - 1] + list(range(num_levels - 1))
                        result = result.reorder_levels(new_order).sort_index()
                    else:
                        result.index = MultiIndex.from_product(
                            [range(len(arg2.columns)), range(len(result_index))]
                        )
                        result = result.swaplevel(1, 0).sort_index()
                        result.index = MultiIndex.from_product(
                            [result_index] + [arg2.columns]
                        )
                else:
                    # empty result
                    result = DataFrame(
                        index=MultiIndex(
                            levels=[arg1.index, arg2.columns], codes=[[], []]
                        ),
                        columns=arg2.columns,
                        dtype="float64",
                    )

                # reset our index names to arg1 names
                # reset our column names to arg2 names
                # careful not to mutate the original names
                result.columns = result.columns.set_names(arg1.columns.names)
                result.index = result.index.set_names(
                    result_index.names + arg2.columns.names
                )

                return result
        else:
            results = {
                i: f(*prep_binary(arg1.iloc[:, i], arg2))
                for i in range(len(arg1.columns))
            }
            return dataframe_from_int_dict(results, arg1)

    else:
        return flex_binary_moment(arg2, arg1, f)


def zsqrt(x):
    with np.errstate(all="ignore"):
        result = np.sqrt(x)
        mask = x < 0

    if isinstance(x, ABCDataFrame):
        if mask._values.any():
            result[mask] = 0
    else:
        if mask.any():
            result[mask] = 0

    return result


def prep_binary(arg1, arg2):
    # mask out values, this also makes a common index...
    X = arg1 + 0 * arg2
    Y = arg2 + 0 * arg1

    return X, Y
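A small illustration (not from the commit) of the alignment trick prep_binary relies on: adding 0 * other aligns both operands to a shared index while propagating the other side's missing values.

import numpy as np
import pandas as pd

a = pd.Series([1.0, 2.0, 3.0], index=["x", "y", "z"])
b = pd.Series([10.0, np.nan], index=["y", "z"])
X = a + 0 * b   # x -> NaN (absent in b), y -> 2.0, z -> NaN (NaN in b)
Y = b + 0 * a
print(X.index.equals(Y.index))  # True: both carry the aligned union index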
116 lib/python3.13/site-packages/pandas/core/window/doc.py Normal file
@@ -0,0 +1,116 @@
"""Any shareable docstring components for rolling/expanding/ewm"""
from __future__ import annotations

from textwrap import dedent

from pandas.core.shared_docs import _shared_docs

_shared_docs = dict(**_shared_docs)


def create_section_header(header: str) -> str:
    """Create numpydoc section header"""
    return f"{header}\n{'-' * len(header)}\n"


template_header = "\nCalculate the {window_method} {aggregation_description}.\n\n"

template_returns = dedent(
    """
    Series or DataFrame
        Return type is the same as the original object with ``np.float64`` dtype.\n
    """
).replace("\n", "", 1)

template_see_also = dedent(
    """
    pandas.Series.{window_method} : Calling {window_method} with Series data.
    pandas.DataFrame.{window_method} : Calling {window_method} with DataFrames.
    pandas.Series.{agg_method} : Aggregating {agg_method} for Series.
    pandas.DataFrame.{agg_method} : Aggregating {agg_method} for DataFrame.\n
    """
).replace("\n", "", 1)

kwargs_numeric_only = dedent(
    """
    numeric_only : bool, default False
        Include only float, int, boolean columns.

        .. versionadded:: 1.5.0\n
    """
).replace("\n", "", 1)

kwargs_scipy = dedent(
    """
    **kwargs
        Keyword arguments to configure the ``SciPy`` weighted window type.\n
    """
).replace("\n", "", 1)

window_apply_parameters = dedent(
    """
    func : function
        Must produce a single value from an ndarray input if ``raw=True``
        or a single value from a Series if ``raw=False``. Can also accept a
        Numba JIT function with ``engine='numba'`` specified.

    raw : bool, default False
        * ``False`` : passes each row or column as a Series to the
          function.
        * ``True`` : the passed function will receive ndarray
          objects instead.
          If you are just applying a NumPy reduction function this will
          achieve much better performance.

    engine : str, default None
        * ``'cython'`` : Runs rolling apply through C-extensions from cython.
        * ``'numba'`` : Runs rolling apply through JIT compiled code from numba.
          Only available when ``raw`` is set to ``True``.
        * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba``

    engine_kwargs : dict, default None
        * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
        * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
          and ``parallel`` dictionary keys. The values must either be ``True`` or
          ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
          ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be
          applied to both the ``func`` and the ``apply`` rolling aggregation.

    args : tuple, default None
        Positional arguments to be passed into func.

    kwargs : dict, default None
        Keyword arguments to be passed into func.\n
    """
).replace("\n", "", 1)

numba_notes = (
    "See :ref:`window.numba_engine` and :ref:`enhancingperf.numba` for "
    "extended documentation and performance considerations for the Numba engine.\n\n"
)


def window_agg_numba_parameters(version: str = "1.3") -> str:
    return (
        dedent(
            """
    engine : str, default None
        * ``'cython'`` : Runs the operation through C-extensions from cython.
        * ``'numba'`` : Runs the operation through JIT compiled code from numba.
        * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba``

        .. versionadded:: {version}.0

    engine_kwargs : dict, default None
        * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
        * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
          and ``parallel`` dictionary keys. The values must either be ``True`` or
          ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
          ``{{'nopython': True, 'nogil': False, 'parallel': False}}``

        .. versionadded:: {version}.0\n
    """
        )
        .replace("\n", "", 1)
        .replace("{version}", version)
    )
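A quick sketch of how these shared pieces compose into a numpydoc docstring; the import references the private module added above, so the path is internal and subject to change:

from pandas.core.window.doc import create_section_header, template_header

doc = template_header.format(
    window_method="expanding", aggregation_description="sum"
) + create_section_header("Returns")
print(doc)
# Calculate the expanding sum.
#
# Returns
# -------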
1095 lib/python3.13/site-packages/pandas/core/window/ewm.py Normal file
File diff suppressed because it is too large
964 lib/python3.13/site-packages/pandas/core/window/expanding.py Normal file
@@ -0,0 +1,964 @@
from __future__ import annotations

from textwrap import dedent
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
)

from pandas.util._decorators import (
    deprecate_kwarg,
    doc,
)

from pandas.core.indexers.objects import (
    BaseIndexer,
    ExpandingIndexer,
    GroupbyIndexer,
)
from pandas.core.window.doc import (
    _shared_docs,
    create_section_header,
    kwargs_numeric_only,
    numba_notes,
    template_header,
    template_returns,
    template_see_also,
    window_agg_numba_parameters,
    window_apply_parameters,
)
from pandas.core.window.rolling import (
    BaseWindowGroupby,
    RollingAndExpandingMixin,
)

if TYPE_CHECKING:
    from pandas._typing import (
        Axis,
        QuantileInterpolation,
        WindowingRankType,
    )

    from pandas import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame


class Expanding(RollingAndExpandingMixin):
    """
    Provide expanding window calculations.

    Parameters
    ----------
    min_periods : int, default 1
        Minimum number of observations in window required to have a value;
        otherwise, result is ``np.nan``.

    axis : int or str, default 0
        If ``0`` or ``'index'``, roll across the rows.

        If ``1`` or ``'columns'``, roll across the columns.

        For `Series` this parameter is unused and defaults to 0.

    method : str {'single', 'table'}, default 'single'
        Execute the rolling operation per single column or row (``'single'``)
        or over the entire object (``'table'``).

        This argument is only implemented when specifying ``engine='numba'``
        in the method call.

        .. versionadded:: 1.3.0

    Returns
    -------
    pandas.api.typing.Expanding

    See Also
    --------
    rolling : Provides rolling window calculations.
    ewm : Provides exponential weighted functions.

    Notes
    -----
    See :ref:`Windowing Operations <window.expanding>` for further usage details
    and examples.

    Examples
    --------
    >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    **min_periods**

    Expanding sum with 1 vs 3 observations needed to calculate a value.

    >>> df.expanding(1).sum()
         B
    0  0.0
    1  1.0
    2  3.0
    3  3.0
    4  7.0
    >>> df.expanding(3).sum()
         B
    0  NaN
    1  NaN
    2  3.0
    3  3.0
    4  7.0
    """

    _attributes: list[str] = ["min_periods", "axis", "method"]

    def __init__(
        self,
        obj: NDFrame,
        min_periods: int = 1,
        axis: Axis = 0,
        method: str = "single",
        selection=None,
    ) -> None:
        super().__init__(
            obj=obj,
            min_periods=min_periods,
            axis=axis,
            method=method,
            selection=selection,
        )

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExpandingIndexer()

    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.ewm(alpha=0.5).mean()
                  A         B         C
        0  1.000000  4.000000  7.000000
        1  1.666667  4.666667  7.666667
        2  2.428571  5.428571  8.428571
        """
        ),
        klass="Series/Dataframe",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate

    @doc(
        template_header,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().count()
        a    1.0
        b    2.0
        c    3.0
        d    4.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self, numeric_only: bool = False):
        return super().count(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().apply(lambda s: s.max() - 2 * s.min())
        a   -1.0
        b    0.0
        c    1.0
        d    2.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().sum()
        a     1.0
        b     3.0
        c     6.0
        d    10.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().sum(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([3, 2, 1, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().max()
        a    3.0
        b    3.0
        c    3.0
        d    4.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().max(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([2, 3, 4, 1], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().min()
        a    2.0
        b    2.0
        c    2.0
        d    1.0
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().min(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().mean()
        a    1.0
        b    1.5
        c    2.0
        d    2.5
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().mean(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser.expanding().median()
        a    1.0
        b    1.5
        c    2.0
        d    2.5
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().median(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.

        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).std()
        0         NaN
        1         NaN
        2    0.577350
        3    0.957427
        4    0.894427
        5    0.836660
        6    0.786796
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard deviation",
        agg_method="std",
    )
    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().std(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.

        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])

        >>> s.expanding(3).var()
        0         NaN
        1         NaN
        2    0.333333
        3    0.916667
        4    0.800000
        5    0.700000
        6    0.619048
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="variance",
        agg_method="var",
    )
    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: Literal["cython", "numba"] | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        return super().var(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.\n
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([0, 1, 2, 3])

        >>> s.expanding().sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.745356
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, numeric_only: bool = False):
        return super().sem(ddof=ddof, numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of three periods is required for the rolling calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([-1, 0, 2, -1, 2], index=['a', 'b', 'c', 'd', 'e'])
        >>> ser.expanding().skew()
        a         NaN
        b         NaN
        c    0.935220
        d    1.414214
        e    0.315356
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, numeric_only: bool = False):
        return super().skew(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        The example below will show a rolling calculation with a window size of
        four matching the equivalent function call using `scipy.stats`.

        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}")
        4.999874
        >>> s = pd.Series(arr)
        >>> s.expanding(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    4.999874
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, numeric_only: bool = False):
        return super().kurt(numeric_only=numeric_only)

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.

            .. deprecated:: 2.1.0
                This will be renamed to 'q' in a future version.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:

                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser = pd.Series([1, 2, 3, 4, 5, 6], index=['a', 'b', 'c', 'd', 'e', 'f'])
        >>> ser.expanding(min_periods=4).quantile(.25)
        a     NaN
        b     NaN
        c     NaN
        d    1.75
        e    2.00
        f    2.25
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    @deprecate_kwarg(old_arg_name="quantile", new_arg_name="q")
    def quantile(
        self,
        q: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
    ):
        return super().quantile(
            q=q,
            interpolation=interpolation,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        ".. versionadded:: 1.4.0 \n\n",
        create_section_header("Parameters"),
        dedent(
            """
        method : {{'average', 'min', 'max'}}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group

        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 4, 2, 3, 5, 3])
        >>> s.expanding().rank()
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.5
        dtype: float64

        >>> s.expanding().rank(method="max")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    4.0
        dtype: float64

        >>> s.expanding().rank(method="min")
        0    1.0
        1    2.0
        2    2.0
        3    3.0
        4    5.0
        5    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="expanding",
        aggregation_description="rank",
        agg_method="rank",
    )
    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
    ):
        return super().rank(
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser2 = pd.Series([10, 11, 13, 16], index=['a', 'b', 'c', 'd'])
        >>> ser1.expanding().cov(ser2)
        a         NaN
        b    0.500000
        c    1.500000
        d    3.333333
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().cov(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent(
            """
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """
        ).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).

        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.

        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.

        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.

        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.

        In the case of missing elements, only complete pairwise observations
        will be used.\n
        """
        ),
        create_section_header("Examples"),
        dedent(
            """\
        >>> ser1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        >>> ser2 = pd.Series([10, 11, 13, 16], index=['a', 'b', 'c', 'd'])
        >>> ser1.expanding().corr(ser2)
        a         NaN
        b    1.000000
        c    0.981981
        d    0.975900
        dtype: float64
        """
        ),
        window_method="expanding",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        return super().corr(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )


class ExpandingGroupby(BaseWindowGroupby, Expanding):
    """
    Provide a expanding groupby implementation.
    """

    _attributes = Expanding._attributes + BaseWindowGroupby._attributes

    def _get_window_indexer(self) -> GroupbyIndexer:
        """
        Return an indexer class that will compute the window start and end bounds

        Returns
        -------
        GroupbyIndexer
        """
        window_indexer = GroupbyIndexer(
            groupby_indices=self._grouper.indices,
            window_indexer=ExpandingIndexer,
        )
        return window_indexer
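A short usage sketch matching the class docstring above; the expanding accessor returns the Expanding object defined in this file:

import numpy as np
import pandas as pd

df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
exp = df.expanding(min_periods=3)   # Expanding instance
print(exp.sum()["B"].tolist())      # [nan, nan, 3.0, 3.0, 7.0]
print(exp.mean()["B"].tolist())     # [nan, nan, 1.0, 1.0, 1.75]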
351 lib/python3.13/site-packages/pandas/core/window/numba_.py Normal file
@@ -0,0 +1,351 @@
from __future__ import annotations

import functools
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
)

import numpy as np

from pandas.compat._optional import import_optional_dependency

from pandas.core.util.numba_ import jit_user_function

if TYPE_CHECKING:
    from pandas._typing import Scalar


@functools.cache
def generate_numba_apply_func(
    func: Callable[..., Scalar],
    nopython: bool,
    nogil: bool,
    parallel: bool,
):
    """
    Generate a numba jitted apply function specified by values from engine_kwargs.

    1. jit the user's function
    2. Return a rolling apply function with the jitted function inline

    Configurations specified in engine_kwargs apply to both the user's
    function _AND_ the rolling apply function.

    Parameters
    ----------
    func : function
        function to be applied to each window and will be JITed
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
    """
    numba_func = jit_user_function(func)
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def roll_apply(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ) -> np.ndarray:
        result = np.empty(len(begin))
        for i in numba.prange(len(result)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            count_nan = np.sum(np.isnan(window))
            if len(window) - count_nan >= minimum_periods:
                result[i] = numba_func(window, *args)
            else:
                result[i] = np.nan
        return result

    return roll_apply


@functools.cache
def generate_numba_ewm_func(
    nopython: bool,
    nogil: bool,
    parallel: bool,
    com: float,
    adjust: bool,
    ignore_na: bool,
    deltas: tuple,
    normalize: bool,
):
    """
    Generate a numba jitted ewm mean or sum function specified by values
    from engine_kwargs.

    Parameters
    ----------
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit
    com : float
    adjust : bool
    ignore_na : bool
    deltas : tuple
    normalize : bool

    Returns
    -------
    Numba function
    """
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def ewm(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        result = np.empty(len(values))
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha

        for i in numba.prange(len(begin)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            sub_result = np.empty(len(window))

            weighted = window[0]
            nobs = int(not np.isnan(weighted))
            sub_result[0] = weighted if nobs >= minimum_periods else np.nan
            old_wt = 1.0

            for j in range(1, len(window)):
                cur = window[j]
                is_observation = not np.isnan(cur)
                nobs += is_observation
                if not np.isnan(weighted):
                    if is_observation or not ignore_na:
                        if normalize:
                            # note that len(deltas) = len(vals) - 1 and deltas[i]
                            # is to be used in conjunction with vals[i+1]
                            old_wt *= old_wt_factor ** deltas[start + j - 1]
                        else:
                            weighted = old_wt_factor * weighted
                        if is_observation:
                            if normalize:
                                # avoid numerical errors on constant series
                                if weighted != cur:
                                    weighted = old_wt * weighted + new_wt * cur
                                    if normalize:
                                        weighted = weighted / (old_wt + new_wt)
                                if adjust:
                                    old_wt += new_wt
                                else:
                                    old_wt = 1.0
                            else:
                                weighted += cur
                elif is_observation:
                    weighted = cur

                sub_result[j] = weighted if nobs >= minimum_periods else np.nan

            result[start:stop] = sub_result

        return result

    return ewm


@functools.cache
def generate_numba_table_func(
    func: Callable[..., np.ndarray],
    nopython: bool,
    nogil: bool,
    parallel: bool,
):
    """
    Generate a numba jitted function to apply window calculations table-wise.

    Func will be passed a M window size x N number of columns array, and
    must return a 1 x N number of columns array. Func is intended to operate
    row-wise, but the result will be transposed for axis=1.

    1. jit the user's function
    2. Return a rolling apply function with the jitted function inline

    Parameters
    ----------
    func : function
        function to be applied to each window and will be JITed
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
    """
    numba_func = jit_user_function(func)
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def roll_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ):
        result = np.empty((len(begin), values.shape[1]))
        min_periods_mask = np.empty(result.shape)
        for i in numba.prange(len(result)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            count_nan = np.sum(np.isnan(window), axis=0)
            sub_result = numba_func(window, *args)
            nan_mask = len(window) - count_nan >= minimum_periods
            min_periods_mask[i, :] = nan_mask
            result[i, :] = sub_result
        result = np.where(min_periods_mask, result, np.nan)
        return result

    return roll_table


# This function will no longer be needed once numba supports
# axis for all np.nan* agg functions
# https://github.com/numba/numba/issues/1269
@functools.cache
def generate_manual_numpy_nan_agg_with_axis(nan_func):
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=True, nogil=True, parallel=True)
    def nan_agg_with_axis(table):
        result = np.empty(table.shape[1])
        for i in numba.prange(table.shape[1]):
            partition = table[:, i]
            result[i] = nan_func(partition)
        return result

    return nan_agg_with_axis


@functools.cache
def generate_numba_ewm_table_func(
    nopython: bool,
    nogil: bool,
    parallel: bool,
    com: float,
    adjust: bool,
    ignore_na: bool,
    deltas: tuple,
    normalize: bool,
):
    """
    Generate a numba jitted ewm mean or sum function applied table wise specified
    by values from engine_kwargs.

    Parameters
    ----------
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit
    com : float
    adjust : bool
    ignore_na : bool
    deltas : tuple
    normalize : bool

    Returns
    -------
    Numba function
    """
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def ewm_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha
        old_wt = np.ones(values.shape[1])

        result = np.empty(values.shape)
        weighted = values[0].copy()
        nobs = (~np.isnan(weighted)).astype(np.int64)
        result[0] = np.where(nobs >= minimum_periods, weighted, np.nan)
        for i in range(1, len(values)):
            cur = values[i]
            is_observations = ~np.isnan(cur)
            nobs += is_observations.astype(np.int64)
            for j in numba.prange(len(cur)):
                if not np.isnan(weighted[j]):
                    if is_observations[j] or not ignore_na:
                        if normalize:
                            # note that len(deltas) = len(vals) - 1 and deltas[i]
                            # is to be used in conjunction with vals[i+1]
                            old_wt[j] *= old_wt_factor ** deltas[i - 1]
                        else:
                            weighted[j] = old_wt_factor * weighted[j]
                        if is_observations[j]:
                            if normalize:
                                # avoid numerical errors on constant series
                                if weighted[j] != cur[j]:
                                    weighted[j] = (
                                        old_wt[j] * weighted[j] + new_wt * cur[j]
                                    )
                                    if normalize:
                                        weighted[j] = weighted[j] / (old_wt[j] + new_wt)
                                if adjust:
                                    old_wt[j] += new_wt
                                else:
                                    old_wt[j] = 1.0
                            else:
                                weighted[j] += cur[j]
                elif is_observations[j]:
                    weighted[j] = cur[j]

            result[i] = np.where(nobs >= minimum_periods, weighted, np.nan)

        return result

    return ewm_table
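Illustrative only: the jitted kernels above are selected by passing engine="numba" to the window methods, which needs the optional numba dependency at runtime; engine="cython" exercises the same API without it.

import pandas as pd

s = pd.Series(range(10), dtype="float64")
out = s.rolling(3).apply(lambda w: w.max() - w.min(), raw=True, engine="cython")
print(out.tolist())  # [nan, nan, 2.0, 2.0, ...]
# With numba installed, the generate_numba_apply_func path above is used instead:
# s.rolling(3).apply(lambda w: w.max() - w.min(), raw=True, engine="numba")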
118 lib/python3.13/site-packages/pandas/core/window/online.py Normal file
@@ -0,0 +1,118 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from pandas.compat._optional import import_optional_dependency


def generate_online_numba_ewma_func(
    nopython: bool,
    nogil: bool,
    parallel: bool,
):
    """
    Generate a numba jitted groupby ewma function specified by values
    from engine_kwargs.

    Parameters
    ----------
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
    """
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def online_ewma(
        values: np.ndarray,
        deltas: np.ndarray,
        minimum_periods: int,
        old_wt_factor: float,
        new_wt: float,
        old_wt: np.ndarray,
        adjust: bool,
        ignore_na: bool,
    ):
        """
        Compute online exponentially weighted mean per column over 2D values.

        Takes the first observation as is, then computes the subsequent
        exponentially weighted mean accounting minimum periods.
        """
        result = np.empty(values.shape)
        weighted_avg = values[0].copy()
        nobs = (~np.isnan(weighted_avg)).astype(np.int64)
        result[0] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)

        for i in range(1, len(values)):
            cur = values[i]
            is_observations = ~np.isnan(cur)
            nobs += is_observations.astype(np.int64)
            for j in numba.prange(len(cur)):
                if not np.isnan(weighted_avg[j]):
                    if is_observations[j] or not ignore_na:
                        # note that len(deltas) = len(vals) - 1 and deltas[i] is to be
                        # used in conjunction with vals[i+1]
                        old_wt[j] *= old_wt_factor ** deltas[j - 1]
                        if is_observations[j]:
                            # avoid numerical errors on constant series
                            if weighted_avg[j] != cur[j]:
                                weighted_avg[j] = (
                                    (old_wt[j] * weighted_avg[j]) + (new_wt * cur[j])
                                ) / (old_wt[j] + new_wt)
                            if adjust:
                                old_wt[j] += new_wt
                            else:
                                old_wt[j] = 1.0
                elif is_observations[j]:
                    weighted_avg[j] = cur[j]

            result[i] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)

        return result, old_wt

    return online_ewma


class EWMMeanState:
    def __init__(self, com, adjust, ignore_na, axis, shape) -> None:
        alpha = 1.0 / (1.0 + com)
        self.axis = axis
        self.shape = shape
        self.adjust = adjust
        self.ignore_na = ignore_na
        self.new_wt = 1.0 if adjust else alpha
        self.old_wt_factor = 1.0 - alpha
        self.old_wt = np.ones(self.shape[self.axis - 1])
        self.last_ewm = None

    def run_ewm(self, weighted_avg, deltas, min_periods, ewm_func):
        result, old_wt = ewm_func(
            weighted_avg,
            deltas,
            min_periods,
            self.old_wt_factor,
            self.new_wt,
            self.old_wt,
            self.adjust,
            self.ignore_na,
        )
        self.old_wt = old_wt
        self.last_ewm = result[-1]
        return result

    def reset(self) -> None:
        self.old_wt = np.ones(self.shape[self.axis - 1])
        self.last_ewm = None
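A hedged sketch of the online EWM workflow that EWMMeanState backs: pandas exposes it through .online() on an ewm object, but that path requires numba at runtime, so this is illustrative rather than something this commit exercises.

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0])
online_ewm = s.ewm(com=0.5).online()   # OnlineExponentialMovingWindow
print(online_ewm.mean())               # first batch initializes EWMMeanState
print(online_ewm.mean(update=s * 2))   # later batches reuse the stored state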
2930 lib/python3.13/site-packages/pandas/core/window/rolling.py Normal file
File diff suppressed because it is too large