Updated script that can be controlled by a Node.js web app
This commit is contained in:
15
lib/python3.13/site-packages/pandas/core/groupby/__init__.py
Normal file
15
lib/python3.13/site-packages/pandas/core/groupby/__init__.py
Normal file
@ -0,0 +1,15 @@
|
||||
from pandas.core.groupby.generic import (
|
||||
DataFrameGroupBy,
|
||||
NamedAgg,
|
||||
SeriesGroupBy,
|
||||
)
|
||||
from pandas.core.groupby.groupby import GroupBy
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
|
||||
__all__ = [
|
||||
"DataFrameGroupBy",
|
||||
"NamedAgg",
|
||||
"SeriesGroupBy",
|
||||
"GroupBy",
|
||||
"Grouper",
|
||||
]
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
121
lib/python3.13/site-packages/pandas/core/groupby/base.py
Normal file
121
lib/python3.13/site-packages/pandas/core/groupby/base.py
Normal file
@ -0,0 +1,121 @@
|
||||
"""
|
||||
Provide basic components for groupby.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Hashable
|
||||
|
||||
|
||||
# Immutable record (frozen=True) pairing a label with an integer position.
# order=True generates comparison methods that compare instances field by
# field in declaration order: ``label`` first, then ``position``.
@dataclasses.dataclass(order=True, frozen=True)
class OutputKey:
    # Hashable label of the entry.
    # NOTE(review): presumably the name of an output column -- confirm in callers.
    label: Hashable
    # Integer position of the entry.
    # NOTE(review): presumably the column's location in the output -- confirm.
    position: int
|
||||
|
||||
|
||||
# Plotting methods are special-cased to prevent duplicate plots when
# catching exceptions while forwarding methods from NDFrames.
plotting_methods = frozenset({"plot", "hist"})

# Cythonized transformations or canned "agg+broadcast" kernels, which do not
# require postprocessing of the result by transform.
cythonized_kernels = frozenset({"cumprod", "cumsum", "shift", "cummin", "cummax"})

# Aggregation/reduction functions.
# These map each group to a single numeric value.
reduction_kernels = frozenset(
    {
        "all", "any", "corrwith", "count", "first",
        "idxmax", "idxmin", "last", "max", "mean",
        "median", "min", "nunique", "prod",
        # as long as `quantile`'s signature accepts only
        # a single quantile value, it's a reduction.
        # GH#27526 might change that.
        "quantile",
        "sem", "size", "skew", "std", "sum", "var",
    }
)

# Transformation functions.
# A transformation is a function that, for each group,
# produces a result that has the same shape as the group.
transformation_kernels = frozenset(
    {
        "bfill", "cumcount", "cummax", "cummin", "cumprod",
        "cumsum", "diff", "ffill", "fillna", "ngroup",
        "pct_change", "rank", "shift",
    }
)

# All the public methods on Grouper which don't belong
# in either of the above lists.
groupby_other_methods = frozenset(
    {
        "agg", "aggregate", "apply", "boxplot",
        # corr and cov return ngroups*ncolumns rows, so they
        # are neither a transformation nor a reduction
        "corr", "cov",
        "describe", "dtypes", "expanding", "ewm", "filter",
        "get_group", "groups", "head", "hist", "indices",
        "ndim", "ngroups", "nth", "ohlc", "pipe",
        "plot", "resample", "rolling", "tail", "take",
        "transform", "sample", "value_counts",
    }
)

# Valid values of `name` for `groupby.transform(name)`
# NOTE: do NOT edit this directly. New additions should be inserted
# into the appropriate list above.
transform_kernel_allowlist = reduction_kernels | transformation_kernels
|
@ -0,0 +1,87 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.algorithms import unique1d
|
||||
from pandas.core.arrays.categorical import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
recode_for_categories,
|
||||
)
|
||||
|
||||
|
||||
def recode_for_groupby(
    c: Categorical, sort: bool, observed: bool
) -> tuple[Categorical, Categorical | None]:
    """
    Code the categories to ensure we can groupby for categoricals.

    If observed=True, we return a new Categorical with the observed
    categories only.

    If sort=False, return a copy of self, coded with categories as
    returned by .unique(), followed by any categories not appearing in
    the data. If sort=True, return self.

    This method is needed solely to ensure the categorical index of the
    GroupBy result has categories in the order of appearance in the data
    (GH-8868).

    Parameters
    ----------
    c : Categorical
    sort : bool
        The value of the sort parameter groupby was called with.
    observed : bool
        Account only for the observed values

    Returns
    -------
    Categorical
        If sort=False, the new categories are set to the order of
        appearance in codes, followed by any unrepresented categories in
        the original order.
        NOTE(review): an earlier version of this docstring said ordered
        categoricals keep their original category order; this function does
        not special-case ``c.ordered`` on that path, so any such handling
        must live in the caller -- confirm.
    Categorical or None
        If we are observed, return the original categorical, otherwise None
    """
    # we only care about observed values
    if observed:
        # In cases with c.ordered, this is equivalent to
        # return c.remove_unused_categories(), c

        unique_codes = unique1d(c.codes)

        # -1 is the code for missing values; it never maps to a category
        take_codes = unique_codes[unique_codes != -1]
        if sort:
            take_codes = np.sort(take_codes)

        # we recode according to the uniques
        categories = c.categories.take(take_codes)
        codes = recode_for_categories(c.codes, c.categories, categories)

        # return a new categorical that maps our new codes
        # and categories
        dtype = CategoricalDtype(categories, ordered=c.ordered)
        return Categorical._simple_new(codes, dtype=dtype), c

    # Already sorted according to c.categories; all is fine
    if sort:
        return c, None

    # From here on sort is False: order groups in as-encountered order
    # (GH-8868). The former ``if sort: np.sort(...)`` step below this point
    # could never run because of the early return above, so it was removed.

    # xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
    all_codes = np.arange(c.categories.nunique())
    # GH 38140: exclude nan from indexer for categories
    unique_notnan_codes = unique1d(c.codes[c.codes != -1])
    if len(all_codes) > len(unique_notnan_codes):
        # GH 13179: All categories need to be present, even if missing from the data
        missing_codes = np.setdiff1d(all_codes, unique_notnan_codes, assume_unique=True)
        take_codes = np.concatenate((unique_notnan_codes, missing_codes))
    else:
        take_codes = unique_notnan_codes

    return Categorical(c, c.unique().categories.take(take_codes)), None
|
2852
lib/python3.13/site-packages/pandas/core/groupby/generic.py
Normal file
2852
lib/python3.13/site-packages/pandas/core/groupby/generic.py
Normal file
File diff suppressed because it is too large
Load Diff
5997
lib/python3.13/site-packages/pandas/core/groupby/groupby.py
Normal file
5997
lib/python3.13/site-packages/pandas/core/groupby/groupby.py
Normal file
File diff suppressed because it is too large
Load Diff
1102
lib/python3.13/site-packages/pandas/core/groupby/grouper.py
Normal file
1102
lib/python3.13/site-packages/pandas/core/groupby/grouper.py
Normal file
File diff suppressed because it is too large
Load Diff
304
lib/python3.13/site-packages/pandas/core/groupby/indexing.py
Normal file
304
lib/python3.13/site-packages/pandas/core/groupby/indexing.py
Normal file
@ -0,0 +1,304 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Literal,
|
||||
cast,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.util._decorators import (
|
||||
cache_readonly,
|
||||
doc,
|
||||
)
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_integer,
|
||||
is_list_like,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import PositionalIndexer
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
from pandas.core.groupby import groupby
|
||||
|
||||
|
||||
class GroupByIndexingMixin:
    """
    Mixin for adding ._positional_selector to GroupBy.
    """

    @cache_readonly
    def _positional_selector(self) -> GroupByPositionalSelector:
        """
        Return positional selection for each group.

        ``groupby._positional_selector[i:j]`` is similar to
        ``groupby.apply(lambda x: x.iloc[i:j])``
        but much faster and preserves the original index and order.

        ``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head`
        and :meth:`~GroupBy.tail`. For example:

        - ``head(5)``
        - ``_positional_selector[5:-5]``
        - ``tail(5)``

        together return all the rows.

        Allowed inputs for the index are:

        - An integer valued iterable, e.g. ``range(2, 4)``.
        - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``.

        The output format is the same as :meth:`~GroupBy.head` and
        :meth:`~GroupBy.tail`, namely
        a subset of the ``DataFrame`` or ``Series`` with the index and order preserved.

        Returns
        -------
        Series
            The filtered subset of the original Series.
        DataFrame
            The filtered subset of the original DataFrame.

        See Also
        --------
        DataFrame.iloc : Purely integer-location based indexing for selection by
            position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.

        Notes
        -----
        - The slice step cannot be negative.
        - If the index specification results in overlaps, the item is not duplicated.
        - If the index specification changes the order of items, then
          they are returned in their original order.
          By contrast, ``DataFrame.iloc`` can change the row order.
        - ``groupby()`` parameters such as as_index and dropna are ignored.

        The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth`
        with ``as_index=False`` are:

        - Input to ``_positional_selector`` can include
          one or more slices whereas ``nth``
          just handles an integer or a list of integers.
        - ``_positional_selector`` can accept a slice relative to the
          last row of each group.
        - ``_positional_selector`` does not have an equivalent to the
          ``nth()`` ``dropna`` parameter.

        Examples
        --------
        >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]],
        ...                   columns=["A", "B"])
        >>> df.groupby("A")._positional_selector[1:2]
           A  B
        1  a  2
        4  b  5

        >>> df.groupby("A")._positional_selector[1, -1]
           A  B
        1  a  2
        2  a  3
        4  b  5
        """
        if TYPE_CHECKING:
            # pylint: disable-next=used-before-assignment
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return GroupByPositionalSelector(groupby_self)

    def _make_mask_from_positional_indexer(
        self,
        arg: PositionalIndexer | tuple,
    ) -> np.ndarray:
        """
        Build a boolean row mask from a positional indexer.

        Parameters
        ----------
        arg : int, slice, list-like of int, or list-like of ints and slices
            Positions to select within each group. One-shot iterables
            (generators, iterators) are accepted.

        Returns
        -------
        np.ndarray
            Boolean mask with one entry per element of ``_ascending_count``.

        Raises
        ------
        TypeError
            If ``arg`` is not an integer, a slice or list-like.
        ValueError
            If a list-like mixes in items that are neither ints nor slices,
            or a slice has a negative step.
        """
        if is_list_like(arg):
            # Materialize once: the items are scanned here and again by the
            # helper, which would exhaust a generator/iterator and silently
            # select nothing.
            items = list(cast(Iterable, arg))
            if all(is_integer(i) for i in items):
                mask = self._make_mask_from_list(cast(Iterable[int], items))
            else:
                mask = self._make_mask_from_tuple(tuple(items))

        elif isinstance(arg, slice):
            mask = self._make_mask_from_slice(arg)
        elif is_integer(arg):
            mask = self._make_mask_from_int(cast(int, arg))
        else:
            raise TypeError(
                f"Invalid index {type(arg)}. "
                "Must be integer, list-like, slice or a tuple of "
                "integers and slices"
            )

        # The helpers return a plain bool when no array condition was applied
        # (e.g. an empty list or ``slice(None)``); expand it to a full-length
        # all-True / all-False array.
        if isinstance(mask, bool):
            if mask:
                mask = self._ascending_count >= 0
            else:
                mask = self._ascending_count < 0

        return cast(np.ndarray, mask)

    def _make_mask_from_int(self, arg: int) -> np.ndarray:
        """Return the mask selecting position ``arg``; negatives count from the end."""
        if arg >= 0:
            return self._ascending_count == arg
        else:
            return self._descending_count == (-arg - 1)

    def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray:
        """
        Return the mask selecting any of the integer positions in ``args``.

        ``args`` is traversed exactly once, so one-shot iterables are handled
        correctly. Returns the plain bool ``False`` when ``args`` is empty.
        """
        positive: list[int] = []
        negative: list[int] = []
        for arg in args:
            if arg >= 0:
                positive.append(arg)
            else:
                # -1 -> 0, -2 -> 1, ...: offset in the descending count
                negative.append(-arg - 1)

        mask: bool | np.ndarray = False

        if positive:
            mask |= np.isin(self._ascending_count, positive)

        if negative:
            mask |= np.isin(self._descending_count, negative)

        return mask

    def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray:
        """
        Return the union of the masks of each int or slice in ``args``.

        Raises
        ------
        ValueError
            If an item is neither an integer nor a slice.
        """
        mask: bool | np.ndarray = False

        for arg in args:
            if is_integer(arg):
                mask |= self._make_mask_from_int(cast(int, arg))
            elif isinstance(arg, slice):
                mask |= self._make_mask_from_slice(arg)
            else:
                raise ValueError(
                    f"Invalid argument {type(arg)}. Should be int or slice."
                )

        return mask

    def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray:
        """
        Return the mask selecting the positions covered by slice ``arg``.

        Returns the plain bool ``True`` when the slice places no restriction
        (no start, no stop, step of 1 or None).

        Raises
        ------
        ValueError
            If the slice step is negative.
        """
        start = arg.start
        stop = arg.stop
        step = arg.step

        if step is not None and step < 0:
            raise ValueError(f"Invalid step {step}. Must be non-negative")

        mask: bool | np.ndarray = True

        if step is None:
            step = 1

        if start is None:
            if step > 1:
                mask &= self._ascending_count % step == 0

        elif start >= 0:
            mask &= self._ascending_count >= start

            if step > 1:
                mask &= (self._ascending_count - start) % step == 0

        else:
            # Negative start: keep only the last ``-start`` rows of each group.
            mask &= self._descending_count < -start

            # Stepping is anchored at the row ``-start`` from the end; where
            # ``limit_array`` is set (ascending + descending + start + 1 < 0)
            # the ascending count is used as the anchor instead.
            offset_array = self._descending_count + start + 1
            limit_array = (
                self._ascending_count + self._descending_count + (start + 1)
            ) < 0
            offset_array = np.where(limit_array, self._ascending_count, offset_array)

            mask &= offset_array % step == 0

        if stop is not None:
            if stop >= 0:
                mask &= self._ascending_count < stop
            else:
                mask &= self._descending_count >= -stop

        return mask

    @cache_readonly
    def _ascending_count(self) -> np.ndarray:
        """Cached result of ``self._cumcount_array()``."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array()

    @cache_readonly
    def _descending_count(self) -> np.ndarray:
        """Cached result of ``self._cumcount_array(ascending=False)``."""
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array(ascending=False)
|
||||
|
||||
|
||||
@doc(GroupByIndexingMixin._positional_selector)
class GroupByPositionalSelector:
    # No class docstring on purpose: ``@doc`` supplies it from
    # ``GroupByIndexingMixin._positional_selector``.
    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        self.groupby_object = groupby_object

    def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series:
        """
        Select rows of every group by their position within the group.

        This is the implementation behind ``GroupBy._positional_selector``.

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            One of:

            - an int
            - an iterable of ints, such as a list or a range
            - a slice whose step is None or positive
            - a tuple mixing integers and slices

        Returns
        -------
        Series
            The filtered subset of the original groupby Series.
        DataFrame
            The filtered subset of the original groupby DataFrame.

        See Also
        --------
        DataFrame.iloc : Integer-location based indexing for selection by position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy._positional_selector : Return positional selection for each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.
        """
        grouped = self.groupby_object
        # Turn the indexer into a boolean row mask, then let the groupby
        # object apply it to its selected data.
        row_mask = grouped._make_mask_from_positional_indexer(arg)
        return grouped._mask_selected_obj(row_mask)
|
||||
|
||||
|
||||
class GroupByNthSelector:
    """
    Stand-in for GroupBy.nth that supports both ``nth(n)`` and ``nth[n]``.

    Both forms forward to the wrapped groupby object's ``_nth``.
    """

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        self.groupby_object = groupby_object

    def __call__(
        self,
        n: PositionalIndexer | tuple,
        dropna: Literal["any", "all", None] = None,
    ) -> DataFrame | Series:
        # Call syntax: forward both the position(s) and ``dropna``.
        target = self.groupby_object
        return target._nth(n, dropna)

    def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series:
        # Index syntax: only the position(s); ``_nth`` uses its own default
        # for ``dropna``.
        target = self.groupby_object
        return target._nth(n)
|
181
lib/python3.13/site-packages/pandas/core/groupby/numba_.py
Normal file
181
lib/python3.13/site-packages/pandas/core/groupby/numba_.py
Normal file
@ -0,0 +1,181 @@
|
||||
"""Common utilities for Numba operations with groupby ops"""
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import inspect
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.compat._optional import import_optional_dependency
|
||||
|
||||
from pandas.core.util.numba_ import (
|
||||
NumbaUtilError,
|
||||
jit_user_function,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import Scalar
|
||||
|
||||
|
||||
def validate_udf(func: Callable) -> None:
    """
    Validate user defined function for ops when using Numba with groupby ops.

    The first signature arguments should include:

    def f(values, index, ...):
        ...

    Parameters
    ----------
    func : callable
        user defined function

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        If ``func`` is not callable.
    NumbaUtilError
        If the first two parameters of ``func`` are not named
        ``values`` and ``index``, in that order.
    """
    if not callable(func):
        raise NotImplementedError(
            "Numba engine can only be used with a single function."
        )
    udf_signature = list(inspect.signature(func).parameters.keys())
    expected_args = ["values", "index"]
    min_number_args = len(expected_args)
    if (
        len(udf_signature) < min_number_args
        or udf_signature[:min_number_args] != expected_args
    ):
        # Not every callable has __name__ (e.g. functools.partial objects or
        # instances defining __call__); fall back to repr so the intended
        # NumbaUtilError is raised instead of an AttributeError.
        func_name = getattr(func, "__name__", repr(func))
        raise NumbaUtilError(
            f"The first {min_number_args} arguments to {func_name} must be "
            f"{expected_args}"
        )
|
||||
|
||||
|
||||
# functools.cache memoizes on the argument tuple (func, nopython, nogil,
# parallel), so repeated calls with the same arguments return the same
# generated function object.
@functools.cache
def generate_numba_agg_func(
    func: Callable[..., Scalar],
    nopython: bool,
    nogil: bool,
    parallel: bool,
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]:
    """
    Generate a numba jitted agg function specified by values from engine_kwargs.

    1. jit the user's function
    2. Return a groupby agg function with the jitted function inline

    Configurations specified in engine_kwargs apply to both the user's
    function _AND_ the groupby evaluation loop.

    Parameters
    ----------
    func : function
        function to be applied to each group and will be JITed
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
        Called as ``group_agg(values, index, begin, end, num_columns, *args)``;
        returns an array of shape ``(len(begin), num_columns)``.
    """
    numba_func = jit_user_function(func)
    # numba is an optional dependency: import it lazily at call time, while
    # still letting static type checkers see the real module.
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_agg(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_columns: int,
        *args: Any,
    ) -> np.ndarray:
        # begin[i]:end[i] is the row range of group i in `values` and `index`.
        assert len(begin) == len(end)
        num_groups = len(begin)

        # One output cell per (group, column); every cell is assigned below.
        result = np.empty((num_groups, num_columns))
        for i in numba.prange(num_groups):
            group_index = index[begin[i] : end[i]]
            for j in numba.prange(num_columns):
                # Rows of group i restricted to column j.
                group = values[begin[i] : end[i], j]
                result[i, j] = numba_func(group, group_index, *args)
        return result

    return group_agg
|
||||
|
||||
|
||||
# functools.cache memoizes on the argument tuple (func, nopython, nogil,
# parallel), so repeated calls with the same arguments return the same
# generated function object.
@functools.cache
def generate_numba_transform_func(
    func: Callable[..., np.ndarray],
    nopython: bool,
    nogil: bool,
    parallel: bool,
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]:
    """
    Generate a numba jitted transform function specified by values from engine_kwargs.

    1. jit the user's function
    2. Return a groupby transform function with the jitted function inline

    Configurations specified in engine_kwargs apply to both the user's
    function _AND_ the groupby evaluation loop.

    Parameters
    ----------
    func : function
        function to be applied to each group and will be JITed
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
        Called as
        ``group_transform(values, index, begin, end, num_columns, *args)``;
        returns an array of shape ``(len(values), num_columns)``.
    """
    numba_func = jit_user_function(func)
    # numba is an optional dependency: import it lazily at call time, while
    # still letting static type checkers see the real module.
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_transform(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_columns: int,
        *args: Any,
    ) -> np.ndarray:
        # begin[i]:end[i] is the row range of group i in `values` and `index`.
        assert len(begin) == len(end)
        num_groups = len(begin)

        # Same number of rows as the input. Only rows covered by some
        # begin[i]:end[i] range are assigned below.
        # NOTE(review): presumably the ranges cover every row -- confirm in caller.
        result = np.empty((len(values), num_columns))
        for i in numba.prange(num_groups):
            group_index = index[begin[i] : end[i]]
            for j in numba.prange(num_columns):
                # Rows of group i restricted to column j.
                group = values[begin[i] : end[i], j]
                # The UDF result is written back over the group's own rows.
                result[begin[i] : end[i], j] = numba_func(group, group_index, *args)
        return result

    return group_transform
|
1208
lib/python3.13/site-packages/pandas/core/groupby/ops.py
Normal file
1208
lib/python3.13/site-packages/pandas/core/groupby/ops.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user