Updated script that can be controlled by a Node.js web app
This commit is contained in:
@@ -0,0 +1,125 @@
|
||||
"""
|
||||
Numba 1D min/max kernels that can be shared by
|
||||
* DataFrame / Series
|
||||
* groupby
|
||||
* rolling / expanding
|
||||
|
||||
Mirrors pandas/_libs/window/aggregation.pyx
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numba
|
||||
import numpy as np
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import npt
|
||||
|
||||
|
||||
@numba.jit(nopython=True, nogil=True, parallel=False)
def sliding_min_max(
    values: np.ndarray,
    result_dtype: np.dtype,
    start: np.ndarray,
    end: np.ndarray,
    min_periods: int,
    is_max: bool,
) -> tuple[np.ndarray, list[int]]:
    """
    Compute the minimum or maximum of ``values`` over a series of windows.

    Window ``i`` spans ``values[start[i]:end[i]]``. When advancing to
    window ``i`` only the elements from ``end[i - 1]`` up to ``end[i]`` are
    read (from ``start[0]`` for the first window).
    NOTE(review): this presumes window bounds never move backwards;
    confirm against the callers.

    Parameters
    ----------
    values : np.ndarray
        1D array to reduce.
    result_dtype : np.dtype
        dtype used to allocate the output array.
    start : np.ndarray
        Inclusive start position of each window.
    end : np.ndarray
        Exclusive end position of each window.
    min_periods : int
        Minimum number of non-NaN observations a window must hold for a
        value to be written.
    is_max : bool
        Compute the maximum when True, the minimum otherwise.

    Returns
    -------
    tuple[np.ndarray, list[int]]
        The output array (one entry per window) and the positions that have
        no valid result. Positions are only collected when ``values`` has
        dtype kind ``"i"``; for every other dtype ``np.nan`` is written into
        the output instead and the list stays empty. Collected positions are
        left unset in the output array.
    """
    num_windows = len(start)
    valid_count = 0
    output = np.empty(num_windows, dtype=result_dtype)
    na_pos = []
    # Use deque once numba supports it
    # https://github.com/numba/numba/issues/7417
    # Indices of candidate extrema; the front one is the current answer.
    candidates: list = []
    # Every index currently counted towards ``valid_count``, in order.
    in_window: list = []

    for i in range(num_windows):
        win_start = start[i]
        win_end = end[i]
        window_len = win_end - win_start

        # Only elements not consumed by the previous window are new.
        if i == 0:
            first_new = win_start
        else:
            first_new = end[i - 1]

        for k in range(first_new, win_end):
            ai = values[k]
            if np.isnan(ai):
                # Replace NaN by a stand-in that never beats a real value.
                if is_max:
                    ai = -np.inf
                else:
                    ai = np.inf
            else:
                valid_count += 1

            # Discard previous entries if we find new min or max
            while candidates:
                tail = values[candidates[-1]]
                if is_max:
                    # ``tail != tail`` is True only for NaN entries.
                    beaten = (ai >= tail) or tail != tail
                else:
                    beaten = (ai <= tail) or tail != tail
                if not beaten:
                    break
                candidates.pop()

            candidates.append(k)
            in_window.append(k)

        # Discard entries outside and left of current window
        while candidates and candidates[0] <= win_start - 1:
            candidates.pop(0)
        while in_window and in_window[0] <= win_start - 1:
            oldest = in_window.pop(0)
            if not np.isnan(values[oldest]):
                valid_count -= 1

        # Save output based on index in input value array
        if candidates and window_len > 0 and valid_count >= min_periods:
            output[i] = values[candidates[0]]
        elif values.dtype.kind != "i":
            output[i] = np.nan
        else:
            # Integer output cannot hold NaN; report the position instead.
            na_pos.append(i)

    return output, na_pos
|
||||
|
||||
|
||||
@numba.jit(nopython=True, nogil=True, parallel=False)
def grouped_min_max(
    values: np.ndarray,
    result_dtype: np.dtype,
    labels: npt.NDArray[np.intp],
    ngroups: int,
    min_periods: int,
    is_max: bool,
) -> tuple[np.ndarray, list[int]]:
    """
    Compute the per-group minimum or maximum of ``values``.

    Parameters
    ----------
    values : np.ndarray
        1D array to reduce; ``values[i]`` belongs to group ``labels[i]``.
    result_dtype : np.dtype
        dtype used to allocate the output array.
    labels : np.ndarray
        Group index of each element. Elements with a negative label are
        skipped.
    ngroups : int
        Number of groups, i.e. the length of the output.
    min_periods : int
        Minimum number of valid observations a group must hold for its
        result to be considered valid.
    is_max : bool
        Compute the maximum when True, the minimum otherwise.

    Returns
    -------
    tuple[np.ndarray, list[int]]
        The output array (one entry per group) and the list of group
        positions whose entry is not valid: groups with fewer than
        ``min_periods`` valid observations and groups with no valid
        observation at all. Entries at those positions must not be read.
    """
    N = len(labels)
    nobs = np.zeros(ngroups, dtype=np.int64)
    na_pos = []
    output = np.empty(ngroups, dtype=result_dtype)

    for i in range(N):
        lab = labels[i]
        val = values[i]
        if lab < 0:
            continue

        if values.dtype.kind == "i" or not np.isnan(val):
            nobs[lab] += 1
        else:
            # NaN value cannot be a min/max value
            continue

        if nobs[lab] == 1:
            # First element in group, set output equal to this
            output[lab] = val
            continue

        if is_max:
            if val > output[lab]:
                output[lab] = val
        else:
            if val < output[lab]:
                output[lab] = val

    # Report labels that don't satisfy min_periods. ``output`` is allocated
    # uninitialised, so a group without any valid observation never had its
    # slot written and must be reported even when ``min_periods <= 0``.
    for lab, count in enumerate(nobs):
        if count == 0 or count < min_periods:
            na_pos.append(lab)

    return output, na_pos
|
Reference in New Issue
Block a user