Updated script that can be controlled by a Node.js web app
146
lib/python3.13/site-packages/pandas/tests/window/conftest.py
Normal file
@@ -0,0 +1,146 @@
from datetime import (
    datetime,
    timedelta,
)

import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
    DataFrame,
    Series,
    bdate_range,
)


@pytest.fixture(params=[True, False])
def raw(request):
    """raw keyword argument for rolling.apply"""
    return request.param


@pytest.fixture(
    params=[
        "sum",
        "mean",
        "median",
        "max",
        "min",
        "var",
        "std",
        "kurt",
        "skew",
        "count",
        "sem",
    ]
)
def arithmetic_win_operators(request):
    return request.param


@pytest.fixture(params=[True, False])
def center(request):
    return request.param


@pytest.fixture(params=[None, 1])
def min_periods(request):
    return request.param


@pytest.fixture(params=[True, False])
def parallel(request):
    """parallel keyword argument for numba.jit"""
    return request.param


# Can parameterize nogil & nopython over True | False, but limiting per
# https://github.com/pandas-dev/pandas/pull/41971#issuecomment-860607472


@pytest.fixture(params=[False])
def nogil(request):
    """nogil keyword argument for numba.jit"""
    return request.param


@pytest.fixture(params=[True])
def nopython(request):
    """nopython keyword argument for numba.jit"""
    return request.param


@pytest.fixture(params=[True, False])
def adjust(request):
    """adjust keyword argument for ewm"""
    return request.param


@pytest.fixture(params=[True, False])
def ignore_na(request):
    """ignore_na keyword argument for ewm"""
    return request.param


@pytest.fixture(params=[True, False])
def numeric_only(request):
    """numeric_only keyword argument"""
    return request.param


@pytest.fixture(
    params=[
        pytest.param("numba", marks=[td.skip_if_no("numba"), pytest.mark.single_cpu]),
        "cython",
    ]
)
def engine(request):
    """engine keyword argument for rolling.apply"""
    return request.param


@pytest.fixture(
    params=[
        pytest.param(
            ("numba", True), marks=[td.skip_if_no("numba"), pytest.mark.single_cpu]
        ),
        ("cython", True),
        ("cython", False),
    ]
)
def engine_and_raw(request):
    """engine and raw keyword arguments for rolling.apply"""
    return request.param


@pytest.fixture(params=["1 day", timedelta(days=1), np.timedelta64(1, "D")])
def halflife_with_times(request):
    """Halflife argument for EWM when times is specified."""
    return request.param


@pytest.fixture
def series():
    """Make mocked series as fixture."""
    arr = np.random.default_rng(2).standard_normal(100)
    locs = np.arange(20, 40)
    arr[locs] = np.nan
    series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100))
    return series


@pytest.fixture
def frame():
    """Make mocked frame as fixture."""
    return DataFrame(
        np.random.default_rng(2).standard_normal((100, 10)),
        index=bdate_range(datetime(2009, 1, 1), periods=100),
    )


@pytest.fixture(params=[None, 1, 2, 5, 10])
def step(request):
    """step keyword argument for rolling window operations."""
    return request.param
@@ -0,0 +1,72 @@
import itertools

import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
    notna,
)


def create_series():
    return [
        Series(dtype=np.float64, name="a"),
        Series([np.nan] * 5),
        Series([1.0] * 5),
        Series(range(5, 0, -1)),
        Series(range(5)),
        Series([np.nan, 1.0, np.nan, 1.0, 1.0]),
        Series([np.nan, 1.0, np.nan, 2.0, 3.0]),
        Series([np.nan, 1.0, np.nan, 3.0, 2.0]),
    ]


def create_dataframes():
    return [
        DataFrame(columns=["a", "a"]),
        DataFrame(np.arange(15).reshape((5, 3)), columns=["a", "a", 99]),
    ] + [DataFrame(s) for s in create_series()]


def is_constant(x):
    values = x.values.ravel("K")
    return len(set(values[notna(values)])) == 1


@pytest.fixture(
    params=(
        obj
        for obj in itertools.chain(create_series(), create_dataframes())
        if is_constant(obj)
    ),
)
def consistent_data(request):
    return request.param


@pytest.fixture(params=create_series())
def series_data(request):
    return request.param


@pytest.fixture(params=itertools.chain(create_series(), create_dataframes()))
def all_data(request):
    """
    Test:
        - Empty Series / DataFrame
        - All NaN
        - All consistent value
        - Monotonically decreasing
        - Monotonically increasing
        - Monotonically consistent with NaNs
        - Monotonically increasing with NaNs
        - Monotonically decreasing with NaNs
    """
    return request.param


@pytest.fixture(params=[0, 2])
def min_periods(request):
    return request.param
@@ -0,0 +1,243 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
    concat,
)
import pandas._testing as tm


def create_mock_weights(obj, com, adjust, ignore_na):
    if isinstance(obj, DataFrame):
        if not len(obj.columns):
            return DataFrame(index=obj.index, columns=obj.columns)
        w = concat(
            [
                create_mock_series_weights(
                    obj.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na
                )
                for i in range(len(obj.columns))
            ],
            axis=1,
        )
        w.index = obj.index
        w.columns = obj.columns
        return w
    else:
        return create_mock_series_weights(obj, com, adjust, ignore_na)


def create_mock_series_weights(s, com, adjust, ignore_na):
    w = Series(np.nan, index=s.index, name=s.name)
    alpha = 1.0 / (1.0 + com)
    if adjust:
        count = 0
        for i in range(len(s)):
            if s.iat[i] == s.iat[i]:
                w.iat[i] = pow(1.0 / (1.0 - alpha), count)
                count += 1
            elif not ignore_na:
                count += 1
    else:
        sum_wts = 0.0
        prev_i = -1
        count = 0
        for i in range(len(s)):
            if s.iat[i] == s.iat[i]:
                if prev_i == -1:
                    w.iat[i] = 1.0
                else:
                    w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, count - prev_i)
                sum_wts += w.iat[i]
                prev_i = count
                count += 1
            elif not ignore_na:
                count += 1
    return w


def test_ewm_consistency_mean(all_data, adjust, ignore_na, min_periods):
    com = 3.0

    result = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).mean()
    weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
    expected = all_data.multiply(weights).cumsum().divide(weights.cumsum()).ffill()
    expected[
        all_data.expanding().count() < (max(min_periods, 1) if min_periods else 1)
    ] = np.nan
    tm.assert_equal(result, expected.astype("float64"))


def test_ewm_consistency_consistent(consistent_data, adjust, ignore_na, min_periods):
    com = 3.0

    count_x = consistent_data.expanding().count()
    mean_x = consistent_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).mean()
    # check that correlation of a series with itself is either 1 or NaN
    corr_x_x = consistent_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).corr(consistent_data)
    exp = (
        consistent_data.max()
        if isinstance(consistent_data, Series)
        else consistent_data.max().max()
    )

    # check mean of constant series
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = exp
    tm.assert_equal(mean_x, expected)

    # check correlation of constant series with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)


def test_ewm_consistency_var_debiasing_factors(
    all_data, adjust, ignore_na, min_periods
):
    com = 3.0

    # check variance debiasing factors
    var_unbiased_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=False)
    var_biased_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=True)

    weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
    cum_sum = weights.cumsum().ffill()
    cum_sum_sq = (weights * weights).cumsum().ffill()
    numerator = cum_sum * cum_sum
    denominator = numerator - cum_sum_sq
    denominator[denominator <= 0.0] = np.nan
    var_debiasing_factors_x = numerator / denominator

    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)


@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias):
    com = 3.0

    mean_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).mean()
    var_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)
    assert not (var_x < 0).any().any()

    if bias:
        # check that biased var(x) == mean(x^2) - mean(x)^2
        mean_x2 = (
            (all_data * all_data)
            .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na)
            .mean()
        )
        tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))


@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var_constant(
    consistent_data, adjust, ignore_na, min_periods, bias
):
    com = 3.0
    count_x = consistent_data.expanding(min_periods=min_periods).count()
    var_x = consistent_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)

    # check that variance of constant series is identically 0
    assert not (var_x > 0).any().any()
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = 0.0
    if not bias:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)


@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias):
    com = 3.0
    var_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)
    assert not (var_x < 0).any().any()

    std_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).std(bias=bias)
    assert not (std_x < 0).any().any()

    # check that var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)

    cov_x_x = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).cov(all_data, bias=bias)
    assert not (cov_x_x < 0).any().any()

    # check that var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)


@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_series_cov_corr(
    series_data, adjust, ignore_na, min_periods, bias
):
    com = 3.0

    var_x_plus_y = (
        (series_data + series_data)
        .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na)
        .var(bias=bias)
    )
    var_x = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)
    var_y = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)
    cov_x_y = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).cov(series_data, bias=bias)
    # check that cov(x, y) == (var(x+y) - var(x) -
    # var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # check that corr(x, y) == cov(x, y) / (std(x) *
    # std(y))
    corr_x_y = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).corr(series_data)
    std_x = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).std(bias=bias)
    std_y = series_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).std(bias=bias)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if bias:
        # check that biased cov(x, y) == mean(x*y) -
        # mean(x)*mean(y)
        mean_x = series_data.ewm(
            com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
        ).mean()
        mean_y = series_data.ewm(
            com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
        ).mean()
        mean_x_times_y = (
            (series_data * series_data)
            .ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na)
            .mean()
        )
        tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
@@ -0,0 +1,144 @@
import numpy as np
import pytest

from pandas import Series
import pandas._testing as tm


def no_nans(x):
    return x.notna().all().all()


def all_na(x):
    return x.isnull().all().all()


@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f):
    if f is np.sum:
        if not no_nans(all_data) and not (
            all_na(all_data) and not all_data.empty and min_periods > 0
        ):
            request.applymarker(
                pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
            )
    expanding_f_result = all_data.expanding(min_periods=min_periods).sum()
    expanding_apply_f_result = all_data.expanding(min_periods=min_periods).apply(
        func=f, raw=True
    )
    tm.assert_equal(expanding_f_result, expanding_apply_f_result)


@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, min_periods, ddof):
    var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    if ddof == 0:
        # check that biased var(x) == mean(x^2) - mean(x)^2
        mean_x2 = (all_data * all_data).expanding(min_periods=min_periods).mean()
        mean_x = all_data.expanding(min_periods=min_periods).mean()
        tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))


@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(consistent_data, min_periods, ddof):
    count_x = consistent_data.expanding(min_periods=min_periods).count()
    var_x = consistent_data.expanding(min_periods=min_periods).var(ddof=ddof)

    # check that variance of constant series is identically 0
    assert not (var_x > 0).any().any()
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = 0.0
    if ddof == 1:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)


@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof):
    var_x = all_data.expanding(min_periods=min_periods).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    std_x = all_data.expanding(min_periods=min_periods).std(ddof=ddof)
    assert not (std_x < 0).any().any()

    # check that var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)

    cov_x_x = all_data.expanding(min_periods=min_periods).cov(all_data, ddof=ddof)
    assert not (cov_x_x < 0).any().any()

    # check that var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)


@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_series_cov_corr(series_data, min_periods, ddof):
    var_x_plus_y = (
        (series_data + series_data).expanding(min_periods=min_periods).var(ddof=ddof)
    )
    var_x = series_data.expanding(min_periods=min_periods).var(ddof=ddof)
    var_y = series_data.expanding(min_periods=min_periods).var(ddof=ddof)
    cov_x_y = series_data.expanding(min_periods=min_periods).cov(series_data, ddof=ddof)
    # check that cov(x, y) == (var(x+y) - var(x) -
    # var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # check that corr(x, y) == cov(x, y) / (std(x) *
    # std(y))
    corr_x_y = series_data.expanding(min_periods=min_periods).corr(series_data)
    std_x = series_data.expanding(min_periods=min_periods).std(ddof=ddof)
    std_y = series_data.expanding(min_periods=min_periods).std(ddof=ddof)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if ddof == 0:
        # check that biased cov(x, y) == mean(x*y) -
        # mean(x)*mean(y)
        mean_x = series_data.expanding(min_periods=min_periods).mean()
        mean_y = series_data.expanding(min_periods=min_periods).mean()
        mean_x_times_y = (
            (series_data * series_data).expanding(min_periods=min_periods).mean()
        )
        tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))


def test_expanding_consistency_mean(all_data, min_periods):
    result = all_data.expanding(min_periods=min_periods).mean()
    expected = (
        all_data.expanding(min_periods=min_periods).sum()
        / all_data.expanding(min_periods=min_periods).count()
    )
    tm.assert_equal(result, expected.astype("float64"))


def test_expanding_consistency_constant(consistent_data, min_periods):
    count_x = consistent_data.expanding().count()
    mean_x = consistent_data.expanding(min_periods=min_periods).mean()
    # check that correlation of a series with itself is either 1 or NaN
    corr_x_x = consistent_data.expanding(min_periods=min_periods).corr(consistent_data)

    exp = (
        consistent_data.max()
        if isinstance(consistent_data, Series)
        else consistent_data.max().max()
    )

    # check mean of constant series
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = exp
    tm.assert_equal(mean_x, expected)

    # check correlation of constant series with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)


def test_expanding_consistency_var_debiasing_factors(all_data, min_periods):
    # check variance debiasing factors
    var_unbiased_x = all_data.expanding(min_periods=min_periods).var()
    var_biased_x = all_data.expanding(min_periods=min_periods).var(ddof=0)
    var_debiasing_factors_x = all_data.expanding().count() / (
        all_data.expanding().count() - 1.0
    ).replace(0.0, np.nan)
    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
@@ -0,0 +1,244 @@
import numpy as np
import pytest

from pandas import Series
import pandas._testing as tm


def no_nans(x):
    return x.notna().all().all()


def all_na(x):
    return x.isnull().all().all()


@pytest.fixture(params=[(1, 0), (5, 1)])
def rolling_consistency_cases(request):
    """window, min_periods"""
    return request.param


@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_rolling_apply_consistency_sum(
    request, all_data, rolling_consistency_cases, center, f
):
    window, min_periods = rolling_consistency_cases

    if f is np.sum:
        if not no_nans(all_data) and not (
            all_na(all_data) and not all_data.empty and min_periods > 0
        ):
            request.applymarker(
                pytest.mark.xfail(reason="np.sum has different behavior with NaNs")
            )
    rolling_f_result = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).sum()
    rolling_apply_f_result = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).apply(func=f, raw=True)
    tm.assert_equal(rolling_f_result, rolling_apply_f_result)


@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, rolling_consistency_cases, center, ddof):
    window, min_periods = rolling_consistency_cases

    var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var(
        ddof=ddof
    )
    assert not (var_x < 0).any().any()

    if ddof == 0:
        # check that biased var(x) == mean(x^2) - mean(x)^2
        mean_x = all_data.rolling(
            window=window, min_periods=min_periods, center=center
        ).mean()
        mean_x2 = (
            (all_data * all_data)
            .rolling(window=window, min_periods=min_periods, center=center)
            .mean()
        )
        tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))


@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(
    consistent_data, rolling_consistency_cases, center, ddof
):
    window, min_periods = rolling_consistency_cases

    count_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).count()
    var_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var(ddof=ddof)

    # check that variance of constant series is identically 0
    assert not (var_x > 0).any().any()
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = 0.0
    if ddof == 1:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)


@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_var_std_cov(
    all_data, rolling_consistency_cases, center, ddof
):
    window, min_periods = rolling_consistency_cases

    var_x = all_data.rolling(window=window, min_periods=min_periods, center=center).var(
        ddof=ddof
    )
    assert not (var_x < 0).any().any()

    std_x = all_data.rolling(window=window, min_periods=min_periods, center=center).std(
        ddof=ddof
    )
    assert not (std_x < 0).any().any()

    # check that var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)

    cov_x_x = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).cov(all_data, ddof=ddof)
    assert not (cov_x_x < 0).any().any()

    # check that var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)


@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_series_cov_corr(
    series_data, rolling_consistency_cases, center, ddof
):
    window, min_periods = rolling_consistency_cases

    var_x_plus_y = (
        (series_data + series_data)
        .rolling(window=window, min_periods=min_periods, center=center)
        .var(ddof=ddof)
    )
    var_x = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var(ddof=ddof)
    var_y = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var(ddof=ddof)
    cov_x_y = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).cov(series_data, ddof=ddof)
    # check that cov(x, y) == (var(x+y) - var(x) -
    # var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # check that corr(x, y) == cov(x, y) / (std(x) *
    # std(y))
    corr_x_y = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).corr(series_data)
    std_x = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).std(ddof=ddof)
    std_y = series_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).std(ddof=ddof)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if ddof == 0:
        # check that biased cov(x, y) == mean(x*y) -
        # mean(x)*mean(y)
        mean_x = series_data.rolling(
            window=window, min_periods=min_periods, center=center
        ).mean()
        mean_y = series_data.rolling(
            window=window, min_periods=min_periods, center=center
        ).mean()
        mean_x_times_y = (
            (series_data * series_data)
            .rolling(window=window, min_periods=min_periods, center=center)
            .mean()
        )
        tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))


def test_rolling_consistency_mean(all_data, rolling_consistency_cases, center):
    window, min_periods = rolling_consistency_cases

    result = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).mean()
    expected = (
        all_data.rolling(window=window, min_periods=min_periods, center=center)
        .sum()
        .divide(
            all_data.rolling(
                window=window, min_periods=min_periods, center=center
            ).count()
        )
    )
    tm.assert_equal(result, expected.astype("float64"))


def test_rolling_consistency_constant(
    consistent_data, rolling_consistency_cases, center
):
    window, min_periods = rolling_consistency_cases

    count_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).count()
    mean_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).mean()
    # check that correlation of a series with itself is either 1 or NaN
    corr_x_x = consistent_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).corr(consistent_data)

    exp = (
        consistent_data.max()
        if isinstance(consistent_data, Series)
        else consistent_data.max().max()
    )

    # check mean of constant series
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = exp
    tm.assert_equal(mean_x, expected)

    # check correlation of constant series with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)


def test_rolling_consistency_var_debiasing_factors(
    all_data, rolling_consistency_cases, center
):
    window, min_periods = rolling_consistency_cases

    # check variance debiasing factors
    var_unbiased_x = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var()
    var_biased_x = all_data.rolling(
        window=window, min_periods=min_periods, center=center
    ).var(ddof=0)
    var_debiasing_factors_x = (
        all_data.rolling(window=window, min_periods=min_periods, center=center)
        .count()
        .divide(
            (
                all_data.rolling(
                    window=window, min_periods=min_periods, center=center
                ).count()
                - 1.0
            ).replace(0.0, np.nan)
        )
    )
    tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
398
lib/python3.13/site-packages/pandas/tests/window/test_api.py
Normal file
@@ -0,0 +1,398 @@
import numpy as np
import pytest

from pandas.errors import (
    DataError,
    SpecificationError,
)

from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    Period,
    Series,
    Timestamp,
    concat,
    date_range,
    timedelta_range,
)
import pandas._testing as tm


def test_getitem(step):
    frame = DataFrame(np.random.default_rng(2).standard_normal((5, 5)))
    r = frame.rolling(window=5, step=step)
    tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns)

    r = frame.rolling(window=5, step=step)[1]
    assert r._selected_obj.name == frame[::step].columns[1]

    # technically this is allowed
    r = frame.rolling(window=5, step=step)[1, 3]
    tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]])

    r = frame.rolling(window=5, step=step)[[1, 3]]
    tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]])


def test_select_bad_cols():
    df = DataFrame([[1, 2]], columns=["A", "B"])
    g = df.rolling(window=5)
    with pytest.raises(KeyError, match="Columns not found: 'C'"):
        g[["C"]]
    with pytest.raises(KeyError, match="^[^A]+$"):
        # A should not be referenced as a bad column...
        # will have to rethink regex if you change message!
        g[["A", "C"]]


def test_attribute_access():
    df = DataFrame([[1, 2]], columns=["A", "B"])
    r = df.rolling(window=5)
    tm.assert_series_equal(r.A.sum(), r["A"].sum())
    msg = "'Rolling' object has no attribute 'F'"
    with pytest.raises(AttributeError, match=msg):
        r.F


def tests_skip_nuisance(step):
    df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
    r = df.rolling(window=3, step=step)
    result = r[["A", "B"]].sum()
    expected = DataFrame(
        {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
        columns=list("AB"),
    )[::step]
    tm.assert_frame_equal(result, expected)


def test_sum_object_str_raises(step):
    df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
    r = df.rolling(window=3, step=step)
    with pytest.raises(
        DataError, match="Cannot aggregate non-numeric type: object|string"
    ):
        # GH#42738, enforced in 2.0
        r.sum()


def test_agg(step):
    df = DataFrame({"A": range(5), "B": range(0, 10, 2)})

    r = df.rolling(window=3, step=step)
    a_mean = r["A"].mean()
    a_std = r["A"].std()
    a_sum = r["A"].sum()
    b_mean = r["B"].mean()
    b_std = r["B"].std()

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"):
        result = r.aggregate([np.mean, np.std])
    expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]])
    tm.assert_frame_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"):
        result = r.aggregate({"A": np.mean, "B": np.std})

    expected = concat([a_mean, b_std], axis=1)
    tm.assert_frame_equal(result, expected, check_like=True)

    result = r.aggregate({"A": ["mean", "std"]})
    expected = concat([a_mean, a_std], axis=1)
    expected.columns = MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
    tm.assert_frame_equal(result, expected)

    result = r["A"].aggregate(["mean", "sum"])
    expected = concat([a_mean, a_sum], axis=1)
    expected.columns = ["mean", "sum"]
    tm.assert_frame_equal(result, expected)

    msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        # using a dict with renaming
        r.aggregate({"A": {"mean": "mean", "sum": "sum"}})

    with pytest.raises(SpecificationError, match=msg):
        r.aggregate(
            {"A": {"mean": "mean", "sum": "sum"}, "B": {"mean2": "mean", "sum2": "sum"}}
        )

    result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
    expected = concat([a_mean, a_std, b_mean, b_std], axis=1)

    exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
    expected.columns = MultiIndex.from_tuples(exp_cols)
    tm.assert_frame_equal(result, expected, check_like=True)


@pytest.mark.parametrize(
    "func", [["min"], ["mean", "max"], {"b": "sum"}, {"b": "prod", "c": "median"}]
)
def test_multi_axis_1_raises(func):
    # GH#46904
    df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]})
    msg = "Support for axis=1 in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        r = df.rolling(window=3, axis=1)
    with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"):
        r.agg(func)


def test_agg_apply(raw):
    # passed lambda
    df = DataFrame({"A": range(5), "B": range(0, 10, 2)})

    r = df.rolling(window=3)
    a_sum = r["A"].sum()

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|std]"):
        result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
    rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw)
    expected = concat([a_sum, rcustom], axis=1)
    tm.assert_frame_equal(result, expected, check_like=True)


def test_agg_consistency(step):
    df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
    r = df.rolling(window=3, step=step)

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"):
        result = r.agg([np.sum, np.mean]).columns
    expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]])
    tm.assert_index_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"):
        result = r["A"].agg([np.sum, np.mean]).columns
    expected = Index(["sum", "mean"])
    tm.assert_index_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"):
        result = r.agg({"A": [np.sum, np.mean]}).columns
    expected = MultiIndex.from_tuples([("A", "sum"), ("A", "mean")])
    tm.assert_index_equal(result, expected)


def test_agg_nested_dicts():
    # API change for disallowing these types of nested dicts
    df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
    r = df.rolling(window=3)

    msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}})

    expected = concat(
        [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1
    )
    expected.columns = MultiIndex.from_tuples(
        [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")]
    )
    with pytest.raises(SpecificationError, match=msg):
        r[["A", "B"]].agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})

    with pytest.raises(SpecificationError, match=msg):
        r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})


def test_count_nonnumeric_types(step):
    # GH12541
    cols = [
        "int",
        "float",
        "string",
        "datetime",
        "timedelta",
        "periods",
        "fl_inf",
        "fl_nan",
        "str_nan",
        "dt_nat",
        "periods_nat",
    ]
    dt_nat_col = [Timestamp("20170101"), Timestamp("20170203"), Timestamp(None)]

    df = DataFrame(
        {
            "int": [1, 2, 3],
            "float": [4.0, 5.0, 6.0],
            "string": list("abc"),
            "datetime": date_range("20170101", periods=3),
            "timedelta": timedelta_range("1 s", periods=3, freq="s"),
            "periods": [
                Period("2012-01"),
                Period("2012-02"),
                Period("2012-03"),
            ],
            "fl_inf": [1.0, 2.0, np.inf],
            "fl_nan": [1.0, 2.0, np.nan],
            "str_nan": ["aa", "bb", np.nan],
            "dt_nat": dt_nat_col,
            "periods_nat": [
                Period("2012-01"),
                Period("2012-02"),
                Period(None),
            ],
        },
        columns=cols,
    )

    expected = DataFrame(
        {
            "int": [1.0, 2.0, 2.0],
            "float": [1.0, 2.0, 2.0],
            "string": [1.0, 2.0, 2.0],
            "datetime": [1.0, 2.0, 2.0],
            "timedelta": [1.0, 2.0, 2.0],
            "periods": [1.0, 2.0, 2.0],
            "fl_inf": [1.0, 2.0, 2.0],
            "fl_nan": [1.0, 2.0, 1.0],
            "str_nan": [1.0, 2.0, 1.0],
            "dt_nat": [1.0, 2.0, 1.0],
            "periods_nat": [1.0, 2.0, 1.0],
        },
        columns=cols,
    )[::step]

    result = df.rolling(window=2, min_periods=0, step=step).count()
    tm.assert_frame_equal(result, expected)

    result = df.rolling(1, min_periods=0, step=step).count()
    expected = df.notna().astype(float)[::step]
    tm.assert_frame_equal(result, expected)


def test_preserve_metadata():
    # GH 10565
    s = Series(np.arange(100), name="foo")

    s2 = s.rolling(30).sum()
    s3 = s.rolling(20).sum()
    assert s2.name == "foo"
    assert s3.name == "foo"


@pytest.mark.parametrize(
    "func,window_size,expected_vals",
    [
        (
            "rolling",
            2,
            [
                [np.nan, np.nan, np.nan, np.nan],
                [15.0, 20.0, 25.0, 20.0],
                [25.0, 30.0, 35.0, 30.0],
                [np.nan, np.nan, np.nan, np.nan],
                [20.0, 30.0, 35.0, 30.0],
                [35.0, 40.0, 60.0, 40.0],
                [60.0, 80.0, 85.0, 80],
            ],
        ),
        (
            "expanding",
            None,
            [
                [10.0, 10.0, 20.0, 20.0],
                [15.0, 20.0, 25.0, 20.0],
                [20.0, 30.0, 30.0, 20.0],
                [10.0, 10.0, 30.0, 30.0],
                [20.0, 30.0, 35.0, 30.0],
                [26.666667, 40.0, 50.0, 30.0],
                [40.0, 80.0, 60.0, 30.0],
            ],
        ),
    ],
)
def test_multiple_agg_funcs(func, window_size, expected_vals):
    # GH 15072
    df = DataFrame(
        [
            ["A", 10, 20],
            ["A", 20, 30],
            ["A", 30, 40],
            ["B", 10, 30],
            ["B", 30, 40],
            ["B", 40, 80],
            ["B", 80, 90],
        ],
        columns=["stock", "low", "high"],
    )

    f = getattr(df.groupby("stock"), func)
    if window_size:
        window = f(window_size)
    else:
        window = f()

    index = MultiIndex.from_tuples(
        [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)],
        names=["stock", None],
    )
    columns = MultiIndex.from_tuples(
        [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")]
    )
    expected = DataFrame(expected_vals, index=index, columns=columns)

    result = window.agg({"low": ["mean", "max"], "high": ["mean", "min"]})

    tm.assert_frame_equal(result, expected)


def test_dont_modify_attributes_after_methods(
    arithmetic_win_operators, closed, center, min_periods, step
):
    # GH 39554
    roll_obj = Series(range(1)).rolling(
        1, center=center, closed=closed, min_periods=min_periods, step=step
    )
    expected = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes}
    getattr(roll_obj, arithmetic_win_operators)()
    result = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes}
    assert result == expected


def test_centered_axis_validation(step):
    # ok
    msg = "The 'axis' keyword in Series.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        Series(np.ones(10)).rolling(window=3, center=True, axis=0, step=step).mean()

    # bad axis
    msg = "No axis named 1 for object type Series"
    with pytest.raises(ValueError, match=msg):
        Series(np.ones(10)).rolling(window=3, center=True, axis=1, step=step).mean()

    # ok ok
    df = DataFrame(np.ones((10, 10)))
    msg = "The 'axis' keyword in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.rolling(window=3, center=True, axis=0, step=step).mean()
    msg = "Support for axis=1 in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.rolling(window=3, center=True, axis=1, step=step).mean()

    # bad axis
    msg = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        (df.rolling(window=3, center=True, axis=2, step=step).mean())


def test_rolling_min_min_periods(step):
    a = Series([1, 2, 3, 4, 5])
    result = a.rolling(window=100, min_periods=1, step=step).min()
    expected = Series(np.ones(len(a)))[::step]
    tm.assert_series_equal(result, expected)
    msg = "min_periods 5 must be <= window 3"
    with pytest.raises(ValueError, match=msg):
        Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).min()


def test_rolling_max_min_periods(step):
    a = Series([1, 2, 3, 4, 5], dtype=np.float64)
    result = a.rolling(window=100, min_periods=1, step=step).max()
    expected = a[::step]
    tm.assert_almost_equal(result, expected)
    msg = "min_periods 5 must be <= window 3"
    with pytest.raises(ValueError, match=msg):
        Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).max()
328
lib/python3.13/site-packages/pandas/tests/window/test_apply.py
Normal file
@@ -0,0 +1,328 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    Series,
    Timestamp,
    concat,
    date_range,
    isna,
    notna,
)
import pandas._testing as tm

from pandas.tseries import offsets

# suppress warnings about empty slices, as we are deliberately testing
# with a 0-length Series
pytestmark = pytest.mark.filterwarnings(
    "ignore:.*(empty slice|0 for slice).*:RuntimeWarning"
)


def f(x):
    return x[np.isfinite(x)].mean()


@pytest.mark.parametrize("bad_raw", [None, 1, 0])
def test_rolling_apply_invalid_raw(bad_raw):
    with pytest.raises(ValueError, match="raw parameter must be `True` or `False`"):
        Series(range(3)).rolling(1).apply(len, raw=bad_raw)


def test_rolling_apply_out_of_bounds(engine_and_raw):
    # gh-1850
    engine, raw = engine_and_raw

    vals = Series([1, 2, 3, 4])

    result = vals.rolling(10).apply(np.sum, engine=engine, raw=raw)
    assert result.isna().all()

    result = vals.rolling(10, min_periods=1).apply(np.sum, engine=engine, raw=raw)
    expected = Series([1, 3, 6, 10], dtype=float)
    tm.assert_almost_equal(result, expected)


@pytest.mark.parametrize("window", [2, "2s"])
def test_rolling_apply_with_pandas_objects(window):
    # 5071
    df = DataFrame(
        {
            "A": np.random.default_rng(2).standard_normal(5),
            "B": np.random.default_rng(2).integers(0, 10, size=5),
        },
        index=date_range("20130101", periods=5, freq="s"),
    )

    # we have an equal spaced timeseries index
    # so simulate removing the first period
    def f(x):
        if x.index[0] == df.index[0]:
            return np.nan
        return x.iloc[-1]

    result = df.rolling(window).apply(f, raw=False)
    expected = df.iloc[2:].reindex_like(df)
    tm.assert_frame_equal(result, expected)

    with tm.external_error_raised(AttributeError):
        df.rolling(window).apply(f, raw=True)


def test_rolling_apply(engine_and_raw, step):
    engine, raw = engine_and_raw

    expected = Series([], dtype="float64")
    result = expected.rolling(10, step=step).apply(
        lambda x: x.mean(), engine=engine, raw=raw
    )
    tm.assert_series_equal(result, expected)

    # gh-8080
    s = Series([None, None, None])
    result = s.rolling(2, min_periods=0, step=step).apply(
        lambda x: len(x), engine=engine, raw=raw
    )
    expected = Series([1.0, 2.0, 2.0])[::step]
    tm.assert_series_equal(result, expected)

    result = s.rolling(2, min_periods=0, step=step).apply(len, engine=engine, raw=raw)
    tm.assert_series_equal(result, expected)


def test_all_apply(engine_and_raw):
    engine, raw = engine_and_raw

    df = (
        DataFrame(
            {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
        ).set_index("A")
        * 2
    )
    er = df.rolling(window=1)
    r = df.rolling(window="1s")

    result = r.apply(lambda x: 1, engine=engine, raw=raw)
    expected = er.apply(lambda x: 1, engine=engine, raw=raw)
    tm.assert_frame_equal(result, expected)


def test_ragged_apply(engine_and_raw):
    engine, raw = engine_and_raw

    df = DataFrame({"B": range(5)})
    df.index = [
        Timestamp("20130101 09:00:00"),
        Timestamp("20130101 09:00:02"),
        Timestamp("20130101 09:00:03"),
        Timestamp("20130101 09:00:05"),
        Timestamp("20130101 09:00:06"),
    ]

    f = lambda x: 1
    result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw)
    expected = df.copy()
    expected["B"] = 1.0
    tm.assert_frame_equal(result, expected)

    result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw)
    expected = df.copy()
    expected["B"] = 1.0
    tm.assert_frame_equal(result, expected)

    result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw)
    expected = df.copy()
    expected["B"] = 1.0
    tm.assert_frame_equal(result, expected)


def test_invalid_engine():
    with pytest.raises(ValueError, match="engine must be either 'numba' or 'cython'"):
        Series(range(1)).rolling(1).apply(lambda x: x, engine="foo")


def test_invalid_engine_kwargs_cython():
    with pytest.raises(ValueError, match="cython engine does not accept engine_kwargs"):
        Series(range(1)).rolling(1).apply(
            lambda x: x, engine="cython", engine_kwargs={"nopython": False}
        )


def test_invalid_raw_numba():
    with pytest.raises(
        ValueError, match="raw must be `True` when using the numba engine"
    ):
        Series(range(1)).rolling(1).apply(lambda x: x, raw=False, engine="numba")


@pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]])
def test_rolling_apply_args_kwargs(args_kwargs):
    # GH 33433
    def numpysum(x, par):
        return np.sum(x + par)

    df = DataFrame({"gr": [1, 1], "a": [1, 2]})

    idx = Index(["gr", "a"])
    expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx)

    result = df.rolling(1).apply(numpysum, args=args_kwargs[0], kwargs=args_kwargs[1])
    tm.assert_frame_equal(result, expected)

    midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None])
    expected = Series([11.0, 12.0], index=midx, name="a")

    gb_rolling = df.groupby("gr")["a"].rolling(1)

    result = gb_rolling.apply(numpysum, args=args_kwargs[0], kwargs=args_kwargs[1])
    tm.assert_series_equal(result, expected)


def test_nans(raw):
    obj = Series(np.random.default_rng(2).standard_normal(50))
    obj[:10] = np.nan
    obj[-10:] = np.nan

    result = obj.rolling(50, min_periods=30).apply(f, raw=raw)
    tm.assert_almost_equal(result.iloc[-1], np.mean(obj[10:-10]))

    # min_periods is working correctly
    result = obj.rolling(20, min_periods=15).apply(f, raw=raw)
    assert isna(result.iloc[23])
    assert not isna(result.iloc[24])

    assert not isna(result.iloc[-6])
    assert isna(result.iloc[-5])

    obj2 = Series(np.random.default_rng(2).standard_normal(20))
    result = obj2.rolling(10, min_periods=5).apply(f, raw=raw)
    assert isna(result.iloc[3])
    assert notna(result.iloc[4])

    result0 = obj.rolling(20, min_periods=0).apply(f, raw=raw)
    result1 = obj.rolling(20, min_periods=1).apply(f, raw=raw)
    tm.assert_almost_equal(result0, result1)


def test_center(raw):
    obj = Series(np.random.default_rng(2).standard_normal(50))
    obj[:10] = np.nan
    obj[-10:] = np.nan

    result = obj.rolling(20, min_periods=15, center=True).apply(f, raw=raw)
    expected = (
        concat([obj, Series([np.nan] * 9)])
        .rolling(20, min_periods=15)
        .apply(f, raw=raw)
        .iloc[9:]
        .reset_index(drop=True)
    )
    tm.assert_series_equal(result, expected)


def test_series(raw, series):
    result = series.rolling(50).apply(f, raw=raw)
    assert isinstance(result, Series)
    tm.assert_almost_equal(result.iloc[-1], np.mean(series[-50:]))


def test_frame(raw, frame):
    result = frame.rolling(50).apply(f, raw=raw)
    assert isinstance(result, DataFrame)
    tm.assert_series_equal(
        result.iloc[-1, :],
        frame.iloc[-50:, :].apply(np.mean, axis=0, raw=raw),
        check_names=False,
    )


def test_time_rule_series(raw, series):
    win = 25
    minp = 10
    ser = series[::2].resample("B").mean()
    series_result = ser.rolling(window=win, min_periods=minp).apply(f, raw=raw)
    last_date = series_result.index[-1]
    prev_date = last_date - 24 * offsets.BDay()

    trunc_series = series[::2].truncate(prev_date, last_date)
    tm.assert_almost_equal(series_result.iloc[-1], np.mean(trunc_series))


def test_time_rule_frame(raw, frame):
    win = 25
    minp = 10
    frm = frame[::2].resample("B").mean()
    frame_result = frm.rolling(window=win, min_periods=minp).apply(f, raw=raw)
    last_date = frame_result.index[-1]
    prev_date = last_date - 24 * offsets.BDay()

    trunc_frame = frame[::2].truncate(prev_date, last_date)
    tm.assert_series_equal(
        frame_result.xs(last_date),
        trunc_frame.apply(np.mean, raw=raw),
        check_names=False,
    )


@pytest.mark.parametrize("minp", [0, 99, 100])
def test_min_periods(raw, series, minp, step):
    result = series.rolling(len(series) + 1, min_periods=minp, step=step).apply(
        f, raw=raw
    )
    expected = series.rolling(len(series), min_periods=minp, step=step).apply(
        f, raw=raw
    )
    nan_mask = isna(result)
    tm.assert_series_equal(nan_mask, isna(expected))

    nan_mask = ~nan_mask
    tm.assert_almost_equal(result[nan_mask], expected[nan_mask])


def test_center_reindex_series(raw, series):
    # shifter index
    s = [f"x{x:d}" for x in range(12)]
    minp = 10

    series_xp = (
        series.reindex(list(series.index) + s)
        .rolling(window=25, min_periods=minp)
        .apply(f, raw=raw)
        .shift(-12)
        .reindex(series.index)
    )
    series_rs = series.rolling(window=25, min_periods=minp, center=True).apply(
        f, raw=raw
    )
    tm.assert_series_equal(series_xp, series_rs)


def test_center_reindex_frame(raw):
    # shifter index
    frame = DataFrame(range(100), index=date_range("2020-01-01", freq="D", periods=100))
    s = [f"x{x:d}" for x in range(12)]
    minp = 10

    frame_xp = (
        frame.reindex(list(frame.index) + s)
        .rolling(window=25, min_periods=minp)
        .apply(f, raw=raw)
        .shift(-12)
        .reindex(frame.index)
    )
    frame_rs = frame.rolling(window=25, min_periods=minp, center=True).apply(f, raw=raw)
    tm.assert_frame_equal(frame_xp, frame_rs)


def test_axis1(raw):
    # GH 45912
    df = DataFrame([1, 2])
    msg = "Support for axis=1 in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.rolling(window=1, axis=1).apply(np.sum, raw=raw)
    expected = DataFrame([1.0, 2.0])
    tm.assert_frame_equal(result, expected)
@@ -0,0 +1,519 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.indexers import (
|
||||
BaseIndexer,
|
||||
FixedForwardWindowIndexer,
|
||||
)
|
||||
from pandas.core.indexers.objects import (
|
||||
ExpandingIndexer,
|
||||
FixedWindowIndexer,
|
||||
VariableOffsetWindowIndexer,
|
||||
)
|
||||
|
||||
from pandas.tseries.offsets import BusinessDay
|
||||
|
||||
|
||||
def test_bad_get_window_bounds_signature():
|
||||
class BadIndexer(BaseIndexer):
|
||||
def get_window_bounds(self):
|
||||
return None
|
||||
|
||||
indexer = BadIndexer()
|
||||
with pytest.raises(ValueError, match="BadIndexer does not implement"):
|
||||
Series(range(5)).rolling(indexer)
|
||||
|
||||
|
||||
def test_expanding_indexer():
|
||||
s = Series(range(10))
|
||||
indexer = ExpandingIndexer()
|
||||
result = s.rolling(indexer).mean()
|
||||
expected = s.expanding().mean()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_indexer_constructor_arg():
|
||||
# Example found in computation.rst
|
||||
use_expanding = [True, False, True, False, True]
|
||||
df = DataFrame({"values": range(5)})
|
||||
|
||||
class CustomIndexer(BaseIndexer):
|
||||
def get_window_bounds(self, num_values, min_periods, center, closed, step):
|
||||
start = np.empty(num_values, dtype=np.int64)
|
||||
end = np.empty(num_values, dtype=np.int64)
|
||||
for i in range(num_values):
|
||||
if self.use_expanding[i]:
|
||||
start[i] = 0
|
||||
end[i] = i + 1
|
||||
else:
|
||||
start[i] = i
|
||||
end[i] = i + self.window_size
|
||||
return start, end
|
||||
|
||||
indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
|
||||
result = df.rolling(indexer).sum()
|
||||
expected = DataFrame({"values": [0.0, 1.0, 3.0, 3.0, 10.0]})
|
||||
tm.assert_frame_equal(result, expected)


def test_indexer_accepts_rolling_args():
    df = DataFrame({"values": range(5)})

    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed, step):
            start = np.empty(num_values, dtype=np.int64)
            end = np.empty(num_values, dtype=np.int64)
            for i in range(num_values):
                if (
                    center
                    and min_periods == 1
                    and closed == "both"
                    and step == 1
                    and i == 2
                ):
                    start[i] = 0
                    end[i] = num_values
                else:
                    start[i] = i
                    end[i] = i + self.window_size
            return start, end

    indexer = CustomIndexer(window_size=1)
    result = df.rolling(
        indexer, center=True, min_periods=1, closed="both", step=1
    ).sum()
    expected = DataFrame({"values": [0.0, 1.0, 10.0, 3.0, 4.0]})
    tm.assert_frame_equal(result, expected)
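    # For reference: only row 2 satisfies the center/min_periods/closed/step check
    # above, so it is summed over the full window (0+1+2+3+4 = 10) while every other
    # row keeps its single-row window.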


@pytest.mark.parametrize(
    "func,np_func,expected,np_kwargs",
    [
        ("count", len, [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, np.nan], {}),
        ("min", np.min, [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0, np.nan], {}),
        ("max", np.max, [2.0, 3.0, 4.0, 100.0, 100.0, 100.0, 8.0, 9.0, 9.0, np.nan], {}),
        (
            "std",
            np.std,
            [1.0, 1.0, 1.0, 55.71654452, 54.85739087, 53.9845657, 1.0, 1.0,
             0.70710678, np.nan],
            {"ddof": 1},
        ),
        (
            "var",
            np.var,
            [1.0, 1.0, 1.0, 3104.333333, 3009.333333, 2914.333333, 1.0, 1.0,
             0.500000, np.nan],
            {"ddof": 1},
        ),
        ("median", np.median, [1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 7.0, 8.0, 8.5, np.nan], {}),
    ],
)
def test_rolling_forward_window(
    frame_or_series, func, np_func, expected, np_kwargs, step
):
    # GH 32865
    values = np.arange(10.0)
    values[5] = 100.0

    indexer = FixedForwardWindowIndexer(window_size=3)

    match = "Forward-looking windows can't have center=True"
    with pytest.raises(ValueError, match=match):
        rolling = frame_or_series(values).rolling(window=indexer, center=True)
        getattr(rolling, func)()

    match = "Forward-looking windows don't support setting the closed argument"
    with pytest.raises(ValueError, match=match):
        rolling = frame_or_series(values).rolling(window=indexer, closed="right")
        getattr(rolling, func)()

    rolling = frame_or_series(values).rolling(window=indexer, min_periods=2, step=step)
    result = getattr(rolling, func)()

    # Check that the function output matches the explicitly provided array
    expected = frame_or_series(expected)[::step]
    tm.assert_equal(result, expected)

    # Check that the rolling function output matches applying an alternative
    # function to the rolling window object
    expected2 = frame_or_series(rolling.apply(lambda x: np_func(x, **np_kwargs)))
    tm.assert_equal(result, expected2)

    # Check that the function output matches applying an alternative function
    # if min_periods isn't specified
    # GH 39604: After count-min_periods deprecation, apply(lambda x: len(x))
    # is equivalent to count after setting min_periods=0
    min_periods = 0 if func == "count" else None
    rolling3 = frame_or_series(values).rolling(window=indexer, min_periods=min_periods)
    result3 = getattr(rolling3, func)()
    expected3 = frame_or_series(rolling3.apply(lambda x: np_func(x, **np_kwargs)))
    tm.assert_equal(result3, expected3)


def test_rolling_forward_skewness(frame_or_series, step):
    values = np.arange(10.0)
    values[5] = 100.0

    indexer = FixedForwardWindowIndexer(window_size=5)
    rolling = frame_or_series(values).rolling(window=indexer, min_periods=3, step=step)
    result = rolling.skew()

    expected = frame_or_series(
        [0.0, 2.232396, 2.229508, 2.228340, 2.229091, 2.231989, 0.0, 0.0,
         np.nan, np.nan]
    )[::step]
    tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "func,expected",
    [
        ("cov", [2.0, 2.0, 2.0, 97.0, 2.0, -93.0, 2.0, 2.0, np.nan, np.nan]),
        (
            "corr",
            [1.0, 1.0, 1.0, 0.8704775290207161, 0.018229084250926637,
             -0.861357304646493, 1.0, 1.0, np.nan, np.nan],
        ),
    ],
)
def test_rolling_forward_cov_corr(func, expected):
    values1 = np.arange(10).reshape(-1, 1)
    values2 = values1 * 2
    values1[5, 0] = 100
    values = np.concatenate([values1, values2], axis=1)

    indexer = FixedForwardWindowIndexer(window_size=3)
    rolling = DataFrame(values).rolling(window=indexer, min_periods=3)
    # We are interested in checking only pairwise covariance / correlation
    result = getattr(rolling, func)().loc[(slice(None), 1), 0]
    result = result.reset_index(drop=True)
    expected = Series(expected).reset_index(drop=True)
    expected.name = result.name
    tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "closed,expected_data",
    [
        ["right", [0.0, 1.0, 2.0, 3.0, 7.0, 12.0, 6.0, 7.0, 8.0, 9.0]],
        ["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]],
    ],
)
def test_non_fixed_variable_window_indexer(closed, expected_data):
    index = date_range("2020", periods=10)
    df = DataFrame(range(10), index=index)
    offset = BusinessDay(1)
    indexer = VariableOffsetWindowIndexer(index=index, offset=offset)
    result = df.rolling(indexer, closed=closed).sum()
    expected = DataFrame(expected_data, index=index)
    tm.assert_frame_equal(result, expected)


def test_variableoffsetwindowindexer_not_dti():
    # GH 54379
    with pytest.raises(ValueError, match="index must be a DatetimeIndex."):
        VariableOffsetWindowIndexer(index="foo", offset=BusinessDay(1))


def test_variableoffsetwindowindexer_not_offset():
    # GH 54379
    idx = date_range("2020", periods=10)
    with pytest.raises(ValueError, match="offset must be a DateOffset-like object."):
        VariableOffsetWindowIndexer(index=idx, offset="foo")


def test_fixed_forward_indexer_count(step):
    # GH: 35579
    df = DataFrame({"b": [None, None, None, 7]})
    indexer = FixedForwardWindowIndexer(window_size=2)
    result = df.rolling(window=indexer, min_periods=0, step=step).count()
    expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]})[::step]
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    ("end_value", "values"), [(1, [0.0, 1, 1, 3, 2]), (-1, [0.0, 1, 0, 3, 1])]
)
@pytest.mark.parametrize(("func", "args"), [("median", []), ("quantile", [0.5])])
def test_indexer_quantile_sum(end_value, values, func, args):
    # GH 37153
    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed, step):
            start = np.empty(num_values, dtype=np.int64)
            end = np.empty(num_values, dtype=np.int64)
            for i in range(num_values):
                if self.use_expanding[i]:
                    start[i] = 0
                    end[i] = max(i + end_value, 1)
                else:
                    start[i] = i
                    end[i] = i + self.window_size
            return start, end

    use_expanding = [True, False, True, False, True]
    df = DataFrame({"values": range(5)})

    indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
    result = getattr(df.rolling(indexer), func)(*args)
    expected = DataFrame({"values": values})
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "indexer_class", [FixedWindowIndexer, FixedForwardWindowIndexer, ExpandingIndexer]
)
@pytest.mark.parametrize("window_size", [1, 2, 12])
@pytest.mark.parametrize(
    "df_data",
    [
        {"a": [1, 1], "b": [0, 1]},
        {"a": [1, 2], "b": [0, 1]},
        {"a": [1] * 16, "b": [np.nan, 1, 2, np.nan] + list(range(4, 16))},
    ],
)
def test_indexers_are_reusable_after_groupby_rolling(
    indexer_class, window_size, df_data
):
    # GH 43267
    df = DataFrame(df_data)
    num_trials = 3
    indexer = indexer_class(window_size=window_size)
    original_window_size = indexer.window_size
    for i in range(num_trials):
        df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean()
        assert indexer.window_size == original_window_size


@pytest.mark.parametrize(
    "window_size, num_values, expected_start, expected_end",
    [
        (1, 1, [0], [1]),
        (1, 2, [0, 1], [1, 2]),
        (2, 1, [0], [1]),
        (2, 2, [0, 1], [2, 2]),
        (5, 12, range(12), list(range(5, 12)) + [12] * 5),
        (12, 5, range(5), [5] * 5),
        (0, 0, np.array([]), np.array([])),
        (1, 0, np.array([]), np.array([])),
        (0, 1, [0], [0]),
    ],
)
def test_fixed_forward_indexer_bounds(
    window_size, num_values, expected_start, expected_end, step
):
    # GH 43267
    indexer = FixedForwardWindowIndexer(window_size=window_size)
    start, end = indexer.get_window_bounds(num_values=num_values, step=step)

    tm.assert_numpy_array_equal(
        start, np.array(expected_start[::step]), check_dtype=False
    )
    tm.assert_numpy_array_equal(end, np.array(expected_end[::step]), check_dtype=False)
    assert len(start) == len(end)
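    # For reference: FixedForwardWindowIndexer produces start[i] = i and
    # end[i] = min(i + window_size, num_values); e.g. window_size=5 with 12 values
    # gives end = [5, 6, ..., 11, 12, 12, 12, 12, 12] as parametrized above.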


@pytest.mark.parametrize(
    "df, window_size, expected",
    [
        (
            DataFrame({"b": [0, 1, 2], "a": [1, 2, 2]}),
            2,
            Series(
                [0, 1.5, 2.0],
                index=MultiIndex.from_arrays([[1, 2, 2], range(3)], names=["a", None]),
                name="b",
                dtype=np.float64,
            ),
        ),
        (
            DataFrame(
                {
                    "b": [np.nan, 1, 2, np.nan] + list(range(4, 18)),
                    "a": [1] * 7 + [2] * 11,
                    "c": range(18),
                }
            ),
            12,
            Series(
                [3.6, 3.6, 4.25, 5.0, 5.0, 5.5, 6.0, 12.0, 12.5, 13.0, 13.5, 14.0,
                 14.5, 15.0, 15.5, 16.0, 16.5, 17.0],
                index=MultiIndex.from_arrays(
                    [[1] * 7 + [2] * 11, range(18)], names=["a", None]
                ),
                name="b",
                dtype=np.float64,
            ),
        ),
    ],
)
def test_rolling_groupby_with_fixed_forward_specific(df, window_size, expected):
    # GH 43267
    indexer = FixedForwardWindowIndexer(window_size=window_size)
    result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).mean()
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "group_keys",
    [
        (1,),
        (1, 2),
        (2, 1),
        (1, 1, 2),
        (1, 2, 1),
        (1, 1, 2, 2),
        (1, 2, 3, 2, 3),
        (1, 1, 2) * 4,
        (1, 2, 3) * 5,
    ],
)
@pytest.mark.parametrize("window_size", [1, 2, 3, 4, 5, 8, 20])
def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size):
    # GH 43267
    df = DataFrame(
        {
            "a": np.array(list(group_keys)),
            "b": np.arange(len(group_keys), dtype=np.float64) + 17,
            "c": np.arange(len(group_keys), dtype=np.int64),
        }
    )

    indexer = FixedForwardWindowIndexer(window_size=window_size)
    result = df.groupby("a")["b"].rolling(window=indexer, min_periods=1).sum()
    result.index.names = ["a", "c"]

    groups = df.groupby("a")[["a", "b", "c"]]
    manual = concat(
        [
            g.assign(
                b=[
                    g["b"].iloc[i : i + window_size].sum(min_count=1)
                    for i in range(len(g))
                ]
            )
            for _, g in groups
        ]
    )
    manual = manual.set_index(["a", "c"])["b"]

    tm.assert_series_equal(result, manual)


def test_unequal_start_end_bounds():
    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed, step):
            return np.array([1]), np.array([1, 2])

    indexer = CustomIndexer()
    roll = Series(1).rolling(indexer)
    match = "start"
    with pytest.raises(ValueError, match=match):
        roll.mean()

    with pytest.raises(ValueError, match=match):
        next(iter(roll))

    with pytest.raises(ValueError, match=match):
        roll.corr(pairwise=True)

    with pytest.raises(ValueError, match=match):
        roll.cov(pairwise=True)


def test_unequal_bounds_to_object():
    # GH 44470
    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed, step):
            return np.array([1]), np.array([2])

    indexer = CustomIndexer()
    roll = Series([1, 1]).rolling(indexer)
    match = "start and end"
    with pytest.raises(ValueError, match=match):
        roll.mean()

    with pytest.raises(ValueError, match=match):
        next(iter(roll))

    with pytest.raises(ValueError, match=match):
        roll.corr(pairwise=True)

    with pytest.raises(ValueError, match=match):
        roll.cov(pairwise=True)
@@ -0,0 +1,111 @@
from functools import partial
import sys

import numpy as np
import pytest

import pandas._libs.window.aggregations as window_aggregations

from pandas import Series
import pandas._testing as tm


def _get_rolling_aggregations():
    # list pairs of name and function
    # each function has this signature:
    # (const float64_t[:] values, ndarray[int64_t] start,
    # ndarray[int64_t] end, int64_t minp) -> np.ndarray
    named_roll_aggs = (
        [
            ("roll_sum", window_aggregations.roll_sum),
            ("roll_mean", window_aggregations.roll_mean),
        ]
        + [
            (f"roll_var({ddof})", partial(window_aggregations.roll_var, ddof=ddof))
            for ddof in [0, 1]
        ]
        + [
            ("roll_skew", window_aggregations.roll_skew),
            ("roll_kurt", window_aggregations.roll_kurt),
            ("roll_median_c", window_aggregations.roll_median_c),
            ("roll_max", window_aggregations.roll_max),
            ("roll_min", window_aggregations.roll_min),
        ]
        + [
            (
                f"roll_quantile({quantile},{interpolation})",
                partial(
                    window_aggregations.roll_quantile,
                    quantile=quantile,
                    interpolation=interpolation,
                ),
            )
            for quantile in [0.0001, 0.5, 0.9999]
            for interpolation in window_aggregations.interpolation_types
        ]
        + [
            (
                f"roll_rank({percentile},{method},{ascending})",
                partial(
                    window_aggregations.roll_rank,
                    percentile=percentile,
                    method=method,
                    ascending=ascending,
                ),
            )
            for percentile in [True, False]
            for method in window_aggregations.rolling_rank_tiebreakers.keys()
            for ascending in [True, False]
        ]
    )
    # unzip to a list of 2 tuples, names and functions
    unzipped = list(zip(*named_roll_aggs))
    return {"ids": unzipped[0], "params": unzipped[1]}


_rolling_aggregations = _get_rolling_aggregations()


@pytest.fixture(
    params=_rolling_aggregations["params"], ids=_rolling_aggregations["ids"]
)
def rolling_aggregation(request):
    """Make a rolling aggregation function as fixture."""
    return request.param


def test_rolling_aggregation_boundary_consistency(rolling_aggregation):
    # GH-45647
    minp, step, width, size, selection = 0, 1, 3, 11, [2, 7]
    values = np.arange(1, 1 + size, dtype=np.float64)
    end = np.arange(width, size, step, dtype=np.int64)
    start = end - width
    selarr = np.array(selection, dtype=np.int32)
    result = Series(rolling_aggregation(values, start[selarr], end[selarr], minp))
    expected = Series(rolling_aggregation(values, start, end, minp)[selarr])
    tm.assert_equal(expected, result)


def test_rolling_aggregation_with_unused_elements(rolling_aggregation):
    # GH-45647
    minp, width = 0, 5  # width at least 4 for kurt
    size = 2 * width + 5
    values = np.arange(1, size + 1, dtype=np.float64)
    values[width : width + 2] = sys.float_info.min
    values[width + 2] = np.nan
    values[width + 3 : width + 5] = sys.float_info.max
    start = np.array([0, size - width], dtype=np.int64)
    end = np.array([width, size], dtype=np.int64)
    loc = np.array(
        [j for i in range(len(start)) for j in range(start[i], end[i])],
        dtype=np.int32,
    )
    result = Series(rolling_aggregation(values, start, end, minp))
    compact_values = np.array(values[loc], dtype=np.float64)
    compact_start = np.arange(0, len(start) * width, width, dtype=np.int64)
    compact_end = compact_start + width
    expected = Series(
        rolling_aggregation(compact_values, compact_start, compact_end, minp)
    )
    assert np.isfinite(expected.values).all(), "Not all expected values are finite"
    tm.assert_equal(expected, result)
173
lib/python3.13/site-packages/pandas/tests/window/test_dtypes.py
Normal file
@@ -0,0 +1,173 @@
import numpy as np
import pytest

from pandas.errors import DataError

from pandas.core.dtypes.common import pandas_dtype

from pandas import (
    NA,
    DataFrame,
    Series,
)
import pandas._testing as tm

# gh-12373 : rolling functions error on float32 data
# make sure rolling functions works for different dtypes
#
# further note that we are only checking rolling for fully dtype
# compliance (though both expanding and ewm inherit)


def get_dtype(dtype, coerce_int=None):
    if coerce_int is False and "int" in dtype:
        return None
    return pandas_dtype(dtype)


@pytest.fixture(
    params=[
        "object",
        "category",
        "int8",
        "int16",
        "int32",
        "int64",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "float16",
        "float32",
        "float64",
        "m8[ns]",
        "M8[ns]",
        "datetime64[ns, UTC]",
    ]
)
def dtypes(request):
    """Dtypes for window tests"""
    return request.param


@pytest.mark.parametrize(
    "method, data, expected_data, coerce_int, min_periods",
    [
        ("count", np.arange(5), [1, 2, 2, 2, 2], True, 0),
        ("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True, 0),
        ("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False, 0),
        ("max", np.arange(5), [np.nan, 1, 2, 3, 4], True, None),
        ("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True, None),
        ("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False, None),
        ("min", np.arange(5), [np.nan, 0, 1, 2, 3], True, None),
        ("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True, None),
        ("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False, None),
        ("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True, None),
        ("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True, None),
        ("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False, None),
        ("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
        ("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
        ("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False, None),
        ("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True, None),
        ("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True, None),
        (
            "std",
            [0, 1, 2, np.nan, 4],
            [np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2,
            False,
            None,
        ),
        ("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True, None),
        ("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True, None),
        ("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False, None),
        ("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
        ("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
        (
            "median",
            [0, 1, 2, np.nan, 4],
            [np.nan, 0.5, 1.5, np.nan, np.nan],
            False,
            None,
        ),
    ],
)
def test_series_dtypes(
    method, data, expected_data, coerce_int, dtypes, min_periods, step
):
    ser = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int))
    rolled = ser.rolling(2, min_periods=min_periods, step=step)

    if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count":
        msg = "No numeric types to aggregate"
        with pytest.raises(DataError, match=msg):
            getattr(rolled, method)()
    else:
        result = getattr(rolled, method)()
        expected = Series(expected_data, dtype="float64")[::step]
        tm.assert_almost_equal(result, expected)


def test_series_nullable_int(any_signed_int_ea_dtype, step):
    # GH 43016
    ser = Series([0, 1, NA], dtype=any_signed_int_ea_dtype)
    result = ser.rolling(2, step=step).mean()
    expected = Series([np.nan, 0.5, np.nan])[::step]
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "method, expected_data, min_periods",
    [
        ("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, 0),
        (
            "max",
            {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])},
            None,
        ),
        (
            "min",
            {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])},
            None,
        ),
        (
            "sum",
            {0: Series([np.nan, 2, 6, 10, 14]), 1: Series([np.nan, 4, 8, 12, 16])},
            None,
        ),
        (
            "mean",
            {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
            None,
        ),
        (
            "std",
            {
                0: Series([np.nan] + [np.sqrt(2)] * 4),
                1: Series([np.nan] + [np.sqrt(2)] * 4),
            },
            None,
        ),
        (
            "var",
            {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])},
            None,
        ),
        (
            "median",
            {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
            None,
        ),
    ],
)
def test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step):
    df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes))
    rolled = df.rolling(2, min_periods=min_periods, step=step)

    if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count":
        msg = "Cannot aggregate non-numeric type"
        with pytest.raises(DataError, match=msg):
            getattr(rolled, method)()
    else:
        result = getattr(rolled, method)()
        expected = DataFrame(expected_data, dtype="float64")[::step]
        tm.assert_frame_equal(result, expected)
727
lib/python3.13/site-packages/pandas/tests/window/test_ewm.py
Normal file
@@ -0,0 +1,727 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    date_range,
)
import pandas._testing as tm


def test_doc_string():
    df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
    df
    df.ewm(com=0.5).mean()


def test_constructor(frame_or_series):
    c = frame_or_series(range(5)).ewm

    # valid
    c(com=0.5)
    c(span=1.5)
    c(alpha=0.5)
    c(halflife=0.75)
    c(com=0.5, span=None)
    c(alpha=0.5, com=None)
    c(halflife=0.75, alpha=None)

    # not valid: mutually exclusive
    msg = "comass, span, halflife, and alpha are mutually exclusive"
    with pytest.raises(ValueError, match=msg):
        c(com=0.5, alpha=0.5)
    with pytest.raises(ValueError, match=msg):
        c(span=1.5, halflife=0.75)
    with pytest.raises(ValueError, match=msg):
        c(alpha=0.5, span=1.5)

    # not valid: com < 0
    msg = "comass must satisfy: comass >= 0"
    with pytest.raises(ValueError, match=msg):
        c(com=-0.5)

    # not valid: span < 1
    msg = "span must satisfy: span >= 1"
    with pytest.raises(ValueError, match=msg):
        c(span=0.5)

    # not valid: halflife <= 0
    msg = "halflife must satisfy: halflife > 0"
    with pytest.raises(ValueError, match=msg):
        c(halflife=0)

    # not valid: alpha <= 0 or alpha > 1
    msg = "alpha must satisfy: 0 < alpha <= 1"
    for alpha in (-0.5, 1.5):
        with pytest.raises(ValueError, match=msg):
            c(alpha=alpha)


def test_ewma_times_not_datetime_type():
    msg = r"times must be datetime64 dtype."
    with pytest.raises(ValueError, match=msg):
        Series(range(5)).ewm(times=np.arange(5))


def test_ewma_times_not_same_length():
    msg = "times must be the same length as the object."
    with pytest.raises(ValueError, match=msg):
        Series(range(5)).ewm(times=np.arange(4).astype("datetime64[ns]"))


def test_ewma_halflife_not_correct_type():
    msg = "halflife must be a timedelta convertible object"
    with pytest.raises(ValueError, match=msg):
        Series(range(5)).ewm(halflife=1, times=np.arange(5).astype("datetime64[ns]"))


def test_ewma_halflife_without_times(halflife_with_times):
    msg = "halflife can only be a timedelta convertible argument if times is not None."
    with pytest.raises(ValueError, match=msg):
        Series(range(5)).ewm(halflife=halflife_with_times)


@pytest.mark.parametrize(
    "times",
    [
        np.arange(10).astype("datetime64[D]").astype("datetime64[ns]"),
        date_range("2000", freq="D", periods=10),
        date_range("2000", freq="D", periods=10).tz_localize("UTC"),
    ],
)
@pytest.mark.parametrize("min_periods", [0, 2])
def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods):
    halflife = halflife_with_times
    data = np.arange(10.0)
    data[::2] = np.nan
    df = DataFrame({"A": data})
    result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean()
    expected = df.ewm(halflife=1.0, min_periods=min_periods).mean()
    tm.assert_frame_equal(result, expected)


def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit):
    tz = tz_aware_fixture
    halflife = "23 days"
    times = (
        DatetimeIndex(["2020-01-01", "2020-01-10T00:04:05", "2020-02-23T05:00:23"])
        .tz_localize(tz)
        .as_unit(unit)
    )
    data = np.arange(3)
    df = DataFrame(data)
    result = df.ewm(halflife=halflife, times=times).mean()
    expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459])
    tm.assert_frame_equal(result, expected)


def test_ewm_with_nat_raises(halflife_with_times):
    # GH#38535
    ser = Series(range(1))
    times = DatetimeIndex(["NaT"])
    with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
        ser.ewm(com=0.1, halflife=halflife_with_times, times=times)


def test_ewm_with_times_getitem(halflife_with_times):
    # GH 40164
    halflife = halflife_with_times
    data = np.arange(10.0)
    data[::2] = np.nan
    times = date_range("2000", freq="D", periods=10)
    df = DataFrame({"A": data, "B": data})
    result = df.ewm(halflife=halflife, times=times)["A"].mean()
    expected = df.ewm(halflife=1.0)["A"].mean()
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("arg", ["com", "halflife", "span", "alpha"])
def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na):
    # GH 40164
    kwargs = {arg: 1, "adjust": adjust, "ignore_na": ignore_na}
    ewm = DataFrame({"A": range(1), "B": range(1)}).ewm(**kwargs)
    expected = {attr: getattr(ewm, attr) for attr in ewm._attributes}
    ewm_slice = ewm["A"]
    result = {attr: getattr(ewm, attr) for attr in ewm_slice._attributes}
    assert result == expected


def test_ewma_times_adjust_false_raises():
    # GH 40098
    with pytest.raises(
        NotImplementedError, match="times is not supported with adjust=False."
    ):
        Series(range(1)).ewm(
            0.1, adjust=False, times=date_range("2000", freq="D", periods=1)
        )


@pytest.mark.parametrize(
    "func, expected",
    [
        [
            "mean",
            DataFrame(
                {
                    0: range(5),
                    1: range(4, 9),
                    2: [7.428571, 9, 10.571429, 12.142857, 13.714286],
                },
                dtype=float,
            ),
        ],
        [
            "std",
            DataFrame(
                {
                    0: [np.nan] * 5,
                    1: [4.242641] * 5,
                    2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788],
                }
            ),
        ],
        [
            "var",
            DataFrame(
                {
                    0: [np.nan] * 5,
                    1: [18.0] * 5,
                    2: [21.428571, 27, 33.428571, 40.714286, 48.857143],
                }
            ),
        ],
    ],
)
def test_float_dtype_ewma(func, expected, float_numpy_dtype):
    # GH#42452

    df = DataFrame(
        {0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=float_numpy_dtype
    )
    msg = "Support for axis=1 in DataFrame.ewm is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        e = df.ewm(alpha=0.5, axis=1)
    result = getattr(e, func)()

    tm.assert_frame_equal(result, expected)


def test_times_string_col_raises():
    # GH 43265
    df = DataFrame(
        {"A": np.arange(10.0), "time_col": date_range("2000", freq="D", periods=10)}
    )
    with pytest.raises(ValueError, match="times must be datetime64"):
        df.ewm(halflife="1 day", min_periods=0, times="time_col")


def test_ewm_sum_adjust_false_notimplemented():
    data = Series(range(1)).ewm(com=1, adjust=False)
    with pytest.raises(NotImplementedError, match="sum is not"):
        data.sum()


@pytest.mark.parametrize(
    "expected_data, ignore",
    [[[10.0, 5.0, 2.5, 11.25], False], [[10.0, 5.0, 5.0, 12.5], True]],
)
def test_ewm_sum(expected_data, ignore):
    # xref from Numbagg tests
    # https://github.com/numbagg/numbagg/blob/v0.2.1/numbagg/test/test_moving.py#L50
    data = Series([10, 0, np.nan, 10])
    result = data.ewm(alpha=0.5, ignore_na=ignore).sum()
    expected = Series(expected_data)
    tm.assert_series_equal(result, expected)


def test_ewma_adjust():
    vals = Series(np.zeros(1000))
    vals[5] = 1
    result = vals.ewm(span=100, adjust=False).mean().sum()
    assert np.abs(result - 1) < 1e-2


def test_ewma_cases(adjust, ignore_na):
    # try adjust/ignore_na args matrix

    s = Series([1.0, 2.0, 4.0, 8.0])

    if adjust:
        expected = Series([1.0, 1.6, 2.736842, 4.923077])
    else:
        expected = Series([1.0, 1.333333, 2.222222, 4.148148])

    result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()
    tm.assert_series_equal(result, expected)


def test_ewma_nan_handling():
    s = Series([1.0] + [np.nan] * 5 + [1.0])
    result = s.ewm(com=5).mean()
    tm.assert_series_equal(result, Series([1.0] * len(s)))

    s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0])
    result = s.ewm(com=5).mean()
    tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4))


@pytest.mark.parametrize(
    "s, adjust, ignore_na, w",
    [
        (
            Series([np.nan, 1.0, 101.0]),
            True,
            False,
            [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0],
        ),
        (
            Series([np.nan, 1.0, 101.0]),
            True,
            True,
            [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0],
        ),
        (
            Series([np.nan, 1.0, 101.0]),
            False,
            False,
            [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))],
        ),
        (
            Series([np.nan, 1.0, 101.0]),
            False,
            True,
            [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))],
        ),
        (
            Series([1.0, np.nan, 101.0]),
            True,
            False,
            [(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, 1.0],
        ),
        (
            Series([1.0, np.nan, 101.0]),
            True,
            True,
            [(1.0 - (1.0 / (1.0 + 2.0))), np.nan, 1.0],
        ),
        (
            Series([1.0, np.nan, 101.0]),
            False,
            False,
            [(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, (1.0 / (1.0 + 2.0))],
        ),
        (
            Series([1.0, np.nan, 101.0]),
            False,
            True,
            [(1.0 - (1.0 / (1.0 + 2.0))), np.nan, (1.0 / (1.0 + 2.0))],
        ),
        (
            Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
            True,
            False,
            [np.nan, (1.0 - (1.0 / (1.0 + 2.0))) ** 3, np.nan, np.nan, 1.0, np.nan],
        ),
        (
            Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
            True,
            True,
            [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), np.nan, np.nan, 1.0, np.nan],
        ),
        (
            Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
            False,
            False,
            [np.nan, (1.0 - (1.0 / (1.0 + 2.0))) ** 3, np.nan, np.nan,
             (1.0 / (1.0 + 2.0)), np.nan],
        ),
        (
            Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]),
            False,
            True,
            [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), np.nan, np.nan,
             (1.0 / (1.0 + 2.0)), np.nan],
        ),
        (
            Series([1.0, np.nan, 101.0, 50.0]),
            True,
            False,
            [(1.0 - (1.0 / (1.0 + 2.0))) ** 3, np.nan,
             (1.0 - (1.0 / (1.0 + 2.0))), 1.0],
        ),
        (
            Series([1.0, np.nan, 101.0, 50.0]),
            True,
            True,
            [(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan,
             (1.0 - (1.0 / (1.0 + 2.0))), 1.0],
        ),
        (
            Series([1.0, np.nan, 101.0, 50.0]),
            False,
            False,
            [
                (1.0 - (1.0 / (1.0 + 2.0))) ** 3,
                np.nan,
                (1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)),
                (1.0 / (1.0 + 2.0))
                * ((1.0 - (1.0 / (1.0 + 2.0))) ** 2 + (1.0 / (1.0 + 2.0))),
            ],
        ),
        (
            Series([1.0, np.nan, 101.0, 50.0]),
            False,
            True,
            [
                (1.0 - (1.0 / (1.0 + 2.0))) ** 2,
                np.nan,
                (1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)),
                (1.0 / (1.0 + 2.0)),
            ],
        ),
    ],
)
def test_ewma_nan_handling_cases(s, adjust, ignore_na, w):
    # GH 7603
    expected = (s.multiply(w).cumsum() / Series(w).cumsum()).ffill()
    result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()

    tm.assert_series_equal(result, expected)
    if ignore_na is False:
        # check that ignore_na defaults to False
        result = s.ewm(com=2.0, adjust=adjust).mean()
        tm.assert_series_equal(result, expected)
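    # Note: with com=2.0 the smoothing factor is alpha = 1 / (1 + com) = 1 / 3, so
    # the parametrized weights above are powers and products of alpha = 1 / 3 and
    # (1 - alpha) = 2 / 3.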


def test_ewm_alpha():
    # GH 10789
    arr = np.random.default_rng(2).standard_normal(100)
    locs = np.arange(20, 40)
    arr[locs] = np.nan

    s = Series(arr)
    a = s.ewm(alpha=0.61722699889169674).mean()
    b = s.ewm(com=0.62014947789973052).mean()
    c = s.ewm(span=2.240298955799461).mean()
    d = s.ewm(halflife=0.721792864318).mean()
    tm.assert_series_equal(a, b)
    tm.assert_series_equal(a, c)
    tm.assert_series_equal(a, d)
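    # The four specifications above are equivalent because each maps to the same
    # smoothing factor: alpha = 1 / (1 + com) = 2 / (span + 1)
    # = 1 - exp(-ln(2) / halflife).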


def test_ewm_domain_checks():
    # GH 12492
    arr = np.random.default_rng(2).standard_normal(100)
    locs = np.arange(20, 40)
    arr[locs] = np.nan

    s = Series(arr)
    msg = "comass must satisfy: comass >= 0"
    with pytest.raises(ValueError, match=msg):
        s.ewm(com=-0.1)
    s.ewm(com=0.0)
    s.ewm(com=0.1)

    msg = "span must satisfy: span >= 1"
    with pytest.raises(ValueError, match=msg):
        s.ewm(span=-0.1)
    with pytest.raises(ValueError, match=msg):
        s.ewm(span=0.0)
    with pytest.raises(ValueError, match=msg):
        s.ewm(span=0.9)
    s.ewm(span=1.0)
    s.ewm(span=1.1)

    msg = "halflife must satisfy: halflife > 0"
    with pytest.raises(ValueError, match=msg):
        s.ewm(halflife=-0.1)
    with pytest.raises(ValueError, match=msg):
        s.ewm(halflife=0.0)
    s.ewm(halflife=0.1)

    msg = "alpha must satisfy: 0 < alpha <= 1"
    with pytest.raises(ValueError, match=msg):
        s.ewm(alpha=-0.1)
    with pytest.raises(ValueError, match=msg):
        s.ewm(alpha=0.0)
    s.ewm(alpha=0.1)
    s.ewm(alpha=1.0)
    with pytest.raises(ValueError, match=msg):
        s.ewm(alpha=1.1)


@pytest.mark.parametrize("method", ["mean", "std", "var"])
def test_ew_empty_series(method):
    vals = Series([], dtype=np.float64)

    ewm = vals.ewm(3)
    result = getattr(ewm, method)()
    tm.assert_almost_equal(result, vals)


@pytest.mark.parametrize("min_periods", [0, 1])
@pytest.mark.parametrize("name", ["mean", "var", "std"])
def test_ew_min_periods(min_periods, name):
    # excluding NaNs correctly
    arr = np.random.default_rng(2).standard_normal(50)
    arr[:10] = np.nan
    arr[-10:] = np.nan
    s = Series(arr)

    # check min_periods
    # GH 7898
    result = getattr(s.ewm(com=50, min_periods=2), name)()
    assert result[:11].isna().all()
    assert not result[11:].isna().any()

    result = getattr(s.ewm(com=50, min_periods=min_periods), name)()
    if name == "mean":
        assert result[:10].isna().all()
        assert not result[10:].isna().any()
    else:
        # ewm.std, ewm.var (with bias=False) require at least
        # two values
        assert result[:11].isna().all()
        assert not result[11:].isna().any()

    # check series of length 0
    result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)()
    tm.assert_series_equal(result, Series(dtype="float64"))

    # check series of length 1
    result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)()
    if name == "mean":
        tm.assert_series_equal(result, Series([1.0]))
    else:
        # ewm.std, ewm.var with bias=False require at least
        # two values
        tm.assert_series_equal(result, Series([np.nan]))

    # pass in ints
    result2 = getattr(Series(np.arange(50)).ewm(span=10), name)()
    assert result2.dtype == np.float64


@pytest.mark.parametrize("name", ["cov", "corr"])
def test_ewm_corr_cov(name):
    A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
    B = A[2:] + np.random.default_rng(2).standard_normal(48)

    A[:10] = np.nan
    B.iloc[-10:] = np.nan

    result = getattr(A.ewm(com=20, min_periods=5), name)(B)
    assert np.isnan(result.values[:14]).all()
    assert not np.isnan(result.values[14:]).any()


@pytest.mark.parametrize("min_periods", [0, 1, 2])
@pytest.mark.parametrize("name", ["cov", "corr"])
def test_ewm_corr_cov_min_periods(name, min_periods):
    # GH 7898
    A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
    B = A[2:] + np.random.default_rng(2).standard_normal(48)

    A[:10] = np.nan
    B.iloc[-10:] = np.nan

    result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B)
    # binary functions (ewmcov, ewmcorr) with bias=False require at
    # least two values
    assert np.isnan(result.values[:11]).all()
    assert not np.isnan(result.values[11:]).any()

    # check series of length 0
    empty = Series([], dtype=np.float64)
    result = getattr(empty.ewm(com=50, min_periods=min_periods), name)(empty)
    tm.assert_series_equal(result, empty)

    # check series of length 1
    result = getattr(Series([1.0]).ewm(com=50, min_periods=min_periods), name)(
        Series([1.0])
    )
    tm.assert_series_equal(result, Series([np.nan]))


@pytest.mark.parametrize("name", ["cov", "corr"])
def test_different_input_array_raise_exception(name):
    A = Series(np.random.default_rng(2).standard_normal(50), index=range(50))
    A[:10] = np.nan

    msg = "other must be a DataFrame or Series"
    # exception raised is Exception
    with pytest.raises(ValueError, match=msg):
        getattr(A.ewm(com=20, min_periods=5), name)(
            np.random.default_rng(2).standard_normal(50)
        )


@pytest.mark.parametrize("name", ["var", "std", "mean"])
def test_ewma_series(series, name):
    series_result = getattr(series.ewm(com=10), name)()
    assert isinstance(series_result, Series)


@pytest.mark.parametrize("name", ["var", "std", "mean"])
def test_ewma_frame(frame, name):
    frame_result = getattr(frame.ewm(com=10), name)()
    assert isinstance(frame_result, DataFrame)


def test_ewma_span_com_args(series):
    A = series.ewm(com=9.5).mean()
    B = series.ewm(span=20).mean()
    tm.assert_almost_equal(A, B)
    msg = "comass, span, halflife, and alpha are mutually exclusive"
    with pytest.raises(ValueError, match=msg):
        series.ewm(com=9.5, span=20)

    msg = "Must pass one of comass, span, halflife, or alpha"
    with pytest.raises(ValueError, match=msg):
        series.ewm().mean()


def test_ewma_halflife_arg(series):
    A = series.ewm(com=13.932726172912965).mean()
    B = series.ewm(halflife=10.0).mean()
    tm.assert_almost_equal(A, B)
    msg = "comass, span, halflife, and alpha are mutually exclusive"
    with pytest.raises(ValueError, match=msg):
        series.ewm(span=20, halflife=50)
    with pytest.raises(ValueError, match=msg):
        series.ewm(com=9.5, halflife=50)
    with pytest.raises(ValueError, match=msg):
        series.ewm(com=9.5, span=20, halflife=50)
    msg = "Must pass one of comass, span, halflife, or alpha"
    with pytest.raises(ValueError, match=msg):
        series.ewm()


def test_ewm_alpha_arg(series):
    # GH 10789
    s = series
    msg = "Must pass one of comass, span, halflife, or alpha"
    with pytest.raises(ValueError, match=msg):
        s.ewm()

    msg = "comass, span, halflife, and alpha are mutually exclusive"
    with pytest.raises(ValueError, match=msg):
        s.ewm(com=10.0, alpha=0.5)
    with pytest.raises(ValueError, match=msg):
        s.ewm(span=10.0, alpha=0.5)
    with pytest.raises(ValueError, match=msg):
        s.ewm(halflife=10.0, alpha=0.5)


@pytest.mark.parametrize("func", ["cov", "corr"])
def test_ewm_pairwise_cov_corr(func, frame):
    result = getattr(frame.ewm(span=10, min_periods=5), func)()
    result = result.loc[(slice(None), 1), 5]
    result.index = result.index.droplevel(1)
    expected = getattr(frame[1].ewm(span=10, min_periods=5), func)(frame[5])
    tm.assert_series_equal(result, expected, check_names=False)


def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
    # GH#46560
    kernel = arithmetic_win_operators
    df = DataFrame({"a": [1], "b": 2, "c": 3})
    df["c"] = df["c"].astype(object)
    ewm = df.ewm(span=2, min_periods=1)
    op = getattr(ewm, kernel, None)
    if op is not None:
        result = op(numeric_only=numeric_only)

        columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
        expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
        assert list(expected.columns) == columns

        tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("kernel", ["corr", "cov"])
@pytest.mark.parametrize("use_arg", [True, False])
def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
    # GH#46560
    df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
    df["c"] = df["c"].astype(object)
    arg = (df,) if use_arg else ()
    ewm = df.ewm(span=2, min_periods=1)
    op = getattr(ewm, kernel)
    result = op(*arg, numeric_only=numeric_only)

    # Compare result to op using float dtypes, dropping c when numeric_only is True
    columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
    df2 = df[columns].astype(float)
    arg2 = (df2,) if use_arg else ()
    ewm2 = df2.ewm(span=2, min_periods=1)
    op2 = getattr(ewm2, kernel)
    expected = op2(*arg2, numeric_only=numeric_only)

    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("dtype", [int, object])
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
    # GH#46560
    kernel = arithmetic_win_operators
    ser = Series([1], dtype=dtype)
    ewm = ser.ewm(span=2, min_periods=1)
    op = getattr(ewm, kernel, None)
    if op is None:
        # Nothing to test
        pytest.skip("No op to test")
    if numeric_only and dtype is object:
        msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only"
        with pytest.raises(NotImplementedError, match=msg):
            op(numeric_only=numeric_only)
    else:
        result = op(numeric_only=numeric_only)
        expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
        tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("kernel", ["corr", "cov"])
@pytest.mark.parametrize("use_arg", [True, False])
@pytest.mark.parametrize("dtype", [int, object])
def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
    # GH#46560
    ser = Series([1, 2, 3], dtype=dtype)
    arg = (ser,) if use_arg else ()
    ewm = ser.ewm(span=2, min_periods=1)
    op = getattr(ewm, kernel)
    if numeric_only and dtype is object:
        msg = f"ExponentialMovingWindow.{kernel} does not implement numeric_only"
        with pytest.raises(NotImplementedError, match=msg):
            op(*arg, numeric_only=numeric_only)
    else:
        result = op(*arg, numeric_only=numeric_only)

        ser2 = ser.astype(float)
        arg2 = (ser2,) if use_arg else ()
        ewm2 = ser2.ewm(span=2, min_periods=1)
        op2 = getattr(ewm2, kernel)
        expected = op2(*arg2, numeric_only=numeric_only)
        tm.assert_series_equal(result, expected)
@@ -0,0 +1,723 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_doc_string():
|
||||
df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
|
||||
df
|
||||
df.expanding(2).sum()
|
||||
|
||||
|
||||
def test_constructor(frame_or_series):
|
||||
# GH 12669
|
||||
|
||||
c = frame_or_series(range(5)).expanding
|
||||
|
||||
# valid
|
||||
c(min_periods=1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("w", [2.0, "foo", np.array([2])])
|
||||
def test_constructor_invalid(frame_or_series, w):
|
||||
# not valid
|
||||
|
||||
c = frame_or_series(range(5)).expanding
|
||||
msg = "min_periods must be an integer"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
c(min_periods=w)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"expander",
|
||||
[
|
||||
1,
|
||||
pytest.param(
|
||||
"ls",
|
||||
marks=pytest.mark.xfail(
|
||||
reason="GH#16425 expanding with offset not supported"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_empty_df_expanding(expander):
|
||||
# GH 15819 Verifies that datetime and integer expanding windows can be
|
||||
# applied to empty DataFrames
|
||||
|
||||
expected = DataFrame()
|
||||
result = DataFrame().expanding(expander).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# Verifies that datetime and integer expanding windows can be applied
|
||||
# to empty DataFrames with datetime index
|
||||
expected = DataFrame(index=DatetimeIndex([]))
|
||||
result = DataFrame(index=DatetimeIndex([])).expanding(expander).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_missing_minp_zero():
|
||||
# https://github.com/pandas-dev/pandas/pull/18921
|
||||
# minp=0
|
||||
x = Series([np.nan])
|
||||
result = x.expanding(min_periods=0).sum()
|
||||
expected = Series([0.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# minp=1
|
||||
result = x.expanding(min_periods=1).sum()
|
||||
expected = Series([np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_axis(axis_frame):
|
||||
# see gh-23372.
|
||||
df = DataFrame(np.ones((10, 20)))
|
||||
axis = df._get_axis_number(axis_frame)
|
||||
|
||||
if axis == 0:
|
||||
msg = "The 'axis' keyword in DataFrame.expanding is deprecated"
|
||||
expected = DataFrame(
|
||||
{i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)}
|
||||
)
|
||||
else:
|
||||
# axis == 1
|
||||
msg = "Support for axis=1 in DataFrame.expanding is deprecated"
|
||||
expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df.expanding(3, axis=axis_frame).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_count_with_min_periods(frame_or_series):
|
||||
# GH 26996
|
||||
result = frame_or_series(range(5)).expanding(min_periods=3).count()
|
||||
expected = frame_or_series([np.nan, np.nan, 3.0, 4.0, 5.0])
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_count_default_min_periods_with_null_values(frame_or_series):
|
||||
# GH 26996
|
||||
values = [1, 2, 3, np.nan, 4, 5, 6]
|
||||
expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0]
|
||||
|
||||
result = frame_or_series(values).expanding().count()
|
||||
expected = frame_or_series(expected_counts)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_count_with_min_periods_exceeding_series_length(frame_or_series):
|
||||
# GH 25857
|
||||
result = frame_or_series(range(5)).expanding(min_periods=6).count()
|
||||
expected = frame_or_series([np.nan, np.nan, np.nan, np.nan, np.nan])
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"df,expected,min_periods",
|
||||
[
|
||||
(
|
||||
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
|
||||
[
|
||||
({"A": [1], "B": [4]}, [0]),
|
||||
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
|
||||
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
3,
|
||||
),
|
||||
(
|
||||
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
|
||||
[
|
||||
({"A": [1], "B": [4]}, [0]),
|
||||
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
|
||||
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
2,
|
||||
),
|
||||
(
|
||||
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
|
||||
[
|
||||
({"A": [1], "B": [4]}, [0]),
|
||||
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
|
||||
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
1,
|
||||
),
|
||||
(DataFrame({"A": [1], "B": [4]}), [], 2),
|
||||
(DataFrame(), [({}, [])], 1),
|
||||
(
|
||||
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
|
||||
[
|
||||
({"A": [1.0], "B": [np.nan]}, [0]),
|
||||
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
|
||||
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
3,
|
||||
),
|
||||
(
|
||||
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
|
||||
[
|
||||
({"A": [1.0], "B": [np.nan]}, [0]),
|
||||
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
|
||||
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
2,
|
||||
),
|
||||
(
|
||||
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
|
||||
[
|
||||
({"A": [1.0], "B": [np.nan]}, [0]),
|
||||
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
|
||||
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
|
||||
],
|
||||
1,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_iter_expanding_dataframe(df, expected, min_periods):
|
||||
# GH 11704
|
||||
expected = [DataFrame(values, index=index) for (values, index) in expected]
|
||||
|
||||
for expected, actual in zip(expected, df.expanding(min_periods)):
|
||||
tm.assert_frame_equal(actual, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ser,expected,min_periods",
|
||||
[
|
||||
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3),
|
||||
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
|
||||
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1),
|
||||
(Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2),
|
||||
(Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2),
|
||||
(Series([], dtype="int64"), [], 2),
|
||||
],
|
||||
)
|
||||
def test_iter_expanding_series(ser, expected, min_periods):
|
||||
# GH 11704
|
||||
expected = [Series(values, index=index) for (values, index) in expected]
|
||||
|
||||
for expected, actual in zip(expected, ser.expanding(min_periods)):
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
|
||||
def test_center_invalid():
|
||||
# GH 20647
|
||||
df = DataFrame()
|
||||
with pytest.raises(TypeError, match=".* got an unexpected keyword"):
|
||||
df.expanding(center=True)
|
||||
|
||||
|
||||
def test_expanding_sem(frame_or_series):
|
||||
# GH: 26476
|
||||
obj = frame_or_series([0, 1, 2])
|
||||
result = obj.expanding().sem()
|
||||
if isinstance(result, DataFrame):
|
||||
result = Series(result[0].values)
|
||||
expected = Series([np.nan] + [0.707107] * 2)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["skew", "kurt"])
|
||||
def test_expanding_skew_kurt_numerical_stability(method):
|
||||
# GH: 6929
|
||||
s = Series(np.random.default_rng(2).random(10))
|
||||
expected = getattr(s.expanding(3), method)()
|
||||
s = s + 5000
|
||||
result = getattr(s.expanding(3), method)()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("window", [1, 3, 10, 20])
|
||||
@pytest.mark.parametrize("method", ["min", "max", "average"])
|
||||
@pytest.mark.parametrize("pct", [True, False])
|
||||
@pytest.mark.parametrize("ascending", [True, False])
|
||||
@pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
|
||||
def test_rank(window, method, pct, ascending, test_data):
|
||||
length = 20
|
||||
if test_data == "default":
|
||||
ser = Series(data=np.random.default_rng(2).random(length))
|
||||
elif test_data == "duplicates":
|
||||
ser = Series(data=np.random.default_rng(2).choice(3, length))
|
||||
elif test_data == "nans":
|
||||
ser = Series(
|
||||
data=np.random.default_rng(2).choice(
|
||||
[1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length
|
||||
)
|
||||
)
|
||||
|
||||
expected = ser.expanding(window).apply(
|
||||
lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1]
|
||||
)
|
||||
result = ser.expanding(window).rank(method=method, pct=pct, ascending=ascending)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_corr(series):
|
||||
A = series.dropna()
|
||||
B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5]
|
||||
|
||||
result = A.expanding().corr(B)
|
||||
|
||||
rolling_result = A.rolling(window=len(A), min_periods=1).corr(B)
|
||||
|
||||
tm.assert_almost_equal(rolling_result, result)
|
||||
|
||||
|
||||
def test_expanding_count(series):
|
||||
result = series.expanding(min_periods=0).count()
|
||||
tm.assert_almost_equal(
|
||||
result, series.rolling(window=len(series), min_periods=0).count()
|
||||
)
|
||||
|
||||
|
||||
def test_expanding_quantile(series):
|
||||
result = series.expanding().quantile(0.5)
|
||||
|
||||
rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5)
|
||||
|
||||
tm.assert_almost_equal(result, rolling_result)
|
||||
|
||||
|
||||
def test_expanding_cov(series):
|
||||
A = series
|
||||
B = (A + np.random.default_rng(2).standard_normal(len(A)))[:-5]
|
||||
|
||||
result = A.expanding().cov(B)
|
||||
|
||||
rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)
|
||||
|
||||
tm.assert_almost_equal(rolling_result, result)
|
||||
|
||||
|
||||
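# Pairwise expanding cov/corr on a DataFrame is likewise checked against a
# rolling window covering the whole frame.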
def test_expanding_cov_pairwise(frame):
|
||||
result = frame.expanding().cov()
|
||||
|
||||
rolling_result = frame.rolling(window=len(frame), min_periods=1).cov()
|
||||
|
||||
tm.assert_frame_equal(result, rolling_result)
|
||||
|
||||
|
||||
def test_expanding_corr_pairwise(frame):
|
||||
result = frame.expanding().corr()
|
||||
|
||||
rolling_result = frame.rolling(window=len(frame), min_periods=1).corr()
|
||||
tm.assert_frame_equal(result, rolling_result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func,static_comp",
|
||||
[
|
||||
("sum", np.sum),
|
||||
("mean", lambda x: np.mean(x, axis=0)),
|
||||
("max", lambda x: np.max(x, axis=0)),
|
||||
("min", lambda x: np.min(x, axis=0)),
|
||||
],
|
||||
ids=["sum", "mean", "max", "min"],
|
||||
)
|
||||
def test_expanding_func(func, static_comp, frame_or_series):
|
||||
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
|
||||
|
||||
msg = "The 'axis' keyword in (Series|DataFrame).expanding is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
obj = data.expanding(min_periods=1, axis=0)
|
||||
result = getattr(obj, func)()
|
||||
assert isinstance(result, frame_or_series)
|
||||
|
||||
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
|
||||
warn = None
|
||||
if frame_or_series is DataFrame and static_comp is np.sum:
|
||||
warn = FutureWarning
|
||||
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
|
||||
expected = static_comp(data[:11])
|
||||
if frame_or_series is Series:
|
||||
tm.assert_almost_equal(result[10], expected)
|
||||
else:
|
||||
tm.assert_series_equal(result.iloc[10], expected, check_names=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func,static_comp",
|
||||
[("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
|
||||
ids=["sum", "mean", "max", "min"],
|
||||
)
|
||||
def test_expanding_min_periods(func, static_comp):
|
||||
ser = Series(np.random.default_rng(2).standard_normal(50))
|
||||
|
||||
msg = "The 'axis' keyword in Series.expanding is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = getattr(ser.expanding(min_periods=30, axis=0), func)()
|
||||
assert result[:29].isna().all()
|
||||
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
|
||||
|
||||
# min_periods is working correctly
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = getattr(ser.expanding(min_periods=15, axis=0), func)()
|
||||
assert isna(result.iloc[13])
|
||||
assert notna(result.iloc[14])
|
||||
|
||||
ser2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = getattr(ser2.expanding(min_periods=5, axis=0), func)()
|
||||
assert isna(result[3])
|
||||
assert notna(result[4])
|
||||
|
||||
# min_periods=0
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result0 = getattr(ser.expanding(min_periods=0, axis=0), func)()
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result1 = getattr(ser.expanding(min_periods=1, axis=0), func)()
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = getattr(ser.expanding(min_periods=1, axis=0), func)()
|
||||
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
|
||||
|
||||
|
||||
def test_expanding_apply(engine_and_raw, frame_or_series):
|
||||
engine, raw = engine_and_raw
|
||||
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
|
||||
result = data.expanding(min_periods=1).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
assert isinstance(result, frame_or_series)
|
||||
|
||||
if frame_or_series is Series:
|
||||
tm.assert_almost_equal(result[9], np.mean(data[:11], axis=0))
|
||||
else:
|
||||
tm.assert_series_equal(
|
||||
result.iloc[9], np.mean(data[:11], axis=0), check_names=False
|
||||
)
|
||||
|
||||
|
||||
def test_expanding_min_periods_apply(engine_and_raw):
|
||||
engine, raw = engine_and_raw
|
||||
ser = Series(np.random.default_rng(2).standard_normal(50))
|
||||
|
||||
result = ser.expanding(min_periods=30).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
assert result[:29].isna().all()
|
||||
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
|
||||
|
||||
# min_periods is working correctly
|
||||
result = ser.expanding(min_periods=15).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
assert isna(result.iloc[13])
|
||||
assert notna(result.iloc[14])
|
||||
|
||||
ser2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
result = ser2.expanding(min_periods=5).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
assert isna(result[3])
|
||||
assert notna(result[4])
|
||||
|
||||
# min_periods=0
|
||||
result0 = ser.expanding(min_periods=0).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
result1 = ser.expanding(min_periods=1).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
result = ser.expanding(min_periods=1).apply(
|
||||
lambda x: x.mean(), raw=raw, engine=engine
|
||||
)
|
||||
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)),
|
||||
lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)),
|
||||
],
|
||||
)
|
||||
def test_moment_functions_zero_length_pairwise(f):
|
||||
df1 = DataFrame()
|
||||
df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
|
||||
df2["a"] = df2["a"].astype("float64")
|
||||
|
||||
df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns]))
|
||||
df2_expected = DataFrame(
|
||||
index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
|
||||
columns=Index(["a"], name="foo"),
|
||||
dtype="float64",
|
||||
)
|
||||
|
||||
df1_result = f(df1)
|
||||
tm.assert_frame_equal(df1_result, df1_expected)
|
||||
|
||||
df2_result = f(df2)
|
||||
tm.assert_frame_equal(df2_result, df2_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.expanding().count(),
|
||||
lambda x: x.expanding(min_periods=5).cov(x, pairwise=False),
|
||||
lambda x: x.expanding(min_periods=5).corr(x, pairwise=False),
|
||||
lambda x: x.expanding(min_periods=5).max(),
|
||||
lambda x: x.expanding(min_periods=5).min(),
|
||||
lambda x: x.expanding(min_periods=5).sum(),
|
||||
lambda x: x.expanding(min_periods=5).mean(),
|
||||
lambda x: x.expanding(min_periods=5).std(),
|
||||
lambda x: x.expanding(min_periods=5).var(),
|
||||
lambda x: x.expanding(min_periods=5).skew(),
|
||||
lambda x: x.expanding(min_periods=5).kurt(),
|
||||
lambda x: x.expanding(min_periods=5).quantile(0.5),
|
||||
lambda x: x.expanding(min_periods=5).median(),
|
||||
lambda x: x.expanding(min_periods=5).apply(sum, raw=False),
|
||||
lambda x: x.expanding(min_periods=5).apply(sum, raw=True),
|
||||
],
|
||||
)
|
||||
def test_moment_functions_zero_length(f):
|
||||
# GH 8056
|
||||
s = Series(dtype=np.float64)
|
||||
s_expected = s
|
||||
df1 = DataFrame()
|
||||
df1_expected = df1
|
||||
df2 = DataFrame(columns=["a"])
|
||||
df2["a"] = df2["a"].astype("float64")
|
||||
df2_expected = df2
|
||||
|
||||
s_result = f(s)
|
||||
tm.assert_series_equal(s_result, s_expected)
|
||||
|
||||
df1_result = f(df1)
|
||||
tm.assert_frame_equal(df1_result, df1_expected)
|
||||
|
||||
df2_result = f(df2)
|
||||
tm.assert_frame_equal(df2_result, df2_expected)
|
||||
|
||||
|
||||
def test_expanding_apply_empty_series(engine_and_raw):
|
||||
engine, raw = engine_and_raw
|
||||
ser = Series([], dtype=np.float64)
|
||||
tm.assert_series_equal(
|
||||
ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine)
|
||||
)
|
||||
|
||||
|
||||
def test_expanding_apply_min_periods_0(engine_and_raw):
|
||||
# GH 8080
|
||||
engine, raw = engine_and_raw
|
||||
s = Series([None, None, None])
|
||||
result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine)
|
||||
expected = Series([1.0, 2.0, 3.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_cov_diff_index():
|
||||
# GH 7512
|
||||
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
||||
s2 = Series([1, 3], index=[0, 2])
|
||||
result = s1.expanding().cov(s2)
|
||||
expected = Series([None, None, 2.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s2a = Series([1, None, 3], index=[0, 1, 2])
|
||||
result = s1.expanding().cov(s2a)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s1 = Series([7, 8, 10], index=[0, 1, 3])
|
||||
s2 = Series([7, 9, 10], index=[0, 2, 3])
|
||||
result = s1.expanding().cov(s2)
|
||||
expected = Series([None, None, None, 4.5])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_corr_diff_index():
|
||||
# GH 7512
|
||||
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
||||
s2 = Series([1, 3], index=[0, 2])
|
||||
result = s1.expanding().corr(s2)
|
||||
expected = Series([None, None, 1.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s2a = Series([1, None, 3], index=[0, 1, 2])
|
||||
result = s1.expanding().corr(s2a)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s1 = Series([7, 8, 10], index=[0, 1, 3])
|
||||
s2 = Series([7, 9, 10], index=[0, 2, 3])
|
||||
result = s1.expanding().corr(s2)
|
||||
expected = Series([None, None, None, 1.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_expanding_cov_pairwise_diff_length():
|
||||
# GH 7512
|
||||
df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo"))
|
||||
df1a = DataFrame(
|
||||
[[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo")
|
||||
)
|
||||
df2 = DataFrame(
|
||||
[[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo")
|
||||
)
|
||||
df2a = DataFrame(
|
||||
[[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo")
|
||||
)
|
||||
# TODO: xref gh-15826
|
||||
# .loc is not preserving the names
|
||||
result1 = df1.expanding().cov(df2, pairwise=True).loc[2]
|
||||
result2 = df1.expanding().cov(df2a, pairwise=True).loc[2]
|
||||
result3 = df1a.expanding().cov(df2, pairwise=True).loc[2]
|
||||
result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2]
|
||||
expected = DataFrame(
|
||||
[[-3.0, -6.0], [-5.0, -10.0]],
|
||||
columns=Index(["A", "B"], name="foo"),
|
||||
index=Index(["X", "Y"], name="foo"),
|
||||
)
|
||||
tm.assert_frame_equal(result1, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
tm.assert_frame_equal(result3, expected)
|
||||
tm.assert_frame_equal(result4, expected)
|
||||
|
||||
|
||||
def test_expanding_corr_pairwise_diff_length():
|
||||
# GH 7512
|
||||
df1 = DataFrame(
|
||||
[[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar")
|
||||
)
|
||||
df1a = DataFrame(
|
||||
[[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"]
|
||||
)
|
||||
df2 = DataFrame(
|
||||
[[5, 6], [None, None], [2, 1]],
|
||||
columns=["X", "Y"],
|
||||
index=Index(range(3), name="bar"),
|
||||
)
|
||||
df2a = DataFrame(
|
||||
[[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"]
|
||||
)
|
||||
result1 = df1.expanding().corr(df2, pairwise=True).loc[2]
|
||||
result2 = df1.expanding().corr(df2a, pairwise=True).loc[2]
|
||||
result3 = df1a.expanding().corr(df2, pairwise=True).loc[2]
|
||||
result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2]
|
||||
expected = DataFrame(
|
||||
[[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"])
|
||||
)
|
||||
tm.assert_frame_equal(result1, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
tm.assert_frame_equal(result3, expected)
|
||||
tm.assert_frame_equal(result4, expected)
|
||||
|
||||
|
||||
def test_expanding_apply_args_kwargs(engine_and_raw):
|
||||
def mean_w_arg(x, const):
|
||||
return np.mean(x) + const
|
||||
|
||||
engine, raw = engine_and_raw
|
||||
|
||||
df = DataFrame(np.random.default_rng(2).random((20, 3)))
|
||||
|
||||
expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0
|
||||
|
||||
result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
|
||||
# GH#46560
|
||||
kernel = arithmetic_win_operators
|
||||
df = DataFrame({"a": [1], "b": 2, "c": 3})
|
||||
df["c"] = df["c"].astype(object)
|
||||
expanding = df.expanding()
|
||||
op = getattr(expanding, kernel, None)
|
||||
if op is not None:
|
||||
result = op(numeric_only=numeric_only)
|
||||
|
||||
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
|
||||
expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
|
||||
assert list(expected.columns) == columns
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kernel", ["corr", "cov"])
|
||||
@pytest.mark.parametrize("use_arg", [True, False])
|
||||
def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
|
||||
# GH#46560
|
||||
df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
|
||||
df["c"] = df["c"].astype(object)
|
||||
arg = (df,) if use_arg else ()
|
||||
expanding = df.expanding()
|
||||
op = getattr(expanding, kernel)
|
||||
result = op(*arg, numeric_only=numeric_only)
|
||||
|
||||
# Compare result to op using float dtypes, dropping c when numeric_only is True
|
||||
columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
|
||||
df2 = df[columns].astype(float)
|
||||
arg2 = (df2,) if use_arg else ()
|
||||
expanding2 = df2.expanding()
|
||||
op2 = getattr(expanding2, kernel)
|
||||
expected = op2(*arg2, numeric_only=numeric_only)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, object])
|
||||
def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
|
||||
# GH#46560
|
||||
kernel = arithmetic_win_operators
|
||||
ser = Series([1], dtype=dtype)
|
||||
expanding = ser.expanding()
|
||||
op = getattr(expanding, kernel)
|
||||
if numeric_only and dtype is object:
|
||||
msg = f"Expanding.{kernel} does not implement numeric_only"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
op(numeric_only=numeric_only)
|
||||
else:
|
||||
result = op(numeric_only=numeric_only)
|
||||
expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kernel", ["corr", "cov"])
|
||||
@pytest.mark.parametrize("use_arg", [True, False])
|
||||
@pytest.mark.parametrize("dtype", [int, object])
|
||||
def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
|
||||
# GH#46560
|
||||
ser = Series([1, 2, 3], dtype=dtype)
|
||||
arg = (ser,) if use_arg else ()
|
||||
expanding = ser.expanding()
|
||||
op = getattr(expanding, kernel)
|
||||
if numeric_only and dtype is object:
|
||||
msg = f"Expanding.{kernel} does not implement numeric_only"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
op(*arg, numeric_only=numeric_only)
|
||||
else:
|
||||
result = op(*arg, numeric_only=numeric_only)
|
||||
|
||||
ser2 = ser.astype(float)
|
||||
arg2 = (ser2,) if use_arg else ()
|
||||
expanding2 = ser2.expanding()
|
||||
op2 = getattr(expanding2, kernel)
|
||||
expected = op2(*arg2, numeric_only=numeric_only)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_keyword_quantile_deprecated():
|
||||
# GH #52550
|
||||
ser = Series([1, 2, 3, 4])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
ser.expanding().quantile(quantile=0.5)
|
1318
lib/python3.13/site-packages/pandas/tests/window/test_groupby.py
Normal file
File diff suppressed because it is too large
455
lib/python3.13/site-packages/pandas/tests/window/test_numba.py
Normal file
@@ -0,0 +1,455 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import NumbaUtilError
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
option_context,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pytestmark = pytest.mark.single_cpu
|
||||
|
||||
|
||||
@pytest.fixture(params=["single", "table"])
|
||||
def method(request):
|
||||
"""method keyword in rolling/expanding/ewm constructor"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
["sum", {}],
|
||||
["mean", {}],
|
||||
["median", {}],
|
||||
["max", {}],
|
||||
["min", {}],
|
||||
["var", {}],
|
||||
["var", {"ddof": 0}],
|
||||
["std", {}],
|
||||
["std", {"ddof": 0}],
|
||||
]
|
||||
)
|
||||
def arithmetic_numba_supported_operators(request):
|
||||
return request.param
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
@pytest.mark.filterwarnings("ignore")
|
||||
# Filter warnings when parallel=True and the function can't be parallelized by Numba
|
||||
class TestEngine:
|
||||
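    # Rolling.apply with engine="numba" should produce the same results as the
    # default "cython" engine, whether or not the applied function is pre-jitted.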
@pytest.mark.parametrize("jit", [True, False])
|
||||
def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center, step):
|
||||
def f(x, *args):
|
||||
arg_sum = 0
|
||||
for arg in args:
|
||||
arg_sum += arg
|
||||
return np.mean(x) + arg_sum
|
||||
|
||||
if jit:
|
||||
import numba
|
||||
|
||||
f = numba.jit(f)
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
args = (2,)
|
||||
|
||||
s = Series(range(10))
|
||||
result = s.rolling(2, center=center, step=step).apply(
|
||||
f, args=args, engine="numba", engine_kwargs=engine_kwargs, raw=True
|
||||
)
|
||||
expected = s.rolling(2, center=center, step=step).apply(
|
||||
f, engine="cython", args=args, raw=True
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
DataFrame(np.eye(5)),
|
||||
DataFrame(
|
||||
[
|
||||
[5, 7, 7, 7, np.nan, np.inf, 4, 3, 3, 3],
|
||||
[5, 7, 7, 7, np.nan, np.inf, 7, 3, 3, 3],
|
||||
[np.nan, np.nan, 5, 6, 7, 5, 5, 5, 5, 5],
|
||||
]
|
||||
).T,
|
||||
Series(range(5), name="foo"),
|
||||
Series([20, 10, 10, np.inf, 1, 1, 2, 3]),
|
||||
Series([20, 10, 10, np.nan, 10, 1, 2, 3]),
|
||||
],
|
||||
)
|
||||
def test_numba_vs_cython_rolling_methods(
|
||||
self,
|
||||
data,
|
||||
nogil,
|
||||
parallel,
|
||||
nopython,
|
||||
arithmetic_numba_supported_operators,
|
||||
step,
|
||||
):
|
||||
method, kwargs = arithmetic_numba_supported_operators
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
roll = data.rolling(3, step=step)
|
||||
result = getattr(roll, method)(
|
||||
engine="numba", engine_kwargs=engine_kwargs, **kwargs
|
||||
)
|
||||
expected = getattr(roll, method)(engine="cython", **kwargs)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data", [DataFrame(np.eye(5)), Series(range(5), name="foo")]
|
||||
)
|
||||
def test_numba_vs_cython_expanding_methods(
|
||||
self, data, nogil, parallel, nopython, arithmetic_numba_supported_operators
|
||||
):
|
||||
method, kwargs = arithmetic_numba_supported_operators
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
data = DataFrame(np.eye(5))
|
||||
expand = data.expanding()
|
||||
result = getattr(expand, method)(
|
||||
engine="numba", engine_kwargs=engine_kwargs, **kwargs
|
||||
)
|
||||
expected = getattr(expand, method)(engine="cython", **kwargs)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("jit", [True, False])
|
||||
def test_cache_apply(self, jit, nogil, parallel, nopython, step):
|
||||
# Test that the functions are cached correctly if we switch functions
|
||||
def func_1(x):
|
||||
return np.mean(x) + 4
|
||||
|
||||
def func_2(x):
|
||||
return np.std(x) * 5
|
||||
|
||||
if jit:
|
||||
import numba
|
||||
|
||||
func_1 = numba.jit(func_1)
|
||||
func_2 = numba.jit(func_2)
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
roll = Series(range(10)).rolling(2, step=step)
|
||||
result = roll.apply(
|
||||
func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True
|
||||
)
|
||||
expected = roll.apply(func_1, engine="cython", raw=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = roll.apply(
|
||||
func_2, engine="numba", engine_kwargs=engine_kwargs, raw=True
|
||||
)
|
||||
expected = roll.apply(func_2, engine="cython", raw=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
# This run should use the cached func_1
|
||||
result = roll.apply(
|
||||
func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True
|
||||
)
|
||||
expected = roll.apply(func_1, engine="cython", raw=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"window,window_kwargs",
|
||||
[
|
||||
["rolling", {"window": 3, "min_periods": 0}],
|
||||
["expanding", {}],
|
||||
],
|
||||
)
|
||||
def test_dont_cache_args(
|
||||
self, window, window_kwargs, nogil, parallel, nopython, method
|
||||
):
|
||||
# GH 42287
|
||||
|
||||
def add(values, x):
|
||||
return np.sum(values) + x
|
||||
|
||||
engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
|
||||
df = DataFrame({"value": [0, 0, 0]})
|
||||
result = getattr(df, window)(method=method, **window_kwargs).apply(
|
||||
add, raw=True, engine="numba", engine_kwargs=engine_kwargs, args=(1,)
|
||||
)
|
||||
expected = DataFrame({"value": [1.0, 1.0, 1.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = getattr(df, window)(method=method, **window_kwargs).apply(
|
||||
add, raw=True, engine="numba", engine_kwargs=engine_kwargs, args=(2,)
|
||||
)
|
||||
expected = DataFrame({"value": [2.0, 2.0, 2.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_dont_cache_engine_kwargs(self):
|
||||
# If the user passes a different set of engine_kwargs don't return the same
|
||||
# jitted function
|
||||
nogil = False
|
||||
parallel = True
|
||||
nopython = True
|
||||
|
||||
def func(x):
|
||||
return nogil + parallel + nopython
|
||||
|
||||
engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
|
||||
df = DataFrame({"value": [0, 0, 0]})
|
||||
result = df.rolling(1).apply(
|
||||
func, raw=True, engine="numba", engine_kwargs=engine_kwargs
|
||||
)
|
||||
expected = DataFrame({"value": [2.0, 2.0, 2.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
parallel = False
|
||||
engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel}
|
||||
result = df.rolling(1).apply(
|
||||
func, raw=True, engine="numba", engine_kwargs=engine_kwargs
|
||||
)
|
||||
expected = DataFrame({"value": [1.0, 1.0, 1.0]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
class TestEWM:
|
||||
@pytest.mark.parametrize(
|
||||
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
|
||||
)
|
||||
@pytest.mark.parametrize("method", ["mean", "sum"])
|
||||
def test_invalid_engine(self, grouper, method):
|
||||
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
|
||||
with pytest.raises(ValueError, match="engine must be either"):
|
||||
getattr(grouper(df).ewm(com=1.0), method)(engine="foo")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
|
||||
)
|
||||
@pytest.mark.parametrize("method", ["mean", "sum"])
|
||||
def test_invalid_engine_kwargs(self, grouper, method):
|
||||
df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
|
||||
with pytest.raises(ValueError, match="cython engine does not"):
|
||||
getattr(grouper(df).ewm(com=1.0), method)(
|
||||
engine="cython", engine_kwargs={"nopython": True}
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("grouper", ["None", "groupby"])
|
||||
@pytest.mark.parametrize("method", ["mean", "sum"])
|
||||
def test_cython_vs_numba(
|
||||
self, grouper, method, nogil, parallel, nopython, ignore_na, adjust
|
||||
):
|
||||
df = DataFrame({"B": range(4)})
|
||||
if grouper == "None":
|
||||
grouper = lambda x: x
|
||||
else:
|
||||
df["A"] = ["a", "b", "a", "b"]
|
||||
grouper = lambda x: x.groupby("A")
|
||||
if method == "sum":
|
||||
adjust = True
|
||||
ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs)
|
||||
expected = getattr(ewm, method)(engine="cython")
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("grouper", ["None", "groupby"])
|
||||
def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na):
|
||||
# GH 40951
|
||||
|
||||
df = DataFrame({"B": [0, 0, 1, 1, 2, 2]})
|
||||
if grouper == "None":
|
||||
grouper = lambda x: x
|
||||
else:
|
||||
grouper = lambda x: x.groupby("A")
|
||||
df["A"] = ["a", "b", "a", "b", "b", "a"]
|
||||
|
||||
halflife = "23 days"
|
||||
times = to_datetime(
|
||||
[
|
||||
"2020-01-01",
|
||||
"2020-01-01",
|
||||
"2020-01-02",
|
||||
"2020-01-10",
|
||||
"2020-02-23",
|
||||
"2020-01-03",
|
||||
]
|
||||
)
|
||||
ewm = grouper(df).ewm(
|
||||
halflife=halflife, adjust=True, ignore_na=ignore_na, times=times
|
||||
)
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
|
||||
expected = ewm.mean(engine="cython")
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
def test_use_global_config():
|
||||
def f(x):
|
||||
return np.mean(x) + 2
|
||||
|
||||
s = Series(range(10))
|
||||
with option_context("compute.use_numba", True):
|
||||
result = s.rolling(2).apply(f, engine=None, raw=True)
|
||||
expected = s.rolling(2).apply(f, engine="numba", raw=True)
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
def test_invalid_kwargs_nopython():
|
||||
with pytest.raises(NumbaUtilError, match="numba does not support kwargs with"):
|
||||
Series(range(1)).rolling(1).apply(
|
||||
lambda x: x, kwargs={"a": 1}, engine="numba", raw=True
|
||||
)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.filterwarnings("ignore")
|
||||
# Filter warnings when parallel=True and the function can't be parallelized by Numba
|
||||
class TestTableMethod:
|
||||
def test_table_series_valueerror(self):
|
||||
def f(x):
|
||||
return np.sum(x, axis=0) + 1
|
||||
|
||||
with pytest.raises(
|
||||
ValueError, match="method='table' not applicable for Series objects."
|
||||
):
|
||||
Series(range(1)).rolling(1, method="table").apply(
|
||||
f, engine="numba", raw=True
|
||||
)
|
||||
|
||||
def test_table_method_rolling_methods(
|
||||
self,
|
||||
axis,
|
||||
nogil,
|
||||
parallel,
|
||||
nopython,
|
||||
arithmetic_numba_supported_operators,
|
||||
step,
|
||||
):
|
||||
method, kwargs = arithmetic_numba_supported_operators
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
df = DataFrame(np.eye(3))
|
||||
roll_table = df.rolling(2, method="table", axis=axis, min_periods=0, step=step)
|
||||
if method in ("var", "std"):
|
||||
with pytest.raises(NotImplementedError, match=f"{method} not supported"):
|
||||
getattr(roll_table, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
else:
|
||||
roll_single = df.rolling(
|
||||
2, method="single", axis=axis, min_periods=0, step=step
|
||||
)
|
||||
result = getattr(roll_table, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
expected = getattr(roll_single, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython, step):
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
def f(x):
|
||||
return np.sum(x, axis=0) + 1
|
||||
|
||||
df = DataFrame(np.eye(3))
|
||||
result = df.rolling(
|
||||
2, method="table", axis=axis, min_periods=0, step=step
|
||||
).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba")
|
||||
expected = df.rolling(
|
||||
2, method="single", axis=axis, min_periods=0, step=step
|
||||
).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_table_method_rolling_weighted_mean(self, step):
|
||||
def weighted_mean(x):
|
||||
arr = np.ones((1, x.shape[1]))
|
||||
arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum()
|
||||
return arr
|
||||
|
||||
df = DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]])
|
||||
result = df.rolling(2, method="table", min_periods=0, step=step).apply(
|
||||
weighted_mean, raw=True, engine="numba"
|
||||
)
|
||||
expected = DataFrame(
|
||||
[
|
||||
[1.0, 2.0, 1.0],
|
||||
[1.8, 2.0, 1.0],
|
||||
[3.333333, 2.333333, 1.0],
|
||||
[1.555556, 7, 1.0],
|
||||
]
|
||||
)[::step]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython):
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
def f(x):
|
||||
return np.sum(x, axis=0) + 1
|
||||
|
||||
df = DataFrame(np.eye(3))
|
||||
result = df.expanding(method="table", axis=axis).apply(
|
||||
f, raw=True, engine_kwargs=engine_kwargs, engine="numba"
|
||||
)
|
||||
expected = df.expanding(method="single", axis=axis).apply(
|
||||
f, raw=True, engine_kwargs=engine_kwargs, engine="numba"
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_table_method_expanding_methods(
|
||||
self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators
|
||||
):
|
||||
method, kwargs = arithmetic_numba_supported_operators
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
df = DataFrame(np.eye(3))
|
||||
expand_table = df.expanding(method="table", axis=axis)
|
||||
if method in ("var", "std"):
|
||||
with pytest.raises(NotImplementedError, match=f"{method} not supported"):
|
||||
getattr(expand_table, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
else:
|
||||
expand_single = df.expanding(method="single", axis=axis)
|
||||
result = getattr(expand_table, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
expected = getattr(expand_single, method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba", **kwargs
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("data", [np.eye(3), np.ones((2, 3)), np.ones((3, 2))])
|
||||
@pytest.mark.parametrize("method", ["mean", "sum"])
|
||||
def test_table_method_ewm(self, data, method, axis, nogil, parallel, nopython):
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
df = DataFrame(data)
|
||||
|
||||
result = getattr(df.ewm(com=1, method="table", axis=axis), method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba"
|
||||
)
|
||||
expected = getattr(df.ewm(com=1, method="single", axis=axis), method)(
|
||||
engine_kwargs=engine_kwargs, engine="numba"
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_if_no("numba")
|
||||
def test_npfunc_no_warnings():
|
||||
df = DataFrame({"col1": [1, 2, 3, 4, 5]})
|
||||
with tm.assert_produces_warning(False):
|
||||
df.col1.rolling(2).apply(np.prod, raw=True, engine="numba")
|
103
lib/python3.13/site-packages/pandas/tests/window/test_online.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pytestmark = pytest.mark.single_cpu
|
||||
|
||||
pytest.importorskip("numba")
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore")
|
||||
# Filter warnings when parallel=True and the function can't be parallelized by Numba
|
||||
class TestEWM:
|
||||
def test_invalid_update(self):
|
||||
df = DataFrame({"a": range(5), "b": range(5)})
|
||||
online_ewm = df.head(2).ewm(0.5).online()
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="Must call mean with update=None first before passing update",
|
||||
):
|
||||
online_ewm.mean(update=df.head(1))
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize(
|
||||
"obj", [DataFrame({"a": range(5), "b": range(5)}), Series(range(5), name="foo")]
|
||||
)
|
||||
def test_online_vs_non_online_mean(
|
||||
self, obj, nogil, parallel, nopython, adjust, ignore_na
|
||||
):
|
||||
expected = obj.ewm(0.5, adjust=adjust, ignore_na=ignore_na).mean()
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
|
||||
online_ewm = (
|
||||
obj.head(2)
|
||||
.ewm(0.5, adjust=adjust, ignore_na=ignore_na)
|
||||
.online(engine_kwargs=engine_kwargs)
|
||||
)
|
||||
# Test resetting once
|
||||
for _ in range(2):
|
||||
result = online_ewm.mean()
|
||||
tm.assert_equal(result, expected.head(2))
|
||||
|
||||
result = online_ewm.mean(update=obj.tail(3))
|
||||
tm.assert_equal(result, expected.tail(3))
|
||||
|
||||
online_ewm.reset()
|
||||
|
||||
@pytest.mark.xfail(raises=NotImplementedError)
|
||||
@pytest.mark.parametrize(
|
||||
"obj", [DataFrame({"a": range(5), "b": range(5)}), Series(range(5), name="foo")]
|
||||
)
|
||||
def test_update_times_mean(
|
||||
self, obj, nogil, parallel, nopython, adjust, ignore_na, halflife_with_times
|
||||
):
|
||||
times = Series(
|
||||
np.array(
|
||||
["2020-01-01", "2020-01-05", "2020-01-07", "2020-01-17", "2020-01-21"],
|
||||
dtype="datetime64[ns]",
|
||||
)
|
||||
)
|
||||
expected = obj.ewm(
|
||||
0.5,
|
||||
adjust=adjust,
|
||||
ignore_na=ignore_na,
|
||||
times=times,
|
||||
halflife=halflife_with_times,
|
||||
).mean()
|
||||
|
||||
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
|
||||
online_ewm = (
|
||||
obj.head(2)
|
||||
.ewm(
|
||||
0.5,
|
||||
adjust=adjust,
|
||||
ignore_na=ignore_na,
|
||||
times=times.head(2),
|
||||
halflife=halflife_with_times,
|
||||
)
|
||||
.online(engine_kwargs=engine_kwargs)
|
||||
)
|
||||
# Test resetting once
|
||||
for _ in range(2):
|
||||
result = online_ewm.mean()
|
||||
tm.assert_equal(result, expected.head(2))
|
||||
|
||||
result = online_ewm.mean(update=obj.tail(3), update_times=times.tail(3))
|
||||
tm.assert_equal(result, expected.tail(3))
|
||||
|
||||
online_ewm.reset()
|
||||
|
||||
@pytest.mark.parametrize("method", ["aggregate", "std", "corr", "cov", "var"])
|
||||
def test_ewm_notimplementederror_raises(self, method):
|
||||
ser = Series(range(10))
|
||||
kwargs = {}
|
||||
if method == "aggregate":
|
||||
kwargs["func"] = lambda x: x
|
||||
|
||||
with pytest.raises(NotImplementedError, match=".* is not implemented."):
|
||||
getattr(ser.ewm(1).online(), method)(**kwargs)
|
445
lib/python3.13/site-packages/pandas/tests/window/test_pairwise.py
Normal file
@@ -0,0 +1,445 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import IS64
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.algorithms import safe_sort
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
params=[
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", "C"]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1.0, 0]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0.0, 1]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", 1]),
|
||||
DataFrame([[2.0, 4.0], [1.0, 2.0], [5.0, 2.0], [8.0, 1.0]], columns=[1, 0.0]),
|
||||
DataFrame([[2, 4.0], [1, 2.0], [5, 2.0], [8, 1.0]], columns=[0, 1.0]),
|
||||
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.0]], columns=[1.0, "X"]),
|
||||
]
|
||||
)
|
||||
def pairwise_frames(request):
|
||||
"""Pairwise frames test_pairwise"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pairwise_target_frame():
|
||||
"""Pairwise target frame for test_pairwise"""
|
||||
return DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pairwise_other_frame():
|
||||
"""Pairwise other frame for test_pairwise"""
|
||||
return DataFrame(
|
||||
[[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1]],
|
||||
columns=["Y", "Z", "X"],
|
||||
)
|
||||
|
||||
|
||||
def test_rolling_cov(series):
|
||||
A = series
|
||||
B = A + np.random.default_rng(2).standard_normal(len(A))
|
||||
|
||||
result = A.rolling(window=50, min_periods=25).cov(B)
|
||||
tm.assert_almost_equal(result.iloc[-1], np.cov(A[-50:], B[-50:])[0, 1])
|
||||
|
||||
|
||||
def test_rolling_corr(series):
|
||||
A = series
|
||||
B = A + np.random.default_rng(2).standard_normal(len(A))
|
||||
|
||||
result = A.rolling(window=50, min_periods=25).corr(B)
|
||||
tm.assert_almost_equal(result.iloc[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])
|
||||
|
||||
|
||||
def test_rolling_corr_bias_correction():
|
||||
# test for correct bias correction
|
||||
a = Series(
|
||||
np.arange(20, dtype=np.float64), index=date_range("2020-01-01", periods=20)
|
||||
)
|
||||
b = a.copy()
|
||||
a[:5] = np.nan
|
||||
b[:10] = np.nan
|
||||
|
||||
result = a.rolling(window=len(a), min_periods=1).corr(b)
|
||||
tm.assert_almost_equal(result.iloc[-1], a.corr(b))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["cov", "corr"])
|
||||
def test_rolling_pairwise_cov_corr(func, frame):
|
||||
result = getattr(frame.rolling(window=10, min_periods=5), func)()
|
||||
result = result.loc[(slice(None), 1), 5]
|
||||
result.index = result.index.droplevel(1)
|
||||
expected = getattr(frame[1].rolling(window=10, min_periods=5), func)(frame[5])
|
||||
tm.assert_series_equal(result, expected, check_names=False)
|
||||
|
||||
|
||||
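# Binary rolling corr/cov between a Series and a DataFrame should broadcast
# over the DataFrame's columns and give the same result in either argument order.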
@pytest.mark.parametrize("method", ["corr", "cov"])
|
||||
def test_flex_binary_frame(method, frame):
|
||||
series = frame[1]
|
||||
|
||||
res = getattr(series.rolling(window=10), method)(frame)
|
||||
res2 = getattr(frame.rolling(window=10), method)(series)
|
||||
exp = frame.apply(lambda x: getattr(series.rolling(window=10), method)(x))
|
||||
|
||||
tm.assert_frame_equal(res, exp)
|
||||
tm.assert_frame_equal(res2, exp)
|
||||
|
||||
frame2 = frame.copy()
|
||||
frame2 = DataFrame(
|
||||
np.random.default_rng(2).standard_normal(frame2.shape),
|
||||
index=frame2.index,
|
||||
columns=frame2.columns,
|
||||
)
|
||||
|
||||
res3 = getattr(frame.rolling(window=10), method)(frame2)
|
||||
exp = DataFrame(
|
||||
{k: getattr(frame[k].rolling(window=10), method)(frame2[k]) for k in frame}
|
||||
)
|
||||
tm.assert_frame_equal(res3, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("window", range(7))
|
||||
def test_rolling_corr_with_zero_variance(window):
|
||||
# GH 18430
|
||||
s = Series(np.zeros(20))
|
||||
other = Series(np.arange(20))
|
||||
|
||||
assert s.rolling(window=window).corr(other=other).isna().all()
|
||||
|
||||
|
||||
def test_corr_sanity():
|
||||
# GH 3155
|
||||
df = DataFrame(
|
||||
np.array(
|
||||
[
|
||||
[0.87024726, 0.18505595],
|
||||
[0.64355431, 0.3091617],
|
||||
[0.92372966, 0.50552513],
|
||||
[0.00203756, 0.04520709],
|
||||
[0.84780328, 0.33394331],
|
||||
[0.78369152, 0.63919667],
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
res = df[0].rolling(5, center=True).corr(df[1])
|
||||
assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res)
|
||||
|
||||
df = DataFrame(np.random.default_rng(2).random((30, 2)))
|
||||
res = df[0].rolling(5, center=True).corr(df[1])
|
||||
assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res)
|
||||
|
||||
|
||||
def test_rolling_cov_diff_length():
|
||||
# GH 7512
|
||||
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
||||
s2 = Series([1, 3], index=[0, 2])
|
||||
result = s1.rolling(window=3, min_periods=2).cov(s2)
|
||||
expected = Series([None, None, 2.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s2a = Series([1, None, 3], index=[0, 1, 2])
|
||||
result = s1.rolling(window=3, min_periods=2).cov(s2a)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_rolling_corr_diff_length():
|
||||
# GH 7512
|
||||
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
||||
s2 = Series([1, 3], index=[0, 2])
|
||||
result = s1.rolling(window=3, min_periods=2).corr(s2)
|
||||
expected = Series([None, None, 1.0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s2a = Series([1, None, 3], index=[0, 1, 2])
|
||||
result = s1.rolling(window=3, min_periods=2).corr(s2a)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)),
|
||||
lambda x: (x.rolling(window=10, min_periods=5).corr(x, pairwise=True)),
|
||||
],
|
||||
)
|
||||
def test_rolling_functions_window_non_shrinkage_binary(f):
|
||||
# corr/cov return a MI DataFrame
|
||||
df = DataFrame(
|
||||
[[1, 5], [3, 2], [3, 9], [-1, 0]],
|
||||
columns=Index(["A", "B"], name="foo"),
|
||||
index=Index(range(4), name="bar"),
|
||||
)
|
||||
df_expected = DataFrame(
|
||||
columns=Index(["A", "B"], name="foo"),
|
||||
index=MultiIndex.from_product([df.index, df.columns], names=["bar", "foo"]),
|
||||
dtype="float64",
|
||||
)
|
||||
df_result = f(df)
|
||||
tm.assert_frame_equal(df_result, df_expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)),
|
||||
lambda x: (x.rolling(window=10, min_periods=5).corr(x, pairwise=True)),
|
||||
],
|
||||
)
|
||||
def test_moment_functions_zero_length_pairwise(f):
|
||||
df1 = DataFrame()
|
||||
df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
|
||||
df2["a"] = df2["a"].astype("float64")
|
||||
|
||||
df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns]))
|
||||
df2_expected = DataFrame(
|
||||
index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
|
||||
columns=Index(["a"], name="foo"),
|
||||
dtype="float64",
|
||||
)
|
||||
|
||||
df1_result = f(df1)
|
||||
tm.assert_frame_equal(df1_result, df1_expected)
|
||||
|
||||
df2_result = f(df2)
|
||||
tm.assert_frame_equal(df2_result, df2_expected)
|
||||
|
||||
|
||||
class TestPairwise:
|
||||
# GH 7738
|
||||
@pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()])
|
||||
def test_no_flex(self, pairwise_frames, pairwise_target_frame, f):
|
||||
# DataFrame methods (which do not call flex_binary_moment())
|
||||
|
||||
result = f(pairwise_frames)
|
||||
tm.assert_index_equal(result.index, pairwise_frames.columns)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(pairwise_target_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.expanding().cov(pairwise=True),
|
||||
lambda x: x.expanding().corr(pairwise=True),
|
||||
lambda x: x.rolling(window=3).cov(pairwise=True),
|
||||
lambda x: x.rolling(window=3).corr(pairwise=True),
|
||||
lambda x: x.ewm(com=3).cov(pairwise=True),
|
||||
lambda x: x.ewm(com=3).corr(pairwise=True),
|
||||
],
|
||||
)
|
||||
def test_pairwise_with_self(self, pairwise_frames, pairwise_target_frame, f):
|
||||
# DataFrame with itself, pairwise=True
|
||||
# note that we may construct the 1st level of the MI
|
||||
# in a non-monotonic way, so compare accordingly
|
||||
result = f(pairwise_frames)
|
||||
tm.assert_index_equal(
|
||||
result.index.levels[0], pairwise_frames.index, check_names=False
|
||||
)
|
||||
tm.assert_index_equal(
|
||||
safe_sort(result.index.levels[1]),
|
||||
safe_sort(pairwise_frames.columns.unique()),
|
||||
)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(pairwise_target_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.expanding().cov(pairwise=False),
|
||||
lambda x: x.expanding().corr(pairwise=False),
|
||||
lambda x: x.rolling(window=3).cov(pairwise=False),
|
||||
lambda x: x.rolling(window=3).corr(pairwise=False),
|
||||
lambda x: x.ewm(com=3).cov(pairwise=False),
|
||||
lambda x: x.ewm(com=3).corr(pairwise=False),
|
||||
],
|
||||
)
|
||||
def test_no_pairwise_with_self(self, pairwise_frames, pairwise_target_frame, f):
|
||||
# DataFrame with itself, pairwise=False
|
||||
result = f(pairwise_frames)
|
||||
tm.assert_index_equal(result.index, pairwise_frames.index)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(pairwise_target_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x, y: x.expanding().cov(y, pairwise=True),
|
||||
lambda x, y: x.expanding().corr(y, pairwise=True),
|
||||
lambda x, y: x.rolling(window=3).cov(y, pairwise=True),
|
||||
# TODO: We're missing a flag somewhere in meson
|
||||
pytest.param(
|
||||
lambda x, y: x.rolling(window=3).corr(y, pairwise=True),
|
||||
marks=pytest.mark.xfail(
|
||||
not IS64, reason="Precision issues on 32 bit", strict=False
|
||||
),
|
||||
),
|
||||
lambda x, y: x.ewm(com=3).cov(y, pairwise=True),
|
||||
lambda x, y: x.ewm(com=3).corr(y, pairwise=True),
|
||||
],
|
||||
)
|
||||
def test_pairwise_with_other(
|
||||
self, pairwise_frames, pairwise_target_frame, pairwise_other_frame, f
|
||||
):
|
||||
# DataFrame with another DataFrame, pairwise=True
|
||||
result = f(pairwise_frames, pairwise_other_frame)
|
||||
tm.assert_index_equal(
|
||||
result.index.levels[0], pairwise_frames.index, check_names=False
|
||||
)
|
||||
tm.assert_index_equal(
|
||||
safe_sort(result.index.levels[1]),
|
||||
safe_sort(pairwise_other_frame.columns.unique()),
|
||||
)
|
||||
expected = f(pairwise_target_frame, pairwise_other_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:RuntimeWarning")
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x, y: x.expanding().cov(y, pairwise=False),
|
||||
lambda x, y: x.expanding().corr(y, pairwise=False),
|
||||
lambda x, y: x.rolling(window=3).cov(y, pairwise=False),
|
||||
lambda x, y: x.rolling(window=3).corr(y, pairwise=False),
|
||||
lambda x, y: x.ewm(com=3).cov(y, pairwise=False),
|
||||
lambda x, y: x.ewm(com=3).corr(y, pairwise=False),
|
||||
],
|
||||
)
|
||||
def test_no_pairwise_with_other(self, pairwise_frames, pairwise_other_frame, f):
|
||||
# DataFrame with another DataFrame, pairwise=False
|
||||
result = (
|
||||
f(pairwise_frames, pairwise_other_frame)
|
||||
if pairwise_frames.columns.is_unique
|
||||
else None
|
||||
)
|
||||
if result is not None:
|
||||
# we can have int and str columns
|
||||
expected_index = pairwise_frames.index.union(pairwise_other_frame.index)
|
||||
expected_columns = pairwise_frames.columns.union(
|
||||
pairwise_other_frame.columns
|
||||
)
|
||||
tm.assert_index_equal(result.index, expected_index)
|
||||
tm.assert_index_equal(result.columns, expected_columns)
|
||||
else:
|
||||
with pytest.raises(ValueError, match="'arg1' columns are not unique"):
|
||||
f(pairwise_frames, pairwise_other_frame)
|
||||
with pytest.raises(ValueError, match="'arg2' columns are not unique"):
|
||||
f(pairwise_other_frame, pairwise_frames)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x, y: x.expanding().cov(y),
|
||||
lambda x, y: x.expanding().corr(y),
|
||||
lambda x, y: x.rolling(window=3).cov(y),
|
||||
lambda x, y: x.rolling(window=3).corr(y),
|
||||
lambda x, y: x.ewm(com=3).cov(y),
|
||||
lambda x, y: x.ewm(com=3).corr(y),
|
||||
],
|
||||
)
|
||||
def test_pairwise_with_series(self, pairwise_frames, pairwise_target_frame, f):
|
||||
# DataFrame with a Series
|
||||
result = f(pairwise_frames, Series([1, 1, 3, 8]))
|
||||
tm.assert_index_equal(result.index, pairwise_frames.index)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(pairwise_target_frame, Series([1, 1, 3, 8]))
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
result = f(Series([1, 1, 3, 8]), pairwise_frames)
|
||||
tm.assert_index_equal(result.index, pairwise_frames.index)
|
||||
tm.assert_index_equal(result.columns, pairwise_frames.columns)
|
||||
expected = f(Series([1, 1, 3, 8]), pairwise_target_frame)
|
||||
# since we have sorted the results
|
||||
# we can only compare non-nans
|
||||
result = result.dropna().values
|
||||
expected = expected.dropna().values
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
def test_corr_freq_memory_error(self):
|
||||
# GH 31789
|
||||
s = Series(range(5), index=date_range("2020", periods=5))
|
||||
result = s.rolling("12h").corr(s)
|
||||
expected = Series([np.nan] * 5, index=date_range("2020", periods=5))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_cov_mulittindex(self):
|
||||
# GH 34440
|
||||
|
||||
columns = MultiIndex.from_product([list("ab"), list("xy"), list("AB")])
|
||||
index = range(3)
|
||||
df = DataFrame(np.arange(24).reshape(3, 8), index=index, columns=columns)
|
||||
|
||||
result = df.ewm(alpha=0.1).cov()
|
||||
|
||||
index = MultiIndex.from_product([range(3), list("ab"), list("xy"), list("AB")])
|
||||
columns = MultiIndex.from_product([list("ab"), list("xy"), list("AB")])
|
||||
expected = DataFrame(
|
||||
np.vstack(
|
||||
(
|
||||
np.full((8, 8), np.nan),
|
||||
np.full((8, 8), 32.000000),
|
||||
np.full((8, 8), 63.881919),
|
||||
)
|
||||
),
|
||||
index=index,
|
||||
columns=columns,
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_multindex_columns_pairwise_func(self):
|
||||
# GH 21157
|
||||
columns = MultiIndex.from_arrays([["M", "N"], ["P", "Q"]], names=["a", "b"])
|
||||
df = DataFrame(np.ones((5, 2)), columns=columns)
|
||||
result = df.rolling(3).corr()
|
||||
expected = DataFrame(
|
||||
np.nan,
|
||||
index=MultiIndex.from_arrays(
|
||||
[
|
||||
np.repeat(np.arange(5, dtype=np.int64), 2),
|
||||
["M", "N"] * 5,
|
||||
["P", "Q"] * 5,
|
||||
],
|
||||
names=[None, "a", "b"],
|
||||
),
|
||||
columns=columns,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
1979
lib/python3.13/site-packages/pandas/tests/window/test_rolling.py
Normal file
File diff suppressed because it is too large
532
lib/python3.13/site-packages/pandas/tests/window/test_rolling_functions.py
Normal file
@@ -0,0 +1,532 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Series,
|
||||
concat,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.tseries import offsets
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs",
|
||||
[
|
||||
[np.mean, "mean", {}],
|
||||
[np.nansum, "sum", {}],
|
||||
[
|
||||
lambda x: np.isfinite(x).astype(float).sum(),
|
||||
"count",
|
||||
{},
|
||||
],
|
||||
[np.median, "median", {}],
|
||||
[np.min, "min", {}],
|
||||
[np.max, "max", {}],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
|
||||
],
|
||||
)
|
||||
def test_series(series, compare_func, roll_func, kwargs, step):
|
||||
result = getattr(series.rolling(50, step=step), roll_func)(**kwargs)
|
||||
assert isinstance(result, Series)
|
||||
end = range(0, len(series), step or 1)[-1] + 1
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs",
|
||||
[
|
||||
[np.mean, "mean", {}],
|
||||
[np.nansum, "sum", {}],
|
||||
[
|
||||
lambda x: np.isfinite(x).astype(float).sum(),
|
||||
"count",
|
||||
{},
|
||||
],
|
||||
[np.median, "median", {}],
|
||||
[np.min, "min", {}],
|
||||
[np.max, "max", {}],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
|
||||
],
|
||||
)
|
||||
def test_frame(raw, frame, compare_func, roll_func, kwargs, step):
|
||||
result = getattr(frame.rolling(50, step=step), roll_func)(**kwargs)
|
||||
assert isinstance(result, DataFrame)
|
||||
end = range(0, len(frame), step or 1)[-1] + 1
|
||||
tm.assert_series_equal(
|
||||
result.iloc[-1, :],
|
||||
frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs, minp",
|
||||
[
|
||||
[np.mean, "mean", {}, 10],
|
||||
[np.nansum, "sum", {}, 10],
|
||||
[lambda x: np.isfinite(x).astype(float).sum(), "count", {}, 0],
|
||||
[np.median, "median", {}, 10],
|
||||
[np.min, "min", {}, 10],
|
||||
[np.max, "max", {}, 10],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}, 10],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}, 10],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}, 10],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}, 10],
|
||||
],
|
||||
)
|
||||
def test_time_rule_series(series, compare_func, roll_func, kwargs, minp):
|
||||
win = 25
|
||||
ser = series[::2].resample("B").mean()
|
||||
series_result = getattr(ser.rolling(window=win, min_periods=minp), roll_func)(
|
||||
**kwargs
|
||||
)
|
||||
last_date = series_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_series = series[::2].truncate(prev_date, last_date)
|
||||
tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs, minp",
|
||||
[
|
||||
[np.mean, "mean", {}, 10],
|
||||
[np.nansum, "sum", {}, 10],
|
||||
[lambda x: np.isfinite(x).astype(float).sum(), "count", {}, 0],
|
||||
[np.median, "median", {}, 10],
|
||||
[np.min, "min", {}, 10],
|
||||
[np.max, "max", {}, 10],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}, 10],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}, 10],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}, 10],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}, 10],
|
||||
],
|
||||
)
|
||||
def test_time_rule_frame(raw, frame, compare_func, roll_func, kwargs, minp):
|
||||
win = 25
|
||||
frm = frame[::2].resample("B").mean()
|
||||
frame_result = getattr(frm.rolling(window=win, min_periods=minp), roll_func)(
|
||||
**kwargs
|
||||
)
|
||||
last_date = frame_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_frame = frame[::2].truncate(prev_date, last_date)
|
||||
tm.assert_series_equal(
|
||||
frame_result.xs(last_date),
|
||||
trunc_frame.apply(compare_func, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"compare_func, roll_func, kwargs",
|
||||
[
|
||||
[np.mean, "mean", {}],
|
||||
[np.nansum, "sum", {}],
|
||||
[np.median, "median", {}],
|
||||
[np.min, "min", {}],
|
||||
[np.max, "max", {}],
|
||||
[lambda x: np.std(x, ddof=1), "std", {}],
|
||||
[lambda x: np.std(x, ddof=0), "std", {"ddof": 0}],
|
||||
[lambda x: np.var(x, ddof=1), "var", {}],
|
||||
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
|
||||
],
|
||||
)
|
||||
def test_nans(compare_func, roll_func, kwargs):
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = getattr(obj.rolling(50, min_periods=30), roll_func)(**kwargs)
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
|
||||
|
||||
# min_periods is working correctly
|
||||
result = getattr(obj.rolling(20, min_periods=15), roll_func)(**kwargs)
|
||||
assert isna(result.iloc[23])
|
||||
assert not isna(result.iloc[24])
|
||||
|
||||
assert not isna(result.iloc[-6])
|
||||
assert isna(result.iloc[-5])
|
||||
|
||||
obj2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
result = getattr(obj2.rolling(10, min_periods=5), roll_func)(**kwargs)
|
||||
assert isna(result.iloc[3])
|
||||
assert notna(result.iloc[4])
|
||||
|
||||
if roll_func != "sum":
|
||||
result0 = getattr(obj.rolling(20, min_periods=0), roll_func)(**kwargs)
|
||||
result1 = getattr(obj.rolling(20, min_periods=1), roll_func)(**kwargs)
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
|
||||
def test_nans_count():
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
result = obj.rolling(50, min_periods=30).count()
|
||||
tm.assert_almost_equal(
|
||||
result.iloc[-1], np.isfinite(obj[10:-10]).astype(float).sum()
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"roll_func, kwargs",
|
||||
[
|
||||
["mean", {}],
|
||||
["sum", {}],
|
||||
["median", {}],
|
||||
["min", {}],
|
||||
["max", {}],
|
||||
["std", {}],
|
||||
["std", {"ddof": 0}],
|
||||
["var", {}],
|
||||
["var", {"ddof": 0}],
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("minp", [0, 99, 100])
|
||||
def test_min_periods(series, minp, roll_func, kwargs, step):
|
||||
result = getattr(
|
||||
series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func
|
||||
)(**kwargs)
|
||||
expected = getattr(
|
||||
series.rolling(len(series), min_periods=minp, step=step), roll_func
|
||||
)(**kwargs)
|
||||
nan_mask = isna(result)
|
||||
tm.assert_series_equal(nan_mask, isna(expected))
|
||||
|
||||
nan_mask = ~nan_mask
|
||||
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
|
||||
|
||||
|
||||
def test_min_periods_count(series, step):
|
||||
result = series.rolling(len(series) + 1, min_periods=0, step=step).count()
|
||||
expected = series.rolling(len(series), min_periods=0, step=step).count()
|
||||
nan_mask = isna(result)
|
||||
tm.assert_series_equal(nan_mask, isna(expected))
|
||||
|
||||
nan_mask = ~nan_mask
|
||||
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"roll_func, kwargs, minp",
|
||||
[
|
||||
["mean", {}, 15],
|
||||
["sum", {}, 15],
|
||||
["count", {}, 0],
|
||||
["median", {}, 15],
|
||||
["min", {}, 15],
|
||||
["max", {}, 15],
|
||||
["std", {}, 15],
|
||||
["std", {"ddof": 0}, 15],
|
||||
["var", {}, 15],
|
||||
["var", {"ddof": 0}, 15],
|
||||
],
|
||||
)
|
||||
def test_center(roll_func, kwargs, minp):
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = getattr(obj.rolling(20, min_periods=minp, center=True), roll_func)(
|
||||
**kwargs
|
||||
)
|
||||
expected = (
|
||||
getattr(
|
||||
concat([obj, Series([np.nan] * 9)]).rolling(20, min_periods=minp), roll_func
|
||||
)(**kwargs)
|
||||
.iloc[9:]
|
||||
.reset_index(drop=True)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"roll_func, kwargs, minp, fill_value",
|
||||
[
|
||||
["mean", {}, 10, None],
|
||||
["sum", {}, 10, None],
|
||||
["count", {}, 0, 0],
|
||||
["median", {}, 10, None],
|
||||
["min", {}, 10, None],
|
||||
["max", {}, 10, None],
|
||||
["std", {}, 10, None],
|
||||
["std", {"ddof": 0}, 10, None],
|
||||
["var", {}, 10, None],
|
||||
["var", {"ddof": 0}, 10, None],
|
||||
],
|
||||
)
|
||||
def test_center_reindex_series(series, roll_func, kwargs, minp, fill_value):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
series_xp = (
|
||||
getattr(
|
||||
series.reindex(list(series.index) + s).rolling(window=25, min_periods=minp),
|
||||
roll_func,
|
||||
)(**kwargs)
|
||||
.shift(-12)
|
||||
.reindex(series.index)
|
||||
)
|
||||
series_rs = getattr(
|
||||
series.rolling(window=25, min_periods=minp, center=True), roll_func
|
||||
)(**kwargs)
|
||||
if fill_value is not None:
|
||||
series_xp = series_xp.fillna(fill_value)
|
||||
tm.assert_series_equal(series_xp, series_rs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"roll_func, kwargs, minp, fill_value",
|
||||
[
|
||||
["mean", {}, 10, None],
|
||||
["sum", {}, 10, None],
|
||||
["count", {}, 0, 0],
|
||||
["median", {}, 10, None],
|
||||
["min", {}, 10, None],
|
||||
["max", {}, 10, None],
|
||||
["std", {}, 10, None],
|
||||
["std", {"ddof": 0}, 10, None],
|
||||
["var", {}, 10, None],
|
||||
["var", {"ddof": 0}, 10, None],
|
||||
],
|
||||
)
|
||||
def test_center_reindex_frame(frame, roll_func, kwargs, minp, fill_value):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
frame_xp = (
|
||||
getattr(
|
||||
frame.reindex(list(frame.index) + s).rolling(window=25, min_periods=minp),
|
||||
roll_func,
|
||||
)(**kwargs)
|
||||
.shift(-12)
|
||||
.reindex(frame.index)
|
||||
)
|
||||
frame_rs = getattr(
|
||||
frame.rolling(window=25, min_periods=minp, center=True), roll_func
|
||||
)(**kwargs)
|
||||
if fill_value is not None:
|
||||
frame_xp = frame_xp.fillna(fill_value)
|
||||
tm.assert_frame_equal(frame_xp, frame_rs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).max(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).min(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).sum(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).mean(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).std(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).var(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).skew(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).kurt(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).quantile(q=0.5),
|
||||
lambda x: x.rolling(window=10, min_periods=5).median(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True),
|
||||
pytest.param(
|
||||
lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(),
|
||||
marks=td.skip_if_no("scipy"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_rolling_functions_window_non_shrinkage(f):
|
||||
# GH 7764
|
||||
s = Series(range(4))
|
||||
s_expected = Series(np.nan, index=s.index)
|
||||
df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=["A", "B"])
|
||||
df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
|
||||
|
||||
s_result = f(s)
|
||||
tm.assert_series_equal(s_result, s_expected)
|
||||
|
||||
df_result = f(df)
|
||||
tm.assert_frame_equal(df_result, df_expected)
|
||||
|
||||
|
||||
def test_rolling_max_gh6297(step):
|
||||
"""Replicate result expected in GH #6297"""
|
||||
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
||||
# So that we can have 2 datapoints on one of the days
|
||||
indices.append(datetime(1975, 1, 3, 6, 0))
|
||||
series = Series(range(1, 7), index=indices)
|
||||
# Use floats instead of ints as values
|
||||
series = series.map(lambda x: float(x))
|
||||
# Sort chronologically
|
||||
series = series.sort_index()
|
||||
|
||||
expected = Series(
|
||||
[1.0, 2.0, 6.0, 4.0, 5.0],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
x = series.resample("D").max().rolling(window=1, step=step).max()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_max_resample(step):
|
||||
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
||||
# So that we can have 3 datapoints on last day (4, 10, and 20)
|
||||
indices.append(datetime(1975, 1, 5, 1))
|
||||
indices.append(datetime(1975, 1, 5, 2))
|
||||
series = Series(list(range(5)) + [10, 20], index=indices)
|
||||
# Use floats instead of ints as values
|
||||
series = series.map(lambda x: float(x))
|
||||
# Sort chronologically
|
||||
series = series.sort_index()
|
||||
|
||||
# Default how should be max
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, 20.0],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
x = series.resample("D").max().rolling(window=1, step=step).max()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# Now specify median (10.0)
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, 10.0],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
x = series.resample("D").median().rolling(window=1, step=step).max()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# Now specify mean (4+10+20)/3
|
||||
v = (4.0 + 10.0 + 20.0) / 3.0
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, v],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
x = series.resample("D").mean().rolling(window=1, step=step).max()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_min_resample(step):
|
||||
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
||||
# So that we can have 3 datapoints on last day (4, 10, and 20)
|
||||
indices.append(datetime(1975, 1, 5, 1))
|
||||
indices.append(datetime(1975, 1, 5, 2))
|
||||
series = Series(list(range(5)) + [10, 20], index=indices)
|
||||
# Use floats instead of ints as values
|
||||
series = series.map(lambda x: float(x))
|
||||
# Sort chronologically
|
||||
series = series.sort_index()
|
||||
|
||||
# Default how should be min
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, 4.0],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)[::step]
|
||||
r = series.resample("D").min().rolling(window=1, step=step)
|
||||
tm.assert_series_equal(expected, r.min())
|
||||
|
||||
|
||||
def test_rolling_median_resample():
|
||||
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
||||
# So that we can have 3 datapoints on last day (4, 10, and 20)
|
||||
indices.append(datetime(1975, 1, 5, 1))
|
||||
indices.append(datetime(1975, 1, 5, 2))
|
||||
series = Series(list(range(5)) + [10, 20], index=indices)
|
||||
# Use floats instead of ints as values
|
||||
series = series.map(lambda x: float(x))
|
||||
# Sort chronologically
|
||||
series = series.sort_index()
|
||||
|
||||
# Default how should be median
|
||||
expected = Series(
|
||||
[0.0, 1.0, 2.0, 3.0, 10],
|
||||
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
|
||||
)
|
||||
x = series.resample("D").median().rolling(window=1).median()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_median_memory_error():
|
||||
# GH11722
|
||||
n = 20000
|
||||
Series(np.random.default_rng(2).standard_normal(n)).rolling(
|
||||
window=2, center=False
|
||||
).median()
|
||||
Series(np.random.default_rng(2).standard_normal(n)).rolling(
|
||||
window=2, center=False
|
||||
).median()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data_type",
|
||||
[np.dtype(f"f{width}") for width in [4, 8]]
|
||||
+ [np.dtype(f"{sign}{width}") for width in [1, 2, 4, 8] for sign in "ui"],
|
||||
)
|
||||
def test_rolling_min_max_numeric_types(data_type):
|
||||
# GH12373
|
||||
|
||||
# Just testing that these don't throw exceptions and that
|
||||
# the return type is float64. Other tests will cover quantitative
|
||||
# correctness
|
||||
result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).max()
|
||||
assert result.dtypes[0] == np.dtype("f8")
|
||||
result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).min()
|
||||
assert result.dtypes[0] == np.dtype("f8")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
lambda x: x.rolling(window=10, min_periods=0).count(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).max(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).min(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).sum(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).mean(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).std(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).var(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).skew(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).kurt(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).quantile(0.5),
|
||||
lambda x: x.rolling(window=10, min_periods=5).median(),
|
||||
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False),
|
||||
lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True),
|
||||
pytest.param(
|
||||
lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(),
|
||||
marks=td.skip_if_no("scipy"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_moment_functions_zero_length(f):
|
||||
# GH 8056
|
||||
s = Series(dtype=np.float64)
|
||||
s_expected = s
|
||||
df1 = DataFrame()
|
||||
df1_expected = df1
|
||||
df2 = DataFrame(columns=["a"])
|
||||
df2["a"] = df2["a"].astype("float64")
|
||||
df2_expected = df2
|
||||
|
||||
s_result = f(s)
|
||||
tm.assert_series_equal(s_result, s_expected)
|
||||
|
||||
df1_result = f(df1)
|
||||
tm.assert_frame_equal(df1_result, df1_expected)
|
||||
|
||||
df2_result = f(df2)
|
||||
tm.assert_frame_equal(df2_result, df2_expected)
|
@@ -0,0 +1,182 @@
from functools import partial

import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
    concat,
    isna,
    notna,
)
import pandas._testing as tm

from pandas.tseries import offsets


def scoreatpercentile(a, per):
    values = np.sort(a, axis=0)

    idx = int(per / 1.0 * (values.shape[0] - 1))

    if idx == values.shape[0] - 1:
        retval = values[-1]

    else:
        qlow = idx / (values.shape[0] - 1)
        qhig = (idx + 1) / (values.shape[0] - 1)
        vlow = values[idx]
        vhig = values[idx + 1]
        retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow)

    return retval
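
# Note (added): a small worked example of the helper above, using illustrative
# values that are not part of the test data.
#
#   scoreatpercentile(np.array([1.0, 2.0, 3.0, 4.0]), 0.5)
#   -> idx = int(0.5 * 3) = 1, qlow = 1/3, qhig = 2/3, vlow = 2.0, vhig = 3.0
#   -> 2.0 + (3.0 - 2.0) * (0.5 - 1/3) / (2/3 - 1/3) = 2.5
#
# which agrees with np.percentile([1.0, 2.0, 3.0, 4.0], 50).

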
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_series(series, q, step):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
result = series.rolling(50, step=step).quantile(q)
|
||||
assert isinstance(result, Series)
|
||||
end = range(0, len(series), step or 1)[-1] + 1
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_frame(raw, frame, q, step):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
result = frame.rolling(50, step=step).quantile(q)
|
||||
assert isinstance(result, DataFrame)
|
||||
end = range(0, len(frame), step or 1)[-1] + 1
|
||||
tm.assert_series_equal(
|
||||
result.iloc[-1, :],
|
||||
frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_time_rule_series(series, q):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
win = 25
|
||||
ser = series[::2].resample("B").mean()
|
||||
series_result = ser.rolling(window=win, min_periods=10).quantile(q)
|
||||
last_date = series_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_series = series[::2].truncate(prev_date, last_date)
|
||||
tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_time_rule_frame(raw, frame, q):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
win = 25
|
||||
frm = frame[::2].resample("B").mean()
|
||||
frame_result = frm.rolling(window=win, min_periods=10).quantile(q)
|
||||
last_date = frame_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_frame = frame[::2].truncate(prev_date, last_date)
|
||||
tm.assert_series_equal(
|
||||
frame_result.xs(last_date),
|
||||
trunc_frame.apply(compare_func, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_nans(q):
|
||||
compare_func = partial(scoreatpercentile, per=q)
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = obj.rolling(50, min_periods=30).quantile(q)
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
|
||||
|
||||
# min_periods is working correctly
|
||||
result = obj.rolling(20, min_periods=15).quantile(q)
|
||||
assert isna(result.iloc[23])
|
||||
assert not isna(result.iloc[24])
|
||||
|
||||
assert not isna(result.iloc[-6])
|
||||
assert isna(result.iloc[-5])
|
||||
|
||||
obj2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
result = obj2.rolling(10, min_periods=5).quantile(q)
|
||||
assert isna(result.iloc[3])
|
||||
assert notna(result.iloc[4])
|
||||
|
||||
result0 = obj.rolling(20, min_periods=0).quantile(q)
|
||||
result1 = obj.rolling(20, min_periods=1).quantile(q)
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("minp", [0, 99, 100])
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_min_periods(series, minp, q, step):
|
||||
result = series.rolling(len(series) + 1, min_periods=minp, step=step).quantile(q)
|
||||
expected = series.rolling(len(series), min_periods=minp, step=step).quantile(q)
|
||||
nan_mask = isna(result)
|
||||
tm.assert_series_equal(nan_mask, isna(expected))
|
||||
|
||||
nan_mask = ~nan_mask
|
||||
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_center(q):
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = obj.rolling(20, center=True).quantile(q)
|
||||
expected = (
|
||||
concat([obj, Series([np.nan] * 9)])
|
||||
.rolling(20)
|
||||
.quantile(q)
|
||||
.iloc[9:]
|
||||
.reset_index(drop=True)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_center_reindex_series(series, q):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
series_xp = (
|
||||
series.reindex(list(series.index) + s)
|
||||
.rolling(window=25)
|
||||
.quantile(q)
|
||||
.shift(-12)
|
||||
.reindex(series.index)
|
||||
)
|
||||
|
||||
series_rs = series.rolling(window=25, center=True).quantile(q)
|
||||
tm.assert_series_equal(series_xp, series_rs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
|
||||
def test_center_reindex_frame(frame, q):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
frame_xp = (
|
||||
frame.reindex(list(frame.index) + s)
|
||||
.rolling(window=25)
|
||||
.quantile(q)
|
||||
.shift(-12)
|
||||
.reindex(frame.index)
|
||||
)
|
||||
frame_rs = frame.rolling(window=25, center=True).quantile(q)
|
||||
tm.assert_frame_equal(frame_xp, frame_rs)
|
||||
|
||||
|
||||
def test_keyword_quantile_deprecated():
|
||||
# GH #52550
|
||||
s = Series([1, 2, 3, 4])
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
s.rolling(2).quantile(quantile=0.4)
|
@@ -0,0 +1,227 @@
from functools import partial

import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
    concat,
    isna,
    notna,
)
import pandas._testing as tm

from pandas.tseries import offsets


@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_series(series, sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
result = getattr(series.rolling(50), roll_func)()
|
||||
assert isinstance(result, Series)
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_frame(raw, frame, sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
result = getattr(frame.rolling(50), roll_func)()
|
||||
assert isinstance(result, DataFrame)
|
||||
tm.assert_series_equal(
|
||||
result.iloc[-1, :],
|
||||
frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_time_rule_series(series, sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
win = 25
|
||||
ser = series[::2].resample("B").mean()
|
||||
series_result = getattr(ser.rolling(window=win, min_periods=10), roll_func)()
|
||||
last_date = series_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_series = series[::2].truncate(prev_date, last_date)
|
||||
tm.assert_almost_equal(series_result.iloc[-1], compare_func(trunc_series))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_time_rule_frame(raw, frame, sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
win = 25
|
||||
frm = frame[::2].resample("B").mean()
|
||||
frame_result = getattr(frm.rolling(window=win, min_periods=10), roll_func)()
|
||||
last_date = frame_result.index[-1]
|
||||
prev_date = last_date - 24 * offsets.BDay()
|
||||
|
||||
trunc_frame = frame[::2].truncate(prev_date, last_date)
|
||||
tm.assert_series_equal(
|
||||
frame_result.xs(last_date),
|
||||
trunc_frame.apply(compare_func, raw=raw),
|
||||
check_names=False,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
|
||||
def test_nans(sp_func, roll_func):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
compare_func = partial(getattr(sp_stats, sp_func), bias=False)
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = getattr(obj.rolling(50, min_periods=30), roll_func)()
|
||||
tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
|
||||
|
||||
# min_periods is working correctly
|
||||
result = getattr(obj.rolling(20, min_periods=15), roll_func)()
|
||||
assert isna(result.iloc[23])
|
||||
assert not isna(result.iloc[24])
|
||||
|
||||
assert not isna(result.iloc[-6])
|
||||
assert isna(result.iloc[-5])
|
||||
|
||||
obj2 = Series(np.random.default_rng(2).standard_normal(20))
|
||||
result = getattr(obj2.rolling(10, min_periods=5), roll_func)()
|
||||
assert isna(result.iloc[3])
|
||||
assert notna(result.iloc[4])
|
||||
|
||||
result0 = getattr(obj.rolling(20, min_periods=0), roll_func)()
|
||||
result1 = getattr(obj.rolling(20, min_periods=1), roll_func)()
|
||||
tm.assert_almost_equal(result0, result1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("minp", [0, 99, 100])
|
||||
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
|
||||
def test_min_periods(series, minp, roll_func, step):
|
||||
result = getattr(
|
||||
series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func
|
||||
)()
|
||||
expected = getattr(
|
||||
series.rolling(len(series), min_periods=minp, step=step), roll_func
|
||||
)()
|
||||
nan_mask = isna(result)
|
||||
tm.assert_series_equal(nan_mask, isna(expected))
|
||||
|
||||
nan_mask = ~nan_mask
|
||||
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
|
||||
def test_center(roll_func):
|
||||
obj = Series(np.random.default_rng(2).standard_normal(50))
|
||||
obj[:10] = np.nan
|
||||
obj[-10:] = np.nan
|
||||
|
||||
result = getattr(obj.rolling(20, center=True), roll_func)()
|
||||
expected = (
|
||||
getattr(concat([obj, Series([np.nan] * 9)]).rolling(20), roll_func)()
|
||||
.iloc[9:]
|
||||
.reset_index(drop=True)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
|
||||
def test_center_reindex_series(series, roll_func):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
series_xp = (
|
||||
getattr(
|
||||
series.reindex(list(series.index) + s).rolling(window=25),
|
||||
roll_func,
|
||||
)()
|
||||
.shift(-12)
|
||||
.reindex(series.index)
|
||||
)
|
||||
series_rs = getattr(series.rolling(window=25, center=True), roll_func)()
|
||||
tm.assert_series_equal(series_xp, series_rs)
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
|
||||
def test_center_reindex_frame(frame, roll_func):
|
||||
# shifter index
|
||||
s = [f"x{x:d}" for x in range(12)]
|
||||
|
||||
frame_xp = (
|
||||
getattr(
|
||||
frame.reindex(list(frame.index) + s).rolling(window=25),
|
||||
roll_func,
|
||||
)()
|
||||
.shift(-12)
|
||||
.reindex(frame.index)
|
||||
)
|
||||
frame_rs = getattr(frame.rolling(window=25, center=True), roll_func)()
|
||||
tm.assert_frame_equal(frame_xp, frame_rs)
|
||||
|
||||
|
||||
def test_rolling_skew_edge_cases(step):
|
||||
expected = Series([np.nan] * 4 + [0.0])[::step]
|
||||
# yields all NaN (0 variance)
|
||||
d = Series([1] * 5)
|
||||
x = d.rolling(window=5, step=step).skew()
|
||||
# index 4 should be 0 as it contains 5 same obs
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
expected = Series([np.nan] * 5)[::step]
|
||||
# yields all NaN (window too small)
|
||||
d = Series(np.random.default_rng(2).standard_normal(5))
|
||||
x = d.rolling(window=2, step=step).skew()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# yields [NaN, NaN, NaN, 0.177994, 1.548824]
|
||||
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
|
||||
expected = Series([np.nan, np.nan, np.nan, 0.177994, 1.548824])[::step]
|
||||
x = d.rolling(window=4, step=step).skew()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_kurt_edge_cases(step):
|
||||
expected = Series([np.nan] * 4 + [-3.0])[::step]
|
||||
|
||||
# yields all NaN (0 variance)
|
||||
d = Series([1] * 5)
|
||||
x = d.rolling(window=5, step=step).kurt()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# yields all NaN (window too small)
|
||||
expected = Series([np.nan] * 5)[::step]
|
||||
d = Series(np.random.default_rng(2).standard_normal(5))
|
||||
x = d.rolling(window=3, step=step).kurt()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
# yields [NaN, NaN, NaN, 1.224307, 2.671499]
|
||||
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
|
||||
expected = Series([np.nan, np.nan, np.nan, 1.224307, 2.671499])[::step]
|
||||
x = d.rolling(window=4, step=step).kurt()
|
||||
tm.assert_series_equal(expected, x)
|
||||
|
||||
|
||||
def test_rolling_skew_eq_value_fperr(step):
|
||||
    # GH#18804: rolling skew for all equal values should return NaN
|
||||
# #46717 update: all equal values should return 0 instead of NaN
|
||||
a = Series([1.1] * 15).rolling(window=10, step=step).skew()
|
||||
assert (a[a.index >= 9] == 0).all()
|
||||
assert a[a.index < 9].isna().all()
|
||||
|
||||
|
||||
def test_rolling_kurt_eq_value_fperr(step):
|
||||
    # GH#18804: rolling kurt for all equal values should return NaN
|
||||
# #46717 update: all equal values should return -3 instead of NaN
|
||||
a = Series([1.1] * 15).rolling(window=10, step=step).kurt()
|
||||
assert (a[a.index >= 9] == -3).all()
|
||||
assert a[a.index < 9].isna().all()
|
@@ -0,0 +1,715 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    MultiIndex,
    NaT,
    Series,
    Timestamp,
    date_range,
)
import pandas._testing as tm

from pandas.tseries import offsets


@pytest.fixture
def regular():
    return DataFrame(
        {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
    ).set_index("A")


@pytest.fixture
def ragged():
    df = DataFrame({"B": range(5)})
    df.index = [
        Timestamp("20130101 09:00:00"),
        Timestamp("20130101 09:00:02"),
        Timestamp("20130101 09:00:03"),
        Timestamp("20130101 09:00:05"),
        Timestamp("20130101 09:00:06"),
    ]
    return df
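
# Note (added): a sketch of how a right-closed "2s" window lines up with the
# ragged index above (B values 0..4), matching the sums asserted in
# test_ragged_sum below; the bracketed timestamp sets are illustrative only.
#
#   09:00:00 -> {00}     -> 0
#   09:00:02 -> {02}     -> 1
#   09:00:03 -> {02, 03} -> 1 + 2 = 3
#   09:00:05 -> {05}     -> 3
#   09:00:06 -> {05, 06} -> 3 + 4 = 7

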
class TestRollingTS:
|
||||
# rolling time-series friendly
|
||||
# xref GH13327
|
||||
|
||||
def test_doc_string(self):
|
||||
df = DataFrame(
|
||||
{"B": [0, 1, 2, np.nan, 4]},
|
||||
index=[
|
||||
Timestamp("20130101 09:00:00"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:05"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
],
|
||||
)
|
||||
df
|
||||
df.rolling("2s").sum()
|
||||
|
||||
def test_invalid_window_non_int(self, regular):
|
||||
# not a valid freq
|
||||
msg = "passed window foobar is not compatible with a datetimelike index"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.rolling(window="foobar")
|
||||
# not a datetimelike index
|
||||
msg = "window must be an integer"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.reset_index().rolling(window="foobar")
|
||||
|
||||
@pytest.mark.parametrize("freq", ["2MS", offsets.MonthBegin(2)])
|
||||
def test_invalid_window_nonfixed(self, freq, regular):
|
||||
# non-fixed freqs
|
||||
msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.rolling(window=freq)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["1D", offsets.Day(2), "2ms"])
|
||||
def test_valid_window(self, freq, regular):
|
||||
regular.rolling(window=freq)
|
||||
|
||||
@pytest.mark.parametrize("minp", [1.0, "foo", np.array([1, 2, 3])])
|
||||
def test_invalid_minp(self, minp, regular):
|
||||
# non-integer min_periods
|
||||
msg = (
|
||||
r"local variable 'minp' referenced before assignment|"
|
||||
"min_periods must be an integer"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.rolling(window="1D", min_periods=minp)
|
||||
|
||||
def test_on(self, regular):
|
||||
df = regular
|
||||
|
||||
# not a valid column
|
||||
msg = (
|
||||
r"invalid on specified as foobar, must be a column "
|
||||
"\\(of DataFrame\\), an Index or None"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.rolling(window="2s", on="foobar")
|
||||
|
||||
# column is valid
|
||||
df = df.copy()
|
||||
df["C"] = date_range("20130101", periods=len(df))
|
||||
df.rolling(window="2d", on="C").sum()
|
||||
|
||||
# invalid columns
|
||||
msg = "window must be an integer"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.rolling(window="2d", on="B")
|
||||
|
||||
# ok even though on non-selected
|
||||
df.rolling(window="2d", on="C").B.sum()
|
||||
|
||||
def test_monotonic_on(self):
|
||||
# on/index must be monotonic
|
||||
df = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
|
||||
)
|
||||
|
||||
assert df.A.is_monotonic_increasing
|
||||
df.rolling("2s", on="A").sum()
|
||||
|
||||
df = df.set_index("A")
|
||||
assert df.index.is_monotonic_increasing
|
||||
df.rolling("2s").sum()
|
||||
|
||||
def test_non_monotonic_on(self):
|
||||
# GH 19248
|
||||
df = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
|
||||
)
|
||||
df = df.set_index("A")
|
||||
non_monotonic_index = df.index.to_list()
|
||||
non_monotonic_index[0] = non_monotonic_index[3]
|
||||
df.index = non_monotonic_index
|
||||
|
||||
assert not df.index.is_monotonic_increasing
|
||||
|
||||
msg = "index values must be monotonic"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.rolling("2s").sum()
|
||||
|
||||
df = df.reset_index()
|
||||
|
||||
msg = (
|
||||
r"invalid on specified as A, must be a column "
|
||||
"\\(of DataFrame\\), an Index or None"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.rolling("2s", on="A").sum()
|
||||
|
||||
def test_frame_on(self):
|
||||
df = DataFrame(
|
||||
{"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")}
|
||||
)
|
||||
|
||||
df["A"] = [
|
||||
Timestamp("20130101 09:00:00"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:05"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
]
|
||||
|
||||
        # we are simulating using 'on'
|
||||
expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True)
|
||||
|
||||
result = df.rolling("2s", on="A").B.sum()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# test as a frame
|
||||
# we should be ignoring the 'on' as an aggregation column
|
||||
        # note that the expected is built by setting "A" as the index,
        # computing, and then resetting the index, which moves "A" to the
        # front; its columns are therefore reordered to match the result,
        # which keeps the original column order
|
||||
expected = (
|
||||
df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]]
|
||||
)
|
||||
|
||||
result = df.rolling("2s", on="A")[["B"]].sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_frame_on2(self, unit):
|
||||
# using multiple aggregation columns
|
||||
dti = DatetimeIndex(
|
||||
[
|
||||
Timestamp("20130101 09:00:00"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:05"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
]
|
||||
).as_unit(unit)
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": [0, 1, 2, 3, 4],
|
||||
"B": [0, 1, 2, np.nan, 4],
|
||||
"C": dti,
|
||||
},
|
||||
columns=["A", "C", "B"],
|
||||
)
|
||||
|
||||
expected1 = DataFrame(
|
||||
{"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]},
|
||||
columns=["A", "C", "B"],
|
||||
)
|
||||
|
||||
result = df.rolling("2s", on="C").sum()
|
||||
expected = expected1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = Series([0, 1, 3, np.nan, 4], name="B")
|
||||
result = df.rolling("2s", on="C").B.sum()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = expected1[["A", "B", "C"]]
|
||||
result = df.rolling("2s", on="C")[["A", "B", "C"]].sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_basic_regular(self, regular):
|
||||
df = regular.copy()
|
||||
|
||||
df.index = date_range("20130101", periods=5, freq="D")
|
||||
expected = df.rolling(window=1, min_periods=1).sum()
|
||||
result = df.rolling(window="1D").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df.index = date_range("20130101", periods=5, freq="2D")
|
||||
expected = df.rolling(window=1, min_periods=1).sum()
|
||||
result = df.rolling(window="2D", min_periods=1).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.rolling(window=1, min_periods=1).sum()
|
||||
result = df.rolling(window="2D", min_periods=1).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.rolling(window=1).sum()
|
||||
result = df.rolling(window="2D").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_min_periods(self, regular):
|
||||
# compare for min_periods
|
||||
df = regular
|
||||
|
||||
        # these are slightly different
|
||||
expected = df.rolling(2, min_periods=1).sum()
|
||||
result = df.rolling("2s").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.rolling(2, min_periods=1).sum()
|
||||
result = df.rolling("2s", min_periods=1).sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_closed(self, regular, unit):
|
||||
# xref GH13965
|
||||
|
||||
dti = DatetimeIndex(
|
||||
[
|
||||
Timestamp("20130101 09:00:01"),
|
||||
Timestamp("20130101 09:00:02"),
|
||||
Timestamp("20130101 09:00:03"),
|
||||
Timestamp("20130101 09:00:04"),
|
||||
Timestamp("20130101 09:00:06"),
|
||||
]
|
||||
).as_unit(unit)
|
||||
|
||||
df = DataFrame(
|
||||
{"A": [1] * 5},
|
||||
index=dti,
|
||||
)
|
||||
|
||||
# closed must be 'right', 'left', 'both', 'neither'
|
||||
msg = "closed must be 'right', 'left', 'both' or 'neither'"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
regular.rolling(window="2s", closed="blabla")
|
||||
|
||||
expected = df.copy()
|
||||
expected["A"] = [1.0, 2, 2, 2, 1]
|
||||
result = df.rolling("2s", closed="right").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# default should be 'right'
|
||||
result = df.rolling("2s").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.copy()
|
||||
expected["A"] = [1.0, 2, 3, 3, 2]
|
||||
result = df.rolling("2s", closed="both").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.copy()
|
||||
expected["A"] = [np.nan, 1.0, 2, 2, 1]
|
||||
result = df.rolling("2s", closed="left").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.copy()
|
||||
expected["A"] = [np.nan, 1.0, 1, 1, np.nan]
|
||||
result = df.rolling("2s", closed="neither").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_sum(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 3, 7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=2).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, np.nan, 3, np.nan, 7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="3s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 5, 7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="3s").sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 5, 7]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="4s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 6, 9]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="4s", min_periods=3).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, np.nan, 3, 6, 9]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).sum()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 3, 6, 10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_mean(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).mean()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).mean()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_median(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).median()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).median()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_quantile(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).quantile(0.5)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).quantile(0.5)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_std(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).std(ddof=0)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="1s", min_periods=1).std(ddof=1)
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="3s", min_periods=1).std(ddof=0)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0] + [0.5] * 4
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).std(ddof=1)
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_var(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).var(ddof=0)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="1s", min_periods=1).var(ddof=1)
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="3s", min_periods=1).var(ddof=0)
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0] + [0.25] * 4
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).var(ddof=1)
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_skew(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="3s", min_periods=1).skew()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).skew()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_kurt(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="3s", min_periods=1).kurt()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 5
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).kurt()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan] * 4 + [-1.2]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_count(self, ragged):
|
||||
df = ragged
|
||||
result = df.rolling(window="1s", min_periods=1).count()
|
||||
expected = df.copy()
|
||||
expected["B"] = [1.0, 1, 1, 1, 1]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = ragged
|
||||
result = df.rolling(window="1s").count()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).count()
|
||||
expected = df.copy()
|
||||
expected["B"] = [1.0, 1, 2, 1, 2]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=2).count()
|
||||
expected = df.copy()
|
||||
expected["B"] = [np.nan, np.nan, 2, np.nan, 2]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_regular_min(self):
|
||||
df = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]}
|
||||
).set_index("A")
|
||||
result = df.rolling("1s").min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame(
|
||||
{"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]}
|
||||
).set_index("A")
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.rolling("2s").min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [5.0, 4, 3, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling("5s").min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [5.0, 4, 3, 3, 3]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_ragged_min(self, ragged):
|
||||
df = ragged
|
||||
|
||||
result = df.rolling(window="1s", min_periods=1).min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 1, 3, 3]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).min()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 0, 0, 1, 1]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_perf_min(self):
|
||||
N = 10000
|
||||
|
||||
dfp = DataFrame(
|
||||
{"B": np.random.default_rng(2).standard_normal(N)},
|
||||
index=date_range("20130101", periods=N, freq="s"),
|
||||
)
|
||||
expected = dfp.rolling(2, min_periods=1).min()
|
||||
result = dfp.rolling("2s").min()
|
||||
assert ((result - expected) < 0.01).all().all()
|
||||
|
||||
expected = dfp.rolling(200, min_periods=1).min()
|
||||
result = dfp.rolling("200s").min()
|
||||
assert ((result - expected) < 0.01).all().all()
|
||||
|
||||
def test_ragged_max(self, ragged):
|
||||
df = ragged
|
||||
|
||||
result = df.rolling(window="1s", min_periods=1).max()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="2s", min_periods=1).max()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.rolling(window="5s", min_periods=1).max()
|
||||
expected = df.copy()
|
||||
expected["B"] = [0.0, 1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"freq, op, result_data",
|
||||
[
|
||||
("ms", "min", [0.0] * 10),
|
||||
("ms", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("ms", "max", [0.0] * 9 + [2.0]),
|
||||
("s", "min", [0.0] * 10),
|
||||
("s", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("s", "max", [0.0] * 9 + [2.0]),
|
||||
("min", "min", [0.0] * 10),
|
||||
("min", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("min", "max", [0.0] * 9 + [2.0]),
|
||||
("h", "min", [0.0] * 10),
|
||||
("h", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("h", "max", [0.0] * 9 + [2.0]),
|
||||
("D", "min", [0.0] * 10),
|
||||
("D", "mean", [0.0] * 9 + [2.0 / 9]),
|
||||
("D", "max", [0.0] * 9 + [2.0]),
|
||||
],
|
||||
)
|
||||
def test_freqs_ops(self, freq, op, result_data):
|
||||
# GH 21096
|
||||
index = date_range(start="2018-1-1 01:00:00", freq=f"1{freq}", periods=10)
|
||||
# Explicit cast to float to avoid implicit cast when setting nan
|
||||
s = Series(data=0, index=index, dtype="float")
|
||||
s.iloc[1] = np.nan
|
||||
s.iloc[-1] = 2
|
||||
result = getattr(s.rolling(window=f"10{freq}"), op)()
|
||||
expected = Series(data=result_data, index=index)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"f",
|
||||
[
|
||||
"sum",
|
||||
"mean",
|
||||
"count",
|
||||
"median",
|
||||
"std",
|
||||
"var",
|
||||
"kurt",
|
||||
"skew",
|
||||
"min",
|
||||
"max",
|
||||
],
|
||||
)
|
||||
def test_all(self, f, regular):
|
||||
# simple comparison of integer vs time-based windowing
|
||||
df = regular * 2
|
||||
er = df.rolling(window=1)
|
||||
r = df.rolling(window="1s")
|
||||
|
||||
result = getattr(r, f)()
|
||||
expected = getattr(er, f)()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = r.quantile(0.5)
|
||||
expected = er.quantile(0.5)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_all2(self, arithmetic_win_operators):
|
||||
f = arithmetic_win_operators
|
||||
# more sophisticated comparison of integer vs.
|
||||
# time-based windowing
|
||||
df = DataFrame(
|
||||
{"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="h")
|
||||
)
|
||||
# in-range data
|
||||
dft = df.between_time("09:00", "16:00")
|
||||
|
||||
r = dft.rolling(window="5h")
|
||||
|
||||
result = getattr(r, f)()
|
||||
|
||||
# we need to roll the days separately
|
||||
# to compare with a time-based roll
|
||||
# finally groupby-apply will return a multi-index
|
||||
# so we need to drop the day
|
||||
def agg_by_day(x):
|
||||
x = x.between_time("09:00", "16:00")
|
||||
return getattr(x.rolling(5, min_periods=1), f)()
|
||||
|
||||
expected = (
|
||||
df.groupby(df.index.day).apply(agg_by_day).reset_index(level=0, drop=True)
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rolling_cov_offset(self):
|
||||
# GH16058
|
||||
|
||||
idx = date_range("2017-01-01", periods=24, freq="1h")
|
||||
ss = Series(np.arange(len(idx)), index=idx)
|
||||
|
||||
result = ss.rolling("2h").cov()
|
||||
expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected2 = ss.rolling(2, min_periods=1).cov()
|
||||
tm.assert_series_equal(result, expected2)
|
||||
|
||||
result = ss.rolling("3h").cov()
|
||||
expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected2 = ss.rolling(3, min_periods=1).cov()
|
||||
tm.assert_series_equal(result, expected2)
|
||||
|
||||
def test_rolling_on_decreasing_index(self, unit):
|
||||
# GH-19248, GH-32385
|
||||
index = DatetimeIndex(
|
||||
[
|
||||
Timestamp("20190101 09:00:30"),
|
||||
Timestamp("20190101 09:00:27"),
|
||||
Timestamp("20190101 09:00:20"),
|
||||
Timestamp("20190101 09:00:18"),
|
||||
Timestamp("20190101 09:00:10"),
|
||||
]
|
||||
).as_unit(unit)
|
||||
|
||||
df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index)
|
||||
result = df.rolling("5s").min()
|
||||
expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rolling_on_empty(self):
|
||||
# GH-32385
|
||||
df = DataFrame({"column": []}, index=[])
|
||||
result = df.rolling("5s").min()
|
||||
expected = DataFrame({"column": []}, index=[])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_rolling_on_multi_index_level(self):
|
||||
# GH-15584
|
||||
df = DataFrame(
|
||||
{"column": range(6)},
|
||||
index=MultiIndex.from_product(
|
||||
[date_range("20190101", periods=3), range(2)], names=["date", "seq"]
|
||||
),
|
||||
)
|
||||
result = df.rolling("10d", on=df.index.get_level_values("date")).sum()
|
||||
expected = DataFrame(
|
||||
{"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("msg, axis", [["column", 1], ["index", 0]])
|
||||
def test_nat_axis_error(msg, axis):
|
||||
idx = [Timestamp("2020"), NaT]
|
||||
kwargs = {"columns" if axis == 1 else "index": idx}
|
||||
df = DataFrame(np.eye(2), **kwargs)
|
||||
warn_msg = "The 'axis' keyword in DataFrame.rolling is deprecated"
|
||||
if axis == 1:
|
||||
warn_msg = "Support for axis=1 in DataFrame.rolling is deprecated"
|
||||
with pytest.raises(ValueError, match=f"{msg} values must not have NaT"):
|
||||
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
|
||||
df.rolling("D", axis=axis).mean()
|
||||
|
||||
|
||||
@td.skip_if_no("pyarrow")
|
||||
def test_arrow_datetime_axis():
|
||||
# GH 55849
|
||||
expected = Series(
|
||||
np.arange(5, dtype=np.float64),
|
||||
index=Index(
|
||||
date_range("2020-01-01", periods=5), dtype="timestamp[ns][pyarrow]"
|
||||
),
|
||||
)
|
||||
result = expected.rolling("1D").sum()
|
||||
tm.assert_series_equal(result, expected)
|
@@ -0,0 +1,688 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
    Timedelta,
    concat,
    date_range,
)
import pandas._testing as tm
from pandas.api.indexers import BaseIndexer


@pytest.fixture(
    params=[
        "triang",
        "blackman",
        "hamming",
        "bartlett",
        "bohman",
        "blackmanharris",
        "nuttall",
        "barthann",
    ]
)
def win_types(request):
    return request.param


@pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"])
def win_types_special(request):
    return request.param


def test_constructor(frame_or_series):
    # GH 12669
    pytest.importorskip("scipy")
    c = frame_or_series(range(5)).rolling

    # valid
    c(win_type="boxcar", window=2, min_periods=1)
    c(win_type="boxcar", window=2, min_periods=1, center=True)
    c(win_type="boxcar", window=2, min_periods=1, center=False)
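
# Note (added): a minimal usage sketch of the win_type path constructed above
# (requires scipy); the call below is illustrative and not asserted anywhere
# in this module.
#
#   Series(range(5), dtype=float).rolling(window=3, win_type="triang").mean()
#
# computes a rolling mean weighted by scipy's triangular window.

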
@pytest.mark.parametrize("w", [2.0, "foo", np.array([2])])
|
||||
def test_invalid_constructor(frame_or_series, w):
|
||||
# not valid
|
||||
pytest.importorskip("scipy")
|
||||
c = frame_or_series(range(5)).rolling
|
||||
with pytest.raises(ValueError, match="min_periods must be an integer"):
|
||||
c(win_type="boxcar", window=2, min_periods=w)
|
||||
with pytest.raises(ValueError, match="center must be a boolean"):
|
||||
c(win_type="boxcar", window=2, min_periods=1, center=w)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("wt", ["foobar", 1])
|
||||
def test_invalid_constructor_wintype(frame_or_series, wt):
|
||||
pytest.importorskip("scipy")
|
||||
c = frame_or_series(range(5)).rolling
|
||||
with pytest.raises(ValueError, match="Invalid win_type"):
|
||||
c(win_type=wt, window=2)
|
||||
|
||||
|
||||
def test_constructor_with_win_type(frame_or_series, win_types):
|
||||
# GH 12669
|
||||
pytest.importorskip("scipy")
|
||||
c = frame_or_series(range(5)).rolling
|
||||
c(win_type=win_types, window=2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("arg", ["median", "kurt", "skew"])
def test_agg_function_support(arg):
    pytest.importorskip("scipy")
    df = DataFrame({"A": np.arange(5)})
    roll = df.rolling(2, win_type="triang")

    msg = f"'{arg}' is not a valid function for 'Window' object"
    with pytest.raises(AttributeError, match=msg):
        roll.agg(arg)

    with pytest.raises(AttributeError, match=msg):
        roll.agg([arg])

    with pytest.raises(AttributeError, match=msg):
        roll.agg({"A": arg})


def test_invalid_scipy_arg():
    # This error is raised by scipy
    pytest.importorskip("scipy")
    msg = r"boxcar\(\) got an unexpected"
    with pytest.raises(TypeError, match=msg):
        Series(range(3)).rolling(1, win_type="boxcar").mean(foo="bar")


def test_constructor_with_win_type_invalid(frame_or_series):
    # GH 13383
    pytest.importorskip("scipy")
    c = frame_or_series(range(5)).rolling

    msg = "window must be an integer 0 or greater"

    with pytest.raises(ValueError, match=msg):
        c(-1, win_type="boxcar")


def test_window_with_args(step):
    # make sure that we are aggregating window functions correctly with arg
    pytest.importorskip("scipy")
    r = Series(np.random.default_rng(2).standard_normal(100)).rolling(
        window=10, min_periods=1, win_type="gaussian", step=step
    )
    expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
    expected.columns = ["<lambda>", "<lambda>"]
    result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)])
    tm.assert_frame_equal(result, expected)

    def a(x):
        return x.mean(std=10)

    def b(x):
        return x.mean(std=0.01)

    expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
    expected.columns = ["a", "b"]
    result = r.aggregate([a, b])
    tm.assert_frame_equal(result, expected)


def test_win_type_with_method_invalid():
    pytest.importorskip("scipy")
    with pytest.raises(
        NotImplementedError, match="'single' is the only supported method type."
    ):
        Series(range(1)).rolling(1, win_type="triang", method="table")


@pytest.mark.parametrize("arg", [2000000000, "2s", Timedelta("2s")])
def test_consistent_win_type_freq(arg):
    # GH 15969
    pytest.importorskip("scipy")
    s = Series(range(1))
    with pytest.raises(ValueError, match="Invalid win_type freq"):
        s.rolling(arg, win_type="freq")


def test_win_type_freq_return_none():
    # GH 48838
    freq_roll = Series(range(2), index=date_range("2020", periods=2)).rolling("2s")
    assert freq_roll.win_type is None


def test_win_type_not_implemented():
    pytest.importorskip("scipy")

    class CustomIndexer(BaseIndexer):
        def get_window_bounds(self, num_values, min_periods, center, closed, step):
            return np.array([0, 1]), np.array([1, 2])

    df = DataFrame({"values": range(2)})
    indexer = CustomIndexer()
    with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"):
        df.rolling(indexer, win_type="boxcar")


def test_cmov_mean(step):
    # GH 8238
    pytest.importorskip("scipy")
    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
    result = Series(vals).rolling(5, center=True, step=step).mean()
    expected_values = [
        np.nan,
        np.nan,
        9.962,
        11.27,
        11.564,
        12.516,
        12.818,
        12.952,
        np.nan,
        np.nan,
    ]
    expected = Series(expected_values)[::step]
    tm.assert_series_equal(expected, result)


def test_cmov_window(step):
    # GH 8238
    pytest.importorskip("scipy")
    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
    result = Series(vals).rolling(5, win_type="boxcar", center=True, step=step).mean()
    expected_values = [
        np.nan,
        np.nan,
        9.962,
        11.27,
        11.564,
        12.516,
        12.818,
        12.952,
        np.nan,
        np.nan,
    ]
    expected = Series(expected_values)[::step]
    tm.assert_series_equal(expected, result)


def test_cmov_window_corner(step):
    # GH 8238
    # all nan
    pytest.importorskip("scipy")
    vals = Series([np.nan] * 10)
    result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean()
    assert np.isnan(result).all()

    # empty
    vals = Series([], dtype=object)
    result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean()
    assert len(result) == 0

    # shorter than window
    vals = Series(np.random.default_rng(2).standard_normal(5))
    result = vals.rolling(10, win_type="boxcar", step=step).mean()
    assert np.isnan(result).all()
    assert len(result) == len(range(0, 5, step or 1))


@pytest.mark.parametrize(
    "f,xp",
    [
        (
            "mean",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [9.252, 9.392],
                [8.644, 9.906],
                [8.87, 10.208],
                [6.81, 8.588],
                [7.792, 8.644],
                [9.05, 7.824],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
        (
            "std",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [3.789706, 4.068313],
                [3.429232, 3.237411],
                [3.589269, 3.220810],
                [3.405195, 2.380655],
                [3.281839, 2.369869],
                [3.676846, 1.801799],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
        (
            "var",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [14.36187, 16.55117],
                [11.75963, 10.48083],
                [12.88285, 10.37362],
                [11.59535, 5.66752],
                [10.77047, 5.61628],
                [13.51920, 3.24648],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
        (
            "sum",
            [
                [np.nan, np.nan],
                [np.nan, np.nan],
                [46.26, 46.96],
                [43.22, 49.53],
                [44.35, 51.04],
                [34.05, 42.94],
                [38.96, 43.22],
                [45.25, 39.12],
                [np.nan, np.nan],
                [np.nan, np.nan],
            ],
        ),
    ],
)
def test_cmov_window_frame(f, xp, step):
    # GH 8238
    pytest.importorskip("scipy")
    df = DataFrame(
        np.array(
            [
                [12.18, 3.64],
                [10.18, 9.16],
                [13.24, 14.61],
                [4.51, 8.11],
                [6.15, 11.44],
                [9.14, 6.21],
                [11.31, 10.67],
                [2.94, 6.51],
                [9.42, 8.39],
                [12.44, 7.34],
            ]
        )
    )
    xp = DataFrame(np.array(xp))[::step]

    roll = df.rolling(5, win_type="boxcar", center=True, step=step)
    rs = getattr(roll, f)()

    tm.assert_frame_equal(xp, rs)


@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4, 5])
def test_cmov_window_na_min_periods(step, min_periods):
    pytest.importorskip("scipy")
    vals = Series(np.random.default_rng(2).standard_normal(10))
    vals[4] = np.nan
    vals[8] = np.nan

    xp = vals.rolling(5, min_periods=min_periods, center=True, step=step).mean()
    rs = vals.rolling(
        5, win_type="boxcar", min_periods=min_periods, center=True, step=step
    ).mean()
    tm.assert_series_equal(xp, rs)


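# A minimal hand-check of the weighted rolling mean exercised below, assuming the
# weights come from scipy.signal.get_window(win_type, window, fftbins=False) and that
# the result is sum(w * x) / sum(w) over each centered window. The helper name is a
# sketch added for illustration; it is never called by the tests.
def _triang_weighted_mean_sketch():
    from scipy.signal import get_window  # local import: scipy is optional here

    window_vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81])  # first full window
    weights = get_window("triang", 5, fftbins=False)  # [1/3, 2/3, 1, 2/3, 1/3]
    # 27.86 / 3 == 9.28667, matching xps["triang"][2] in test_cmov_window_regular
    return round(float(np.dot(weights, window_vals) / weights.sum()), 5)

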
def test_cmov_window_regular(win_types, step):
    # GH 8238
    pytest.importorskip("scipy")
    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
    xps = {
        "hamming": [
            np.nan,
            np.nan,
            8.71384,
            9.56348,
            12.38009,
            14.03687,
            13.8567,
            11.81473,
            np.nan,
            np.nan,
        ],
        "triang": [
            np.nan,
            np.nan,
            9.28667,
            10.34667,
            12.00556,
            13.33889,
            13.38,
            12.33667,
            np.nan,
            np.nan,
        ],
        "barthann": [
            np.nan,
            np.nan,
            8.4425,
            9.1925,
            12.5575,
            14.3675,
            14.0825,
            11.5675,
            np.nan,
            np.nan,
        ],
        "bohman": [
            np.nan,
            np.nan,
            7.61599,
            9.1764,
            12.83559,
            14.17267,
            14.65923,
            11.10401,
            np.nan,
            np.nan,
        ],
        "blackmanharris": [
            np.nan,
            np.nan,
            6.97691,
            9.16438,
            13.05052,
            14.02156,
            15.10512,
            10.74574,
            np.nan,
            np.nan,
        ],
        "nuttall": [
            np.nan,
            np.nan,
            7.04618,
            9.16786,
            13.02671,
            14.03559,
            15.05657,
            10.78514,
            np.nan,
            np.nan,
        ],
        "blackman": [
            np.nan,
            np.nan,
            7.73345,
            9.17869,
            12.79607,
            14.20036,
            14.57726,
            11.16988,
            np.nan,
            np.nan,
        ],
        "bartlett": [
            np.nan,
            np.nan,
            8.4425,
            9.1925,
            12.5575,
            14.3675,
            14.0825,
            11.5675,
            np.nan,
            np.nan,
        ],
    }

    xp = Series(xps[win_types])[::step]
    rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean()
    tm.assert_series_equal(xp, rs)


def test_cmov_window_regular_linear_range(win_types, step):
    # GH 8238
    pytest.importorskip("scipy")
    vals = np.array(range(10), dtype=float)
    xp = vals.copy()
    xp[:2] = np.nan
    xp[-2:] = np.nan
    xp = Series(xp)[::step]

    rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean()
    tm.assert_series_equal(xp, rs)


def test_cmov_window_regular_missing_data(win_types, step):
    # GH 8238
    pytest.importorskip("scipy")
    vals = np.array(
        [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48]
    )
    xps = {
        "bartlett": [
            np.nan,
            np.nan,
            9.70333,
            10.5225,
            8.4425,
            9.1925,
            12.5575,
            14.3675,
            15.61667,
            13.655,
        ],
        "blackman": [
            np.nan,
            np.nan,
            9.04582,
            11.41536,
            7.73345,
            9.17869,
            12.79607,
            14.20036,
            15.8706,
            13.655,
        ],
        "barthann": [
            np.nan,
            np.nan,
            9.70333,
            10.5225,
            8.4425,
            9.1925,
            12.5575,
            14.3675,
            15.61667,
            13.655,
        ],
        "bohman": [
            np.nan,
            np.nan,
            8.9444,
            11.56327,
            7.61599,
            9.1764,
            12.83559,
            14.17267,
            15.90976,
            13.655,
        ],
        "hamming": [
            np.nan,
            np.nan,
            9.59321,
            10.29694,
            8.71384,
            9.56348,
            12.38009,
            14.20565,
            15.24694,
            13.69758,
        ],
        "nuttall": [
            np.nan,
            np.nan,
            8.47693,
            12.2821,
            7.04618,
            9.16786,
            13.02671,
            14.03673,
            16.08759,
            13.65553,
        ],
        "triang": [
            np.nan,
            np.nan,
            9.33167,
            9.76125,
            9.28667,
            10.34667,
            12.00556,
            13.82125,
            14.49429,
            13.765,
        ],
        "blackmanharris": [
            np.nan,
            np.nan,
            8.42526,
            12.36824,
            6.97691,
            9.16438,
            13.05052,
            14.02175,
            16.1098,
            13.65509,
        ],
    }

    xp = Series(xps[win_types])[::step]
    rs = Series(vals).rolling(5, win_type=win_types, min_periods=3, step=step).mean()
    tm.assert_series_equal(xp, rs)


def test_cmov_window_special(win_types_special, step):
    # GH 8238
    pytest.importorskip("scipy")
    kwds = {
        "kaiser": {"beta": 1.0},
        "gaussian": {"std": 1.0},
        "general_gaussian": {"p": 2.0, "sig": 2.0},
        "exponential": {"tau": 10},
    }

    vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])

    xps = {
        "gaussian": [
            np.nan,
            np.nan,
            8.97297,
            9.76077,
            12.24763,
            13.89053,
            13.65671,
            12.01002,
            np.nan,
            np.nan,
        ],
        "general_gaussian": [
            np.nan,
            np.nan,
            9.85011,
            10.71589,
            11.73161,
            13.08516,
            12.95111,
            12.74577,
            np.nan,
            np.nan,
        ],
        "kaiser": [
            np.nan,
            np.nan,
            9.86851,
            11.02969,
            11.65161,
            12.75129,
            12.90702,
            12.83757,
            np.nan,
            np.nan,
        ],
        "exponential": [
            np.nan,
            np.nan,
            9.83364,
            11.10472,
            11.64551,
            12.66138,
            12.92379,
            12.83770,
            np.nan,
            np.nan,
        ],
    }

    xp = Series(xps[win_types_special])[::step]
    rs = (
        Series(vals)
        .rolling(5, win_type=win_types_special, center=True, step=step)
        .mean(**kwds[win_types_special])
    )
    tm.assert_series_equal(xp, rs)


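# A minimal hand-check for the shape parameters forwarded via .mean(**kwds[...]) above,
# assuming they reach scipy.signal.get_window as a (name, arg) tuple such as
# ("gaussian", std). With std=1.0, a normalized weighted mean over the first full
# window reproduces xps["gaussian"][2] from test_cmov_window_special. The helper name
# is a sketch added for illustration; it is never called by the tests.
def _gaussian_weighted_mean_sketch():
    from scipy.signal import get_window  # local import: scipy is optional here

    window_vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81])  # first full window
    weights = get_window(("gaussian", 1.0), 5, fftbins=False)  # symmetric, std=1.0
    # ~22.2865 / ~2.4837 == 8.97297, matching xps["gaussian"][2] above
    return round(float(np.dot(weights, window_vals) / weights.sum()), 5)

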
def test_cmov_window_special_linear_range(win_types_special, step):
    # GH 8238
    pytest.importorskip("scipy")
    kwds = {
        "kaiser": {"beta": 1.0},
        "gaussian": {"std": 1.0},
        "general_gaussian": {"p": 2.0, "sig": 2.0},
        "slepian": {"width": 0.5},
        "exponential": {"tau": 10},
    }

    vals = np.array(range(10), dtype=float)
    xp = vals.copy()
    xp[:2] = np.nan
    xp[-2:] = np.nan
    xp = Series(xp)[::step]

    rs = (
        Series(vals)
        .rolling(5, win_type=win_types_special, center=True, step=step)
        .mean(**kwds[win_types_special])
    )
    tm.assert_series_equal(xp, rs)


def test_weighted_var_big_window_no_segfault(win_types, center):
    # GitHub Issue #46772
    pytest.importorskip("scipy")
    x = Series(0)
    result = x.rolling(window=16, center=center, win_type=win_types).var()
    expected = Series(np.nan)

    tm.assert_series_equal(result, expected)


def test_rolling_center_axis_1():
    pytest.importorskip("scipy")
    df = DataFrame(
        {"a": [1, 1, 0, 0, 0, 1], "b": [1, 0, 0, 1, 0, 0], "c": [1, 0, 0, 1, 0, 1]}
    )

    msg = "Support for axis=1 in DataFrame.rolling is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.rolling(window=3, axis=1, win_type="boxcar", center=True).sum()

    expected = DataFrame(
        {"a": [np.nan] * 6, "b": [3.0, 1.0, 0.0, 2.0, 0.0, 2.0], "c": [np.nan] * 6}
    )

    tm.assert_frame_equal(result, expected, check_dtype=True)