Updated script that can be controlled by a Node.js web app
This commit is contained in:
@ -0,0 +1,72 @@
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
notna,
|
||||
)
|
||||
|
||||
|
||||
def create_series():
    """Build the list of Series inputs shared by the consistency tests.

    A fresh list is built on every call. In order it holds: an empty float64
    Series named "a", an all-NaN Series, a constant Series, a decreasing
    range, an increasing range, and three Series with NaNs interleaved
    (constant values, increasing values, and values ending 3.0, 2.0).
    """
    nan = np.nan

    empty = Series(dtype=np.float64, name="a")
    all_nan = Series([nan] * 5)
    constant = Series([1.0] * 5)
    decreasing = Series(range(5, 0, -1))
    increasing = Series(range(5))
    constant_with_nans = Series([nan, 1.0, nan, 1.0, 1.0])
    increasing_with_nans = Series([nan, 1.0, nan, 2.0, 3.0])
    decreasing_tail_with_nans = Series([nan, 1.0, nan, 3.0, 2.0])

    return [
        empty,
        all_nan,
        constant,
        decreasing,
        increasing,
        constant_with_nans,
        increasing_with_nans,
        decreasing_tail_with_nans,
    ]
|
||||
|
||||
|
||||
def create_dataframes():
    """Build the list of DataFrame inputs shared by the consistency tests.

    The list starts with two hand-written frames (an empty frame with the
    duplicated column label "a", and a 5x3 integer frame with columns
    "a", "a", 99) followed by one single-column frame per Series returned
    by ``create_series``.
    """
    frames = [
        DataFrame(columns=["a", "a"]),
        DataFrame(np.arange(15).reshape((5, 3)), columns=["a", "a", 99]),
    ]
    frames.extend(DataFrame(series) for series in create_series())
    return frames
|
||||
|
||||
|
||||
def is_constant(x):
    """Return True when the non-missing values of ``x`` are all one value.

    ``x.values`` is flattened and missing entries are dropped; the result is
    True only if exactly one distinct value remains, so empty and all-NaN
    inputs give False.
    """
    flat = x.values.ravel("K")
    present = flat[notna(flat)]
    distinct = set(present)
    return len(distinct) == 1
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        candidate
        for candidate in itertools.chain(create_series(), create_dataframes())
        if is_constant(candidate)
    ],
)
def consistent_data(request):
    """Yield each Series/DataFrame test input for which ``is_constant`` holds."""
    constant_obj = request.param
    return constant_obj
|
||||
|
||||
|
||||
@pytest.fixture(params=create_series())
def series_data(request):
    """Yield, one per test run, each Series built by ``create_series``."""
    series = request.param
    return series
|
||||
|
||||
|
||||
@pytest.fixture(params=[*create_series(), *create_dataframes()])
def all_data(request):
    """
    Yield every Series from ``create_series`` followed by every DataFrame
    from ``create_dataframes``.

    Cases covered:
        - Empty Series / DataFrame
        - All NaN
        - All consistent value
        - Monotonically decreasing
        - Monotonically increasing
        - Monotonically consistent with NaNs
        - Monotonically increasing with NaNs
        - Monotonically decreasing with NaNs
    """
    obj = request.param
    return obj
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, 2])
def min_periods(request):
    """Yield the ``min_periods`` values (0 and 2) used by the tests."""
    periods = request.param
    return periods
|
@ -0,0 +1,243 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
concat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def create_mock_weights(obj, com, adjust, ignore_na):
    """Build reference EWM weights shaped like ``obj``.

    A non-DataFrame ``obj`` is handed straight to
    ``create_mock_series_weights``. A DataFrame with no columns yields an
    empty frame carrying the same index and columns. Otherwise weights are
    computed column by column (by position, so duplicated labels are fine),
    concatenated side by side, and relabelled with ``obj``'s index and
    columns.
    """
    if not isinstance(obj, DataFrame):
        return create_mock_series_weights(obj, com, adjust, ignore_na)

    n_columns = len(obj.columns)
    if n_columns == 0:
        return DataFrame(index=obj.index, columns=obj.columns)

    per_column = [
        create_mock_series_weights(
            obj.iloc[:, pos], com=com, adjust=adjust, ignore_na=ignore_na
        )
        for pos in range(n_columns)
    ]
    weights = concat(per_column, axis=1)
    weights.index = obj.index
    weights.columns = obj.columns
    return weights
|
||||
|
||||
|
||||
def create_mock_series_weights(s, com, adjust, ignore_na):
    """Compute reference EWM weights for a single Series.

    Returns a Series with the index and name of ``s``, holding a weight at
    every position where ``s`` is not NaN and NaN elsewhere. ``alpha`` is
    ``1 / (1 + com)``.

    A step counter advances on every non-NaN value, and also on NaN values
    unless ``ignore_na`` is true.

    * ``adjust`` true: the weight is ``(1 / (1 - alpha)) ** step``.
    * ``adjust`` false: the first non-NaN value gets weight 1.0; each later
      one gets ``alpha * (sum of earlier weights) / (1 - alpha) ** gap``,
      where ``gap`` is the number of steps since the previous non-NaN value.
    """
    weights = Series(np.nan, index=s.index, name=s.name)
    alpha = 1.0 / (1.0 + com)
    n_obs = len(s)
    step = 0

    if adjust:
        for pos in range(n_obs):
            value = s.iat[pos]
            if value == value:  # False only for NaN
                weights.iat[pos] = pow(1.0 / (1.0 - alpha), step)
                step += 1
            elif not ignore_na:
                step += 1
        return weights

    running_total = 0.0
    last_step = -1  # -1 means no non-NaN value has been seen yet
    for pos in range(n_obs):
        value = s.iat[pos]
        if value == value:  # False only for NaN
            if last_step == -1:
                weights.iat[pos] = 1.0
            else:
                gap = step - last_step
                weights.iat[pos] = alpha * running_total / pow(1.0 - alpha, gap)
            running_total += weights.iat[pos]
            last_step = step
            step += 1
        elif not ignore_na:
            step += 1
    return weights
|
||||
|
||||
|
||||
def test_ewm_consistency_mean(all_data, adjust, ignore_na, min_periods):
    """Compare ``ewm(...).mean()`` with a weighted mean built from mock weights.

    The reference is the cumulative weighted sum divided by the cumulative
    weight, forward-filled, and blanked to NaN wherever the expanding count
    of non-NaN observations is below the required minimum.
    """
    com = 3.0

    result = all_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).mean()

    weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
    weighted_cumsum = all_data.multiply(weights).cumsum()
    expected = weighted_cumsum.divide(weights.cumsum()).ffill()

    # At least one observation is always required, even when min_periods is 0.
    required = max(min_periods, 1) if min_periods else 1
    too_few = all_data.expanding().count() < required
    expected[too_few] = np.nan

    tm.assert_equal(result, expected.astype("float64"))
|
||||
|
||||
|
||||
def test_ewm_consistency_consistent(consistent_data, adjust, ignore_na, min_periods):
    """Check the EWM mean and self-correlation of constant-valued input.

    The mean must equal the constant wherever the expanding non-NaN count
    reaches ``max(min_periods, 1)`` and be NaN elsewhere. The correlation of
    the data with itself is asserted to be NaN everywhere.
    """
    com = 3.0

    def ewm_of(obj):
        return obj.ewm(
            com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
        )

    count_x = consistent_data.expanding().count()
    mean_x = ewm_of(consistent_data).mean()
    corr_x_x = ewm_of(consistent_data).corr(consistent_data)

    # The constant: the max of a Series, or the max over column maxima of a frame.
    if isinstance(consistent_data, Series):
        constant = consistent_data.max()
    else:
        constant = consistent_data.max().max()

    # Mean of a constant series is that constant once enough data is seen.
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = constant
    tm.assert_equal(mean_x, expected)

    # Correlation of a constant series with itself is NaN throughout.
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)
|
||||
|
||||
|
||||
def test_ewm_consistency_var_debiasing_factors(
    all_data, adjust, ignore_na, min_periods
):
    """Check that unbiased EWM variance equals biased variance times a factor.

    The factor is ``S**2 / (S**2 - Q)``, where ``S`` is the forward-filled
    cumulative sum of the mock weights and ``Q`` the forward-filled
    cumulative sum of their squares. Non-positive denominators become NaN.
    """
    com = 3.0

    def ewm_of(obj):
        return obj.ewm(
            com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
        )

    var_unbiased_x = ewm_of(all_data).var(bias=False)
    var_biased_x = ewm_of(all_data).var(bias=True)

    weights = create_mock_weights(all_data, com=com, adjust=adjust, ignore_na=ignore_na)
    weight_total = weights.cumsum().ffill()
    weight_sq_total = (weights * weights).cumsum().ffill()

    numerator = weight_total * weight_total
    denominator = numerator - weight_sq_total
    denominator[denominator <= 0.0] = np.nan
    debiasing_factors = numerator / denominator

    tm.assert_equal(var_unbiased_x, var_biased_x * debiasing_factors)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var(all_data, adjust, ignore_na, min_periods, bias):
    """Check EWM variance is never negative and, when biased, matches moments.

    For ``bias=True`` the variance must equal ``mean(x^2) - mean(x)^2``.
    """
    com = 3.0

    def ewm_of(obj):
        return obj.ewm(
            com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
        )

    mean_x = ewm_of(all_data).mean()
    var_x = ewm_of(all_data).var(bias=bias)
    assert not (var_x < 0).any().any()

    if not bias:
        return

    # Biased var(x) == mean(x^2) - mean(x)^2
    mean_x2 = ewm_of(all_data * all_data).mean()
    tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bias", [True, False])
def test_moments_consistency_var_constant(
    consistent_data, adjust, ignore_na, min_periods, bias
):
    """Check that the EWM variance of constant-valued input is zero or NaN.

    Expected is 0.0 wherever the expanding count reaches
    ``max(min_periods, 1)`` and NaN elsewhere; for the unbiased estimator,
    positions with fewer than two observations are NaN as well.
    """
    com = 3.0
    count_x = consistent_data.expanding(min_periods=min_periods).count()
    var_x = consistent_data.ewm(
        com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
    ).var(bias=bias)

    # Variance of a constant series is never positive.
    assert not (var_x > 0).any().any()

    expected = consistent_data * np.nan
    enough_data = count_x >= max(min_periods, 1)
    expected[enough_data] = 0.0
    if not bias:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_std(all_data, adjust, ignore_na, min_periods, bias):
    """Check EWM var, std and self-covariance agree with one another.

    None of the three may be negative, ``var(x)`` must equal ``std(x)^2``,
    and ``var(x)`` must equal ``cov(x, x)``.
    """
    com = 3.0

    def ewm_of(obj):
        return obj.ewm(
            com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
        )

    var_x = ewm_of(all_data).var(bias=bias)
    assert not (var_x < 0).any().any()

    std_x = ewm_of(all_data).std(bias=bias)
    assert not (std_x < 0).any().any()

    # var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)

    cov_x_x = ewm_of(all_data).cov(all_data, bias=bias)
    assert not (cov_x_x < 0).any().any()

    # var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bias", [True, False])
def test_ewm_consistency_series_cov_corr(
    series_data, adjust, ignore_na, min_periods, bias
):
    """Check EWM cov/corr identities, using the same Series as both x and y.

    Verified identities:
      * cov(x, y) == (var(x + y) - var(x) - var(y)) / 2
      * corr(x, y) == cov(x, y) / (std(x) * std(y))
      * for ``bias=True``: cov(x, y) == mean(x * y) - mean(x) * mean(y)
    """
    com = 3.0

    def ewm_of(obj):
        return obj.ewm(
            com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na
        )

    var_x_plus_y = ewm_of(series_data + series_data).var(bias=bias)
    var_x = ewm_of(series_data).var(bias=bias)
    var_y = ewm_of(series_data).var(bias=bias)
    cov_x_y = ewm_of(series_data).cov(series_data, bias=bias)

    # cov(x, y) == (var(x+y) - var(x) - var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # corr(x, y) == cov(x, y) / (std(x) * std(y))
    corr_x_y = ewm_of(series_data).corr(series_data)
    std_x = ewm_of(series_data).std(bias=bias)
    std_y = ewm_of(series_data).std(bias=bias)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if not bias:
        return

    # Biased cov(x, y) == mean(x*y) - mean(x)*mean(y)
    mean_x = ewm_of(series_data).mean()
    mean_y = ewm_of(series_data).mean()
    mean_x_times_y = ewm_of(series_data * series_data).mean()
    tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
|
@ -0,0 +1,144 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def no_nans(x):
    """Return a truthy value when ``x`` has no missing entries."""
    present = x.notna()
    return present.all().all()
|
||||
|
||||
|
||||
def all_na(x):
    """Return a truthy value when every entry of ``x`` is missing."""
    missing = x.isna()
    return missing.all().all()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_expanding_apply_consistency_sum_nans(request, all_data, min_periods, f):
    """Check ``expanding().sum()`` matches ``expanding().apply(f, raw=True)``.

    For ``np.sum`` the test is marked xfail when the data contains NaNs,
    except when the data is non-empty, entirely NaN, and ``min_periods`` is
    positive.
    """
    if f is np.sum:
        has_nans = not no_nans(all_data)
        if has_nans and not (
            all_na(all_data) and not all_data.empty and min_periods > 0
        ):
            xfail_marker = pytest.mark.xfail(
                reason="np.sum has different behavior with NaNs"
            )
            request.applymarker(xfail_marker)

    builtin_sum = all_data.expanding(min_periods=min_periods).sum()
    applied_sum = all_data.expanding(min_periods=min_periods).apply(func=f, raw=True)
    tm.assert_equal(builtin_sum, applied_sum)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, min_periods, ddof):
    """Check expanding variance is never negative and matches moments at ddof=0.

    For ``ddof == 0`` the variance must equal ``mean(x^2) - mean(x)^2``.
    """

    def expanding_of(obj):
        return obj.expanding(min_periods=min_periods)

    var_x = expanding_of(all_data).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    if ddof != 0:
        return

    # Biased var(x) == mean(x^2) - mean(x)^2
    mean_x2 = expanding_of(all_data * all_data).mean()
    mean_x = expanding_of(all_data).mean()
    tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(consistent_data, min_periods, ddof):
    """Check that the expanding variance of constant-valued input is zero or NaN.

    Expected is 0.0 wherever the expanding count reaches
    ``max(min_periods, 1)`` and NaN elsewhere; with ``ddof == 1``, positions
    with fewer than two observations are NaN as well.
    """
    window = consistent_data.expanding(min_periods=min_periods)
    count_x = window.count()
    var_x = consistent_data.expanding(min_periods=min_periods).var(ddof=ddof)

    # Variance of a constant series is never positive.
    assert not (var_x > 0).any().any()

    expected = consistent_data * np.nan
    enough_data = count_x >= max(min_periods, 1)
    expected[enough_data] = 0.0
    if ddof == 1:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_var_std_cov(all_data, min_periods, ddof):
    """Check expanding var, std and self-covariance agree with one another.

    None of the three may be negative, ``var(x)`` must equal ``std(x)^2``,
    and ``var(x)`` must equal ``cov(x, x)``.
    """

    def expanding_of(obj):
        return obj.expanding(min_periods=min_periods)

    var_x = expanding_of(all_data).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    std_x = expanding_of(all_data).std(ddof=ddof)
    assert not (std_x < 0).any().any()

    # var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)

    cov_x_x = expanding_of(all_data).cov(all_data, ddof=ddof)
    assert not (cov_x_x < 0).any().any()

    # var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_expanding_consistency_series_cov_corr(series_data, min_periods, ddof):
    """Check expanding cov/corr identities, using the same Series as x and y.

    Verified identities:
      * cov(x, y) == (var(x + y) - var(x) - var(y)) / 2
      * corr(x, y) == cov(x, y) / (std(x) * std(y))
      * for ``ddof == 0``: cov(x, y) == mean(x * y) - mean(x) * mean(y)
    """

    def expanding_of(obj):
        return obj.expanding(min_periods=min_periods)

    var_x_plus_y = expanding_of(series_data + series_data).var(ddof=ddof)
    var_x = expanding_of(series_data).var(ddof=ddof)
    var_y = expanding_of(series_data).var(ddof=ddof)
    cov_x_y = expanding_of(series_data).cov(series_data, ddof=ddof)

    # cov(x, y) == (var(x+y) - var(x) - var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # corr(x, y) == cov(x, y) / (std(x) * std(y))
    corr_x_y = expanding_of(series_data).corr(series_data)
    std_x = expanding_of(series_data).std(ddof=ddof)
    std_y = expanding_of(series_data).std(ddof=ddof)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if ddof != 0:
        return

    # Biased cov(x, y) == mean(x*y) - mean(x)*mean(y)
    mean_x = expanding_of(series_data).mean()
    mean_y = expanding_of(series_data).mean()
    mean_x_times_y = expanding_of(series_data * series_data).mean()
    tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
|
||||
|
||||
|
||||
def test_expanding_consistency_mean(all_data, min_periods):
    """Check that the expanding mean equals expanding sum over expanding count."""
    result = all_data.expanding(min_periods=min_periods).mean()

    total = all_data.expanding(min_periods=min_periods).sum()
    count = all_data.expanding(min_periods=min_periods).count()
    expected = total / count

    tm.assert_equal(result, expected.astype("float64"))
|
||||
|
||||
|
||||
def test_expanding_consistency_constant(consistent_data, min_periods):
    """Check the expanding mean and self-correlation of constant-valued input.

    The mean must equal the constant wherever the expanding non-NaN count
    reaches ``max(min_periods, 1)`` and be NaN elsewhere. The correlation of
    the data with itself is asserted to be NaN everywhere.
    """
    count_x = consistent_data.expanding().count()
    window = consistent_data.expanding(min_periods=min_periods)
    mean_x = window.mean()
    corr_x_x = consistent_data.expanding(min_periods=min_periods).corr(
        consistent_data
    )

    # The constant: the max of a Series, or the max over column maxima of a frame.
    if isinstance(consistent_data, Series):
        constant = consistent_data.max()
    else:
        constant = consistent_data.max().max()

    # Mean of a constant series is that constant once enough data is seen.
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = constant
    tm.assert_equal(mean_x, expected)

    # Correlation of a constant series with itself is NaN throughout.
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)
|
||||
|
||||
|
||||
def test_expanding_consistency_var_debiasing_factors(all_data, min_periods):
    """Check unbiased expanding variance equals biased variance times n/(n-1).

    ``n`` is the expanding count of non-NaN observations; a zero denominator
    is replaced by NaN.
    """
    var_unbiased_x = all_data.expanding(min_periods=min_periods).var()
    var_biased_x = all_data.expanding(min_periods=min_periods).var(ddof=0)

    n_obs = all_data.expanding().count()
    n_minus_one = (all_data.expanding().count() - 1.0).replace(0.0, np.nan)
    debiasing_factors = n_obs / n_minus_one

    tm.assert_equal(var_unbiased_x, var_biased_x * debiasing_factors)
|
@ -0,0 +1,244 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def no_nans(x):
    """Return a truthy value when ``x`` has no missing entries."""
    present = x.notna()
    return present.all().all()
|
||||
|
||||
|
||||
def all_na(x):
    """Return a truthy value when every entry of ``x`` is missing."""
    missing = x.isna()
    return missing.all().all()
|
||||
|
||||
|
||||
@pytest.fixture(params=[(1, 0), (5, 1)])
def rolling_consistency_cases(request):
    """Yield ``(window, min_periods)`` pairs: (1, 0) and (5, 1)."""
    case = request.param
    return case
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
def test_rolling_apply_consistency_sum(
    request, all_data, rolling_consistency_cases, center, f
):
    """Check ``rolling().sum()`` matches ``rolling().apply(f, raw=True)``.

    For ``np.sum`` the test is marked xfail when the data contains NaNs,
    except when the data is non-empty, entirely NaN, and ``min_periods`` is
    positive.
    """
    window, min_periods = rolling_consistency_cases

    def rolling_of(obj):
        return obj.rolling(window=window, min_periods=min_periods, center=center)

    if f is np.sum:
        has_nans = not no_nans(all_data)
        if has_nans and not (
            all_na(all_data) and not all_data.empty and min_periods > 0
        ):
            xfail_marker = pytest.mark.xfail(
                reason="np.sum has different behavior with NaNs"
            )
            request.applymarker(xfail_marker)

    builtin_sum = rolling_of(all_data).sum()
    applied_sum = rolling_of(all_data).apply(func=f, raw=True)
    tm.assert_equal(builtin_sum, applied_sum)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var(all_data, rolling_consistency_cases, center, ddof):
    """Check rolling variance is never negative and matches moments at ddof=0.

    For ``ddof == 0`` the variance must equal ``mean(x^2) - mean(x)^2``.
    """
    window, min_periods = rolling_consistency_cases

    def rolling_of(obj):
        return obj.rolling(window=window, min_periods=min_periods, center=center)

    var_x = rolling_of(all_data).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    if ddof != 0:
        return

    # Biased var(x) == mean(x^2) - mean(x)^2
    mean_x = rolling_of(all_data).mean()
    mean_x2 = rolling_of(all_data * all_data).mean()
    tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_moments_consistency_var_constant(
    consistent_data, rolling_consistency_cases, center, ddof
):
    """Check that the rolling variance of constant-valued input is zero or NaN.

    Expected is 0.0 wherever the rolling count reaches
    ``max(min_periods, 1)`` and NaN elsewhere; with ``ddof == 1``, positions
    with fewer than two observations are NaN as well.
    """
    window, min_periods = rolling_consistency_cases

    def rolling_of(obj):
        return obj.rolling(window=window, min_periods=min_periods, center=center)

    count_x = rolling_of(consistent_data).count()
    var_x = rolling_of(consistent_data).var(ddof=ddof)

    # Variance of a constant series is never positive.
    assert not (var_x > 0).any().any()

    expected = consistent_data * np.nan
    enough_data = count_x >= max(min_periods, 1)
    expected[enough_data] = 0.0
    if ddof == 1:
        expected[count_x < 2] = np.nan
    tm.assert_equal(var_x, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_var_std_cov(
    all_data, rolling_consistency_cases, center, ddof
):
    """Check rolling var, std and self-covariance agree with one another.

    None of the three may be negative, ``var(x)`` must equal ``std(x)^2``,
    and ``var(x)`` must equal ``cov(x, x)``.
    """
    window, min_periods = rolling_consistency_cases

    def rolling_of(obj):
        return obj.rolling(window=window, min_periods=min_periods, center=center)

    var_x = rolling_of(all_data).var(ddof=ddof)
    assert not (var_x < 0).any().any()

    std_x = rolling_of(all_data).std(ddof=ddof)
    assert not (std_x < 0).any().any()

    # var(x) == std(x)^2
    tm.assert_equal(var_x, std_x * std_x)

    cov_x_x = rolling_of(all_data).cov(all_data, ddof=ddof)
    assert not (cov_x_x < 0).any().any()

    # var(x) == cov(x, x)
    tm.assert_equal(var_x, cov_x_x)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ddof", [0, 1])
def test_rolling_consistency_series_cov_corr(
    series_data, rolling_consistency_cases, center, ddof
):
    """Check rolling cov/corr identities, using the same Series as x and y.

    Verified identities:
      * cov(x, y) == (var(x + y) - var(x) - var(y)) / 2
      * corr(x, y) == cov(x, y) / (std(x) * std(y))
      * for ``ddof == 0``: cov(x, y) == mean(x * y) - mean(x) * mean(y)
    """
    window, min_periods = rolling_consistency_cases

    def rolling_of(obj):
        return obj.rolling(window=window, min_periods=min_periods, center=center)

    var_x_plus_y = rolling_of(series_data + series_data).var(ddof=ddof)
    var_x = rolling_of(series_data).var(ddof=ddof)
    var_y = rolling_of(series_data).var(ddof=ddof)
    cov_x_y = rolling_of(series_data).cov(series_data, ddof=ddof)

    # cov(x, y) == (var(x+y) - var(x) - var(y)) / 2
    tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

    # corr(x, y) == cov(x, y) / (std(x) * std(y))
    corr_x_y = rolling_of(series_data).corr(series_data)
    std_x = rolling_of(series_data).std(ddof=ddof)
    std_y = rolling_of(series_data).std(ddof=ddof)
    tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

    if ddof != 0:
        return

    # Biased cov(x, y) == mean(x*y) - mean(x)*mean(y)
    mean_x = rolling_of(series_data).mean()
    mean_y = rolling_of(series_data).mean()
    mean_x_times_y = rolling_of(series_data * series_data).mean()
    tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
|
||||
|
||||
|
||||
def test_rolling_consistency_mean(all_data, rolling_consistency_cases, center):
    """Check that the rolling mean equals rolling sum divided by rolling count."""
    window, min_periods = rolling_consistency_cases

    def rolling_of(obj):
        return obj.rolling(window=window, min_periods=min_periods, center=center)

    result = rolling_of(all_data).mean()

    total = rolling_of(all_data).sum()
    count = rolling_of(all_data).count()
    expected = total.divide(count)

    tm.assert_equal(result, expected.astype("float64"))
|
||||
|
||||
|
||||
def test_rolling_consistency_constant(
    consistent_data, rolling_consistency_cases, center
):
    """Check the rolling mean and self-correlation of constant-valued input.

    The mean must equal the constant wherever the rolling non-NaN count
    reaches ``max(min_periods, 1)`` and be NaN elsewhere. The correlation of
    the data with itself is asserted to be NaN everywhere.
    """
    window, min_periods = rolling_consistency_cases

    def rolling_of(obj):
        return obj.rolling(window=window, min_periods=min_periods, center=center)

    count_x = rolling_of(consistent_data).count()
    mean_x = rolling_of(consistent_data).mean()
    corr_x_x = rolling_of(consistent_data).corr(consistent_data)

    # The constant: the max of a Series, or the max over column maxima of a frame.
    if isinstance(consistent_data, Series):
        constant = consistent_data.max()
    else:
        constant = consistent_data.max().max()

    # Mean of a constant series is that constant once enough data is seen.
    expected = consistent_data * np.nan
    expected[count_x >= max(min_periods, 1)] = constant
    tm.assert_equal(mean_x, expected)

    # Correlation of a constant series with itself is NaN throughout.
    expected[:] = np.nan
    tm.assert_equal(corr_x_x, expected)
|
||||
|
||||
|
||||
def test_rolling_consistency_var_debiasing_factors(
    all_data, rolling_consistency_cases, center
):
    """Check unbiased rolling variance equals biased variance times n/(n-1).

    ``n`` is the rolling count of non-NaN observations; a zero denominator
    is replaced by NaN.
    """
    window, min_periods = rolling_consistency_cases

    def rolling_of(obj):
        return obj.rolling(window=window, min_periods=min_periods, center=center)

    var_unbiased_x = rolling_of(all_data).var()
    var_biased_x = rolling_of(all_data).var(ddof=0)

    n_obs = rolling_of(all_data).count()
    n_minus_one = (rolling_of(all_data).count() - 1.0).replace(0.0, np.nan)
    debiasing_factors = n_obs.divide(n_minus_one)

    tm.assert_equal(var_unbiased_x, var_biased_x * debiasing_factors)
|
Reference in New Issue
Block a user