Updated script that can be controlled by a Node.js web app
This commit is contained in:
143
lib/python3.13/site-packages/pandas/tests/resample/conftest.py
Normal file
143
lib/python3.13/site-packages/pandas/tests/resample/conftest.py
Normal file
@ -0,0 +1,143 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
|
||||
# The various methods we support
# Aggregations that reduce each resample bin to (at most) one row.
downsample_methods = [
    "min",
    "max",
    "first",
    "last",
    "sum",
    "mean",
    "sem",
    "median",
    "prod",
    "var",
    "std",
    "ohlc",
    "quantile",
]
# Methods meaningful when upsampling (counting rows per bin).
upsample_methods = ["count", "size"]
# Methods available only on Series resamplers.
series_methods = ["nunique"]
# Complete method set used for fixture parametrization in this module.
resample_methods = downsample_methods + upsample_methods + series_methods
|
||||
|
||||
|
||||
@pytest.fixture(params=downsample_methods)
def downsample_method(request):
    """Fixture for parametrization of Grouper downsample methods."""
    return request.param


@pytest.fixture(params=resample_methods)
def resample_method(request):
    """Fixture for parametrization of Grouper resample methods."""
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture
def _index_start():
    """Fixture for parametrization of index, series and frame."""
    return datetime(2005, 1, 1)


@pytest.fixture
def _index_end():
    """Fixture for parametrization of index, series and frame."""
    return datetime(2005, 1, 10)


@pytest.fixture
def _index_freq():
    """Fixture for parametrization of index, series and frame."""
    return "D"


@pytest.fixture
def _index_name():
    """Fixture for parametrization of index, series and frame."""
    return None


@pytest.fixture
def index(_index_factory, _index_start, _index_end, _index_freq, _index_name):
    """
    Fixture for parametrization of date_range, period_range and
    timedelta_range indexes
    """
    # NOTE: _index_factory is not defined in this file; test modules supply
    # (and override) it via parametrization.
    return _index_factory(_index_start, _index_end, freq=_index_freq, name=_index_name)
|
||||
|
||||
|
||||
@pytest.fixture
def _static_values(index):
    """
    Fixture for parametrization of values used in parametrization of
    Series and DataFrames with date_range, period_range and
    timedelta_range indexes
    """
    # One monotonically increasing integer per index entry.
    return np.arange(len(index))


@pytest.fixture
def _series_name():
    """
    Fixture for parametrization of Series name for Series used with
    date_range, period_range and timedelta_range indexes
    """
    return None


@pytest.fixture
def series(index, _series_name, _static_values):
    """
    Fixture for parametrization of Series with date_range, period_range and
    timedelta_range indexes
    """
    return Series(_static_values, index=index, name=_series_name)
|
||||
|
||||
|
||||
@pytest.fixture
def empty_series_dti(series):
    """
    Fixture for parametrization of empty Series with date_range,
    period_range and timedelta_range indexes
    """
    # Zero-length slice keeps the index type/dtype of the full series.
    return series[:0]


@pytest.fixture
def frame(index, _series_name, _static_values):
    """
    Fixture for parametrization of DataFrame with date_range, period_range
    and timedelta_range indexes
    """
    # _series_name is intentionally unused
    return DataFrame({"value": _static_values}, index=index)


@pytest.fixture
def empty_frame_dti(series):
    """
    Fixture for parametrization of empty DataFrame with date_range,
    period_range and timedelta_range indexes
    """
    index = series.index[:0]
    return DataFrame(index=index)


@pytest.fixture
def series_and_frame(frame_or_series, series, frame):
    """
    Fixture for parametrization of Series and DataFrame with date_range,
    period_range and timedelta_range indexes
    """
    # frame_or_series is pandas' global fixture yielding exactly Series or
    # DataFrame, so one of the two branches always returns.
    if frame_or_series == Series:
        return series
    if frame_or_series == DataFrame:
        return frame
|
460
lib/python3.13/site-packages/pandas/tests/resample/test_base.py
Normal file
460
lib/python3.13/site-packages/pandas/tests/resample/test_base.py
Normal file
@ -0,0 +1,460 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_extension_array_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.groupby.groupby import DataError
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import period_range
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
from pandas.core.resample import _asfreq_compat
|
||||
|
||||
# a fixture value can be overridden by the test parameter value. Note that the
# value of the fixture can be overridden this way even if the test doesn't use
# it directly (doesn't mention it in the function prototype).
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa: E501
# in this module we override the fixture values defined in conftest.py
# tuples of '_index_factory,_series_name,_index_start,_index_end'
DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10))
PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10))
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")

# Reusable decorator parametrizing a test over all three index flavors.
all_ts = pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end",
    [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE],
)


@pytest.fixture
def create_index(_index_factory):
    """Return a callable building an index with the current _index_factory."""

    def _create_index(*args, **kwargs):
        """return the _index_factory created using the args, kwargs"""
        return _index_factory(*args, **kwargs)

    return _create_index
|
||||
|
||||
|
||||
@pytest.mark.parametrize("freq", ["2D", "1h"])
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq(series_and_frame, freq, create_index):
    # resample(...).asfreq() should equal a plain reindex onto the new grid
    obj = series_and_frame

    result = obj.resample(freq).asfreq()
    new_index = create_index(obj.index[0], obj.index[-1], freq=freq)
    expected = obj.reindex(new_index)
    tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq_fill_value(series, create_index):
    # test for fill value during resampling, issue 3715

    ser = series

    result = ser.resample("1h").asfreq()
    new_index = create_index(ser.index[0], ser.index[-1], freq="1h")
    expected = ser.reindex(new_index)
    tm.assert_series_equal(result, expected)

    # Explicit cast to float to avoid implicit cast when setting None
    frame = ser.astype("float").to_frame("value")
    frame.iloc[1] = None
    result = frame.resample("1h").asfreq(fill_value=4.0)
    new_index = create_index(frame.index[0], frame.index[-1], freq="1h")
    expected = frame.reindex(new_index, fill_value=4.0)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@all_ts
def test_resample_interpolate(frame):
    # GH#12925
    # .resample(...).interpolate() should match asfreq().interpolate()
    df = frame
    warn = None
    if isinstance(df.index, PeriodIndex):
        warn = FutureWarning
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        result = df.resample("1min").asfreq().interpolate()
        expected = df.resample("1min").interpolate()
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_raises_on_non_datetimelike_index():
    """Resampling a default RangeIndex must raise a TypeError."""
    df = DataFrame()
    expected_msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
        "but got an instance of 'RangeIndex'"
    )
    with pytest.raises(TypeError, match=expected_msg):
        df.resample("YE")
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_series(freq, empty_series_dti, resample_method):
    # GH12771 & GH12868
    # Resampling an empty Series keeps its (converted) index and dtype.

    ser = empty_series_dti
    if freq == "ME" and isinstance(ser.index, TimedeltaIndex):
        # Calendar-based freqs are invalid for TimedeltaIndex.
        msg = (
            "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
            "e.g. '24h' or '3D', not <MonthEnd>"
        )
        with pytest.raises(ValueError, match=msg):
            ser.resample(freq)
        return
    elif freq == "ME" and isinstance(ser.index, PeriodIndex):
        # index is PeriodIndex, so convert to corresponding Period freq
        freq = "M"

    warn = None
    if isinstance(ser.index, PeriodIndex):
        warn = FutureWarning
    # msg must be bound even when no warning is expected (match= is evaluated)
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        rs = ser.resample(freq)
    result = getattr(rs, resample_method)()

    if resample_method == "ohlc":
        # ohlc returns a frame with fixed OHLC columns even for empty input
        expected = DataFrame(
            [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
        )
        expected.index = _asfreq_compat(ser.index, freq)
        tm.assert_frame_equal(result, expected, check_dtype=False)
    else:
        expected = ser.copy()
        expected.index = _asfreq_compat(ser.index, freq)
        tm.assert_series_equal(result, expected, check_dtype=False)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize(
    "freq",
    [
        pytest.param("ME", marks=pytest.mark.xfail(reason="Don't know why this fails")),
        "D",
        "h",
    ],
)
def test_resample_nat_index_series(freq, series, resample_method):
    # GH39227
    # An all-NaT PeriodIndex should resample to an empty result.

    ser = series.copy()
    ser.index = PeriodIndex([NaT] * len(ser), freq=freq)

    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.resample(freq)
    result = getattr(rs, resample_method)()

    if resample_method == "ohlc":
        expected = DataFrame(
            [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
        )
        tm.assert_frame_equal(result, expected, check_dtype=False)
    else:
        expected = ser[:0].copy()
        tm.assert_series_equal(result, expected, check_dtype=False)
    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, empty_series_dti, resample_method):
    # GH28427
    # count/size on an empty Series yield an empty int64 Series.
    ser = empty_series_dti
    if freq == "ME" and isinstance(ser.index, TimedeltaIndex):
        msg = (
            "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
            "e.g. '24h' or '3D', not <MonthEnd>"
        )
        with pytest.raises(ValueError, match=msg):
            ser.resample(freq)
        return
    elif freq == "ME" and isinstance(ser.index, PeriodIndex):
        # index is PeriodIndex, so convert to corresponding Period freq
        freq = "M"

    warn = None
    if isinstance(ser.index, PeriodIndex):
        warn = FutureWarning
    # bound unconditionally: match= is evaluated even when warn is None
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        rs = ser.resample(freq)

    result = getattr(rs, resample_method)()

    index = _asfreq_compat(ser.index, freq)

    expected = Series([], dtype="int64", index=index, name=ser.name)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method):
    # GH13212
    df = empty_frame_dti
    # count retains dimensions too
    if freq == "ME" and isinstance(df.index, TimedeltaIndex):
        msg = (
            "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
            "e.g. '24h' or '3D', not <MonthEnd>"
        )
        with pytest.raises(ValueError, match=msg):
            df.resample(freq, group_keys=False)
        return
    elif freq == "ME" and isinstance(df.index, PeriodIndex):
        # index is PeriodIndex, so convert to corresponding Period freq
        freq = "M"

    warn = None
    if isinstance(df.index, PeriodIndex):
        warn = FutureWarning
    # bound unconditionally: match= is evaluated even when warn is None
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        rs = df.resample(freq, group_keys=False)
    result = getattr(rs, resample_method)()
    if resample_method == "ohlc":
        # TODO: no tests with len(df.columns) > 0
        mi = MultiIndex.from_product([df.columns, ["open", "high", "low", "close"]])
        expected = DataFrame(
            [], index=df.index[:0].copy(), columns=mi, dtype=np.float64
        )
        expected.index = _asfreq_compat(df.index, freq)

    elif resample_method != "size":
        expected = df.copy()
    else:
        # GH14962
        expected = Series([], dtype=np.int64)

    expected.index = _asfreq_compat(df.index, freq)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    tm.assert_almost_equal(result, expected)

    # test size for GH13212 (currently stays as df)
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_count_empty_dataframe(freq, empty_frame_dti):
    # GH28427
    # count on an empty frame keeps the column, with int64 dtype.

    empty_frame_dti["a"] = []

    if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex):
        msg = (
            "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
            "e.g. '24h' or '3D', not <MonthEnd>"
        )
        with pytest.raises(ValueError, match=msg):
            empty_frame_dti.resample(freq)
        return
    elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex):
        # index is PeriodIndex, so convert to corresponding Period freq
        freq = "M"

    warn = None
    if isinstance(empty_frame_dti.index, PeriodIndex):
        warn = FutureWarning
    # bound unconditionally: match= is evaluated even when warn is None
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        rs = empty_frame_dti.resample(freq)
    result = rs.count()

    index = _asfreq_compat(empty_frame_dti.index, freq)

    expected = DataFrame(dtype="int64", index=index, columns=Index(["a"], dtype=object))

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_size_empty_dataframe(freq, empty_frame_dti):
    # GH28427
    # size on an empty frame reduces to an empty int64 Series.

    empty_frame_dti["a"] = []

    if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex):
        msg = (
            "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
            "e.g. '24h' or '3D', not <MonthEnd>"
        )
        with pytest.raises(ValueError, match=msg):
            empty_frame_dti.resample(freq)
        return
    elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex):
        # index is PeriodIndex, so convert to corresponding Period freq
        freq = "M"

    msg = "Resampling with a PeriodIndex"
    warn = None
    if isinstance(empty_frame_dti.index, PeriodIndex):
        warn = FutureWarning
    with tm.assert_produces_warning(warn, match=msg):
        rs = empty_frame_dti.resample(freq)
    result = rs.size()

    index = _asfreq_compat(empty_frame_dti.index, freq)

    expected = Series([], dtype="int64", index=index)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="M", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_resample_empty_dtypes(index, dtype, resample_method):
    # Empty series were sometimes causing a segfault (for the functions
    # with Cython bounds-checking disabled) or an IndexError. We just run
    # them to ensure they no longer do. (GH #10228)
    warn = None
    if isinstance(index, PeriodIndex):
        # GH#53511
        index = PeriodIndex([], freq="B", name=index.name)
        warn = FutureWarning
    # bound unconditionally: match= is evaluated even when warn is None
    msg = "Resampling with a PeriodIndex is deprecated"

    empty_series_dti = Series([], index, dtype)
    with tm.assert_produces_warning(warn, match=msg):
        rs = empty_series_dti.resample("d", group_keys=False)
    try:
        getattr(rs, resample_method)()
    except DataError:
        # Ignore these since some combinations are invalid
        # (ex: doing mean with dtype of np.object_)
        pass
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_apply_to_empty_series(empty_series_dti, freq):
    # GH 14313
    # apply on an empty resampler must agree with a reducing agg.
    ser = empty_series_dti

    if freq == "ME" and isinstance(empty_series_dti.index, TimedeltaIndex):
        msg = (
            "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
            "e.g. '24h' or '3D', not <MonthEnd>"
        )
        with pytest.raises(ValueError, match=msg):
            empty_series_dti.resample(freq)
        return
    elif freq == "ME" and isinstance(empty_series_dti.index, PeriodIndex):
        # index is PeriodIndex, so convert to corresponding Period freq
        freq = "M"

    msg = "Resampling with a PeriodIndex"
    warn = None
    if isinstance(empty_series_dti.index, PeriodIndex):
        warn = FutureWarning

    with tm.assert_produces_warning(warn, match=msg):
        rs = ser.resample(freq, group_keys=False)

    result = rs.apply(lambda x: 1)
    with tm.assert_produces_warning(warn, match=msg):
        expected = ser.resample(freq).apply("sum")

    tm.assert_series_equal(result, expected, check_dtype=False)
|
||||
|
||||
|
||||
@all_ts
def test_resampler_is_iterable(series):
    # GH 15314
    # iterating a resampler yields the same (key, group) pairs as groupby
    freq = "h"
    tg = Grouper(freq=freq, convention="start")
    msg = "Resampling with a PeriodIndex"
    warn = None
    if isinstance(series.index, PeriodIndex):
        warn = FutureWarning

    with tm.assert_produces_warning(warn, match=msg):
        grouped = series.groupby(tg)

    with tm.assert_produces_warning(warn, match=msg):
        resampled = series.resample(freq)
    for (rk, rv), (gk, gv) in zip(resampled, grouped):
        assert rk == gk
        tm.assert_series_equal(rv, gv)
|
||||
|
||||
|
||||
@all_ts
def test_resample_quantile(series):
    # GH 15023
    # .quantile(q) must agree with agg(lambda x: x.quantile(q))
    ser = series
    q = 0.75
    freq = "h"

    msg = "Resampling with a PeriodIndex"
    warn = None
    if isinstance(series.index, PeriodIndex):
        warn = FutureWarning
    with tm.assert_produces_warning(warn, match=msg):
        result = ser.resample(freq).quantile(q)
        expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("how", ["first", "last"])
def test_first_last_skipna(any_real_nullable_dtype, skipna, how):
    # GH#57019
    # first/last with skipna must match the equivalent groupby reduction.
    if is_extension_array_dtype(any_real_nullable_dtype):
        # use the dtype's own NA sentinel (pd.NA / np.nan depending on dtype)
        na_value = Series(dtype=any_real_nullable_dtype).dtype.na_value
    else:
        na_value = np.nan
    df = DataFrame(
        {
            "a": [2, 1, 1, 2],
            "b": [na_value, 3.0, na_value, 4.0],
            "c": [na_value, 3.0, na_value, 4.0],
        },
        index=date_range("2020-01-01", periods=4, freq="D"),
        dtype=any_real_nullable_dtype,
    )
    rs = df.resample("ME")
    method = getattr(rs, how)
    result = method(skipna=skipna)

    # all four rows fall into the single 2020-01-31 monthly bin
    gb = df.groupby(df.shape[0] * [pd.to_datetime("2020-01-31")])
    expected = getattr(gb, how)(skipna=skipna)
    expected.index.freq = "ME"
    tm.assert_frame_equal(result, expected)
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,715 @@
|
||||
from textwrap import dedent
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import is_platform_windows
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
|
||||
|
||||
@pytest.fixture
def test_frame():
    """40-row frame: group key 'A' (1/2/3) and values 'B' on a 1s DatetimeIndex."""
    return DataFrame(
        {"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)},
        index=date_range("1/1/2000", freq="s", periods=40),
    )
|
||||
|
||||
|
||||
def test_tab_complete_ipython6_warning(ip):
    # Tab-completing a Resampler in IPython must not emit warnings.
    from IPython.core.completer import provisionalcompleter

    code = dedent(
        """\
    import numpy as np
    from pandas import Series, date_range
    data = np.arange(10, dtype=np.float64)
    index = date_range("2020-01-01", periods=len(data))
    s = Series(data, index=index)
    rs = s.resample("D")
    """
    )
    ip.run_cell(code)

    # GH 31324 newer jedi version raises Deprecation warning;
    # appears resolved 2021-02-02
    with tm.assert_produces_warning(None, raise_on_extra_warnings=False):
        with provisionalcompleter("ignore"):
            list(ip.Completer.completions("rs.", 1))
|
||||
|
||||
|
||||
def test_deferred_with_groupby():
    # GH 12486
    # support deferred resample ops with groupby
    data = [
        ["2010-01-01", "A", 2],
        ["2010-01-02", "A", 3],
        ["2010-01-05", "A", 8],
        ["2010-01-10", "A", 7],
        ["2010-01-13", "A", 3],
        ["2010-01-01", "B", 5],
        ["2010-01-03", "B", 2],
        ["2010-01-04", "B", 1],
        ["2010-01-11", "B", 7],
        ["2010-01-14", "B", 3],
    ]

    df = DataFrame(data, columns=["date", "id", "score"])
    df.date = pd.to_datetime(df.date)

    def f_0(x):
        # per-group reference implementation of groupby(...).resample(...)
        return x.set_index("date").resample("D").asfreq()

    msg = "DataFrameGroupBy.apply operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        expected = df.groupby("id").apply(f_0)
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.set_index("date").groupby("id").resample("D").asfreq()
    tm.assert_frame_equal(result, expected)

    df = DataFrame(
        {
            "date": date_range(start="2016-01-01", periods=4, freq="W"),
            "group": [1, 1, 2, 2],
            "val": [5, 6, 7, 8],
        }
    ).set_index("date")

    def f_1(x):
        # per-group reference implementation of the deferred ffill
        return x.resample("1D").ffill()

    msg = "DataFrameGroupBy.apply operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        expected = df.groupby("group").apply(f_1)
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby("group").resample("1D").ffill()
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem(test_frame):
    # Column selection before/after .resample must give the same result.
    g = test_frame.groupby("A")

    expected = g.B.apply(lambda x: x.resample("2s").mean())

    result = g.resample("2s").B.mean()
    tm.assert_series_equal(result, expected)

    result = g.B.resample("2s").mean()
    tm.assert_series_equal(result, expected)

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = g.resample("2s").mean().B
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_multiple():
    # GH 13174
    # multiple calls after selection causing an issue with aliasing
    records = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
    frame = DataFrame(records, index=date_range("2016-01-01", periods=2))
    resampler = frame.groupby("id").resample("1D")

    exp_mi = pd.MultiIndex.from_arrays([[1, 2], frame.index], names=("id", None))
    expected = Series(
        [1, 1],
        index=exp_mi,
        name="buyer",
    )

    # selecting and counting twice must be stable (no aliasing between calls)
    for _ in range(2):
        result = resampler["buyer"].count()
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_api_with_getitem():
    # GH 17813
    # resample(on=...) must match setting the index up front
    df = DataFrame(
        {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}
    )
    exp = df.set_index("date").groupby("id").resample("2D")["data"].sum()
    result = df.groupby("id").resample("2D", on="date")["data"].sum()
    tm.assert_series_equal(result, exp)
|
||||
|
||||
|
||||
def test_groupby_with_origin():
    # GH 31809
    # Grouper(origin=...) anchors bins so slicing a series mid-way
    # still produces the same bin edges.

    freq = "1399min"  # prime number that is smaller than 24h
    start, end = "1/1/2000 00:00:00", "1/31/2000 00:00"
    middle = "1/15/2000 00:00:00"

    rng = date_range(start, end, freq="1231min")  # prime number
    ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
    ts2 = ts[middle:end]

    # proves that grouper without a fixed origin does not work
    # when dealing with unusual frequencies
    simple_grouper = pd.Grouper(freq=freq)
    count_ts = ts.groupby(simple_grouper).agg("count")
    count_ts = count_ts[middle:end]
    count_ts2 = ts2.groupby(simple_grouper).agg("count")
    with pytest.raises(AssertionError, match="Index are different"):
        tm.assert_index_equal(count_ts.index, count_ts2.index)

    # test origin on 1970-01-01 00:00:00
    origin = Timestamp(0)
    adjusted_grouper = pd.Grouper(freq=freq, origin=origin)
    adjusted_count_ts = ts.groupby(adjusted_grouper).agg("count")
    adjusted_count_ts = adjusted_count_ts[middle:end]
    adjusted_count_ts2 = ts2.groupby(adjusted_grouper).agg("count")
    tm.assert_series_equal(adjusted_count_ts, adjusted_count_ts2)

    # test origin on 2049-10-18 20:00:00
    origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000
    adjusted_grouper2 = pd.Grouper(freq=freq, origin=origin_future)
    adjusted2_count_ts = ts.groupby(adjusted_grouper2).agg("count")
    adjusted2_count_ts = adjusted2_count_ts[middle:end]
    adjusted2_count_ts2 = ts2.groupby(adjusted_grouper2).agg("count")
    tm.assert_series_equal(adjusted2_count_ts, adjusted2_count_ts2)

    # both grouper use an adjusted timestamp that is a multiple of 1399 min
    # they should be equals even if the adjusted_timestamp is in the future
    tm.assert_series_equal(adjusted_count_ts, adjusted2_count_ts2)
|
||||
|
||||
|
||||
def test_nearest():
    # GH 17496
    # Resample nearest
    src_index = date_range("1/1/2000", periods=3, freq="min")
    ser = Series(range(3), index=src_index)
    result = ser.resample("20s").nearest()

    expected_stamps = [
        "2000-01-01 00:00:00",
        "2000-01-01 00:00:20",
        "2000-01-01 00:00:40",
        "2000-01-01 00:01:00",
        "2000-01-01 00:01:20",
        "2000-01-01 00:01:40",
        "2000-01-01 00:02:00",
    ]
    expected_index = pd.DatetimeIndex(
        expected_stamps, dtype="datetime64[ns]", freq="20s"
    )
    expected = Series([0, 0, 1, 1, 1, 2, 2], index=expected_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "f",
    [
        "first",
        "last",
        "median",
        "sem",
        "sum",
        "mean",
        "min",
        "max",
        "size",
        "count",
        "nearest",
        "bfill",
        "ffill",
        "asfreq",
        "ohlc",
    ],
)
def test_methods(f, test_frame):
    # each deferred groupby-resample method must match the apply equivalent
    g = test_frame.groupby("A")
    r = g.resample("2s")

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = getattr(r, f)()
    msg = "DataFrameGroupBy.apply operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_methods_nunique(test_frame):
|
||||
# series only
|
||||
g = test_frame.groupby("A")
|
||||
r = g.resample("2s")
|
||||
result = r.B.nunique()
|
||||
expected = g.B.apply(lambda x: x.resample("2s").nunique())
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("f", ["std", "var"])
def test_methods_std_var(f, test_frame):
    # std/var take ddof; verify it is forwarded through the deferred path
    g = test_frame.groupby("A")
    r = g.resample("2s")
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = getattr(r, f)(ddof=1)
    msg = "DataFrameGroupBy.apply operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply(test_frame):
    # apply on a deferred groupby-resample: both a resample-returning
    # callable and a nested resample().apply must match plain .sum()
    g = test_frame.groupby("A")
    r = g.resample("2s")

    # reduction
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        expected = g.resample("2s").sum()

    def f_0(x):
        return x.resample("2s").sum()

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = r.apply(f_0)
    tm.assert_frame_equal(result, expected)

    def f_1(x):
        return x.resample("2s").apply(lambda y: y.sum())

    msg = "DataFrameGroupBy.apply operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = g.apply(f_1)
    # y.sum() results in int64 instead of int32 on 32-bit architectures
    expected = expected.astype("int64")
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_with_mutated_index():
    # GH 15169
    # an apply func that returns a differently-indexed object must behave
    # the same under resample as under an equivalent Grouper groupby
    index = date_range("1-1-2015", "12-31-15", freq="D")
    df = DataFrame(
        data={"col1": np.random.default_rng(2).random(len(index))}, index=index
    )

    def f(x):
        s = Series([1, 2], index=["a", "b"])
        return s

    expected = df.groupby(pd.Grouper(freq="ME")).apply(f)

    result = df.resample("ME").apply(f)
    tm.assert_frame_equal(result, expected)

    # A case for series
    expected = df["col1"].groupby(pd.Grouper(freq="ME"), group_keys=False).apply(f)
    result = df["col1"].resample("ME").apply(f)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_columns_multilevel():
    # GH 16231: apply on a resampled frame must dispatch per column even
    # when the columns form a MultiIndex.
    tuples = [("A", "a", "", "one"), ("B", "b", "i", "two")]
    cols = pd.MultiIndex.from_tuples(tuples)
    ind = date_range(start="2017-01-01", freq="15Min", periods=8)
    df = DataFrame(np.zeros((8, 2), dtype=int), index=ind, columns=cols)

    # "one" columns get summed, everything else is averaged
    agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns}
    result = df.resample("h").apply(lambda x: agg_dict[x.name](x))

    expected = DataFrame(
        2 * [[0, 0.0]],
        index=date_range(start="2017-01-01", freq="1h", periods=2),
        columns=pd.MultiIndex.from_tuples(tuples),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_non_naive_index():
    # Resample.apply must forward extra kwargs to the applied function
    # and must work with a tz-aware index.
    def weighted_quantile(series, weights, q):
        # smallest value whose cumulative weight reaches the q-quantile
        series = series.sort_values()
        cumsum = weights.reindex(series.index).fillna(0).cumsum()
        cutoff = cumsum.iloc[-1] * q
        return series[cumsum >= cutoff].iloc[0]

    times = date_range("2017-6-23 18:00", periods=8, freq="15min", tz="UTC")
    data = Series([1.0, 1, 1, 1, 1, 2, 2, 0], index=times)
    weights = Series([160.0, 91, 65, 43, 24, 10, 1, 0], index=times)

    result = data.resample("D").apply(weighted_quantile, weights=weights, q=0.5)

    expected_index = date_range(
        "2017-06-23 00:00:00+00:00", "2017-06-23 00:00:00+00:00", freq="D", tz="UTC"
    )
    tm.assert_series_equal(result, Series([1.0], index=expected_index))
|
||||
|
||||
|
||||
def test_resample_groupby_with_label(unit):
    # GH 13235
    # label="left" must be honoured when resampling inside a groupby;
    # `unit` parametrizes the datetime resolution of the index.
    index = date_range("2000-01-01", freq="2D", periods=5, unit=unit)
    df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]})
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby("col0").resample("1W", label="left").sum()

    # left-labelled weekly bins: the first group starts in the prior week
    mi = [
        np.array([0, 0, 1, 2], dtype=np.int64),
        np.array(
            ["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"],
            dtype=f"M8[{unit}]",
        ),
    ]
    mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None])
    expected = DataFrame(
        data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex
    )

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_consistency_with_window(test_frame):
    # consistent return values with window
    # groupby().resample() and groupby().rolling() should both produce a
    # two-level index whose outer level is the group key.
    df = test_frame
    expected = Index([1, 2, 3], name="A")
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby("A").resample("2s").mean()
    assert result.index.nlevels == 2
    tm.assert_index_equal(result.index.levels[0], expected)

    result = df.groupby("A").rolling(20).mean()
    assert result.index.nlevels == 2
    tm.assert_index_equal(result.index.levels[0], expected)
|
||||
|
||||
|
||||
def test_median_duplicate_columns():
    # GH 14233: median must not mix up data between duplicate column labels.
    df = DataFrame(
        np.random.default_rng(2).standard_normal((20, 3)),
        columns=list("aaa"),
        index=date_range("2012-01-01", periods=20, freq="s"),
    )
    result = df.resample("5s").median()

    # the same data under unique labels gives the reference answer
    unique = df.copy()
    unique.columns = ["a", "b", "c"]
    expected = unique.resample("5s").median()
    expected.columns = result.columns
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_to_one_column_of_df():
    # GH: 36951
    # Resample.apply with a function that touches a single column should
    # return a Series of the per-bin results, whichever access style the
    # function uses.
    df = DataFrame(
        {"col": range(10), "col1": range(10, 20)},
        index=date_range("2012-01-01", periods=10, freq="20min"),
    )

    # access "col" via getattr -> make sure we handle AttributeError
    result = df.resample("h").apply(lambda group: group.col.sum())
    expected = Series(
        [3, 12, 21, 9], index=date_range("2012-01-01", periods=4, freq="h")
    )
    tm.assert_series_equal(result, expected)

    # access "col" via __getitem__ -> make sure we handle KeyError
    result = df.resample("h").apply(lambda group: group["col"].sum())
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_agg():
|
||||
# GH: 33548
|
||||
df = DataFrame(
|
||||
{
|
||||
"cat": [
|
||||
"cat_1",
|
||||
"cat_1",
|
||||
"cat_2",
|
||||
"cat_1",
|
||||
"cat_2",
|
||||
"cat_1",
|
||||
"cat_2",
|
||||
"cat_1",
|
||||
],
|
||||
"num": [5, 20, 22, 3, 4, 30, 10, 50],
|
||||
"date": [
|
||||
"2019-2-1",
|
||||
"2018-02-03",
|
||||
"2020-3-11",
|
||||
"2019-2-2",
|
||||
"2019-2-2",
|
||||
"2018-12-4",
|
||||
"2020-3-11",
|
||||
"2020-12-12",
|
||||
],
|
||||
}
|
||||
)
|
||||
df["date"] = pd.to_datetime(df["date"])
|
||||
|
||||
resampled = df.groupby("cat").resample("YE", on="date")
|
||||
expected = resampled[["num"]].sum()
|
||||
result = resampled.agg({"num": "sum"})
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_agg_listlike():
|
||||
# GH 42905
|
||||
ts = Timestamp("2021-02-28 00:00:00")
|
||||
df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date"))
|
||||
resampled = df.groupby("class").resample("ME")["value"]
|
||||
result = resampled.agg(["sum", "size"])
|
||||
expected = DataFrame(
|
||||
[[69, 1]],
|
||||
index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]),
|
||||
columns=["sum", "size"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_empty(keys):
    # GH 26411
    # Resampling an empty grouped frame must return an empty frame with a
    # (keys, timedelta) MultiIndex rather than raising.
    df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
    expected = (
        DataFrame(columns=["a", "b"])
        .set_index(keys, drop=False)
        .set_index(TimedeltaIndex([]), append=True)
    )
    if len(keys) == 1:
        # a single key yields a flat-named level, not a MultiIndex name
        expected.index.name = keys[0]

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("consolidate", [True, False])
def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
    # https://github.com/pandas-dev/pandas/issues/39329
    # min() over an object column that is all-NaN for one group must not
    # drop the column; exercised both consolidated and unconsolidated.
    dates = date_range("2020-01-01", periods=15, freq="D")
    df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
    df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
    df = pd.concat([df1, df2], ignore_index=True)
    if consolidate:
        df = df._consolidate()

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby(["key"]).resample("W", on="date").min()
    idx = pd.MultiIndex.from_arrays(
        [
            ["A"] * 3 + ["B"] * 3,
            pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2).as_unit(
                "ns"
            ),
        ],
        names=["key", "date"],
    )
    expected = DataFrame(
        {
            "key": ["A"] * 3 + ["B"] * 3,
            "col1": [0, 5, 12] * 2,
            # group "B" has no object column -> all NaN after concat
            "col_object": ["val"] * 3 + [np.nan] * 3,
        },
        index=idx,
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_with_list_of_keys():
    # GH 47362: selecting [["val"]] from a grouped resampler keeps the
    # frame shape and builds a (group, date) MultiIndex.
    df = DataFrame(
        {
            "date": date_range(start="2016-01-01", periods=8),
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [1, 7, 5, 2, 3, 10, 5, 1],
        }
    )
    result = df.groupby("group").resample("2D", on="date")[["val"]].mean()

    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], df["date"]._values[::2]], names=["group", "date"]
    )
    expected = DataFrame({"val": [4.0, 3.5, 6.5, 3.0]}, index=mi_exp)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_resample_no_index(keys):
    # GH 47705
    # An empty frame whose DatetimeIndex came from an empty column should
    # resample to an empty, correctly-indexed result.
    df = DataFrame([], columns=["a", "b", "date"])
    df["date"] = pd.to_datetime(df["date"])
    df = df.set_index("date")
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
    expected = DataFrame(columns=["a", "b", "date"]).set_index(keys, drop=False)
    expected["date"] = pd.to_datetime(expected["date"])
    expected = expected.set_index("date", append=True, drop=True)
    if len(keys) == 1:
        # a single key yields a flat-named level, not a MultiIndex name
        expected.index.name = keys[0]

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_no_columns():
    # GH#52484
    # Resampling a frame with no columns must keep an (empty) frame shape
    # and still build the (group, date) MultiIndex.
    df = DataFrame(
        index=Index(
            pd.to_datetime(
                ["2018-01-01 00:00:00", "2018-01-01 12:00:00", "2018-01-02 00:00:00"]
            ),
            name="date",
        )
    )
    result = df.groupby([0, 0, 1]).resample(rule=pd.to_timedelta("06:00:00")).mean()
    index = pd.to_datetime(
        [
            "2018-01-01 00:00:00",
            "2018-01-01 06:00:00",
            "2018-01-01 12:00:00",
            "2018-01-02 00:00:00",
        ]
    )
    expected = DataFrame(
        index=pd.MultiIndex(
            levels=[np.array([0, 1], dtype=np.intp), index],
            codes=[[0, 0, 0, 1], [0, 1, 2, 3]],
            names=[None, "date"],
        )
    )

    # GH#52710 - Index comes out as 32-bit on 64-bit Windows
    tm.assert_frame_equal(result, expected, check_index_type=not is_platform_windows())
|
||||
|
||||
|
||||
def test_groupby_resample_size_all_index_same():
    # GH 46826
    # size() must count rows per (group, day) even when every group spans
    # the same set of days.
    df = DataFrame(
        {"A": [1] * 3 + [2] * 3 + [1] * 3 + [2] * 3, "B": np.arange(12)},
        index=date_range("31/12/2000 18:00", freq="h", periods=12),
    )
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby("A").resample("D").size()

    mi_exp = pd.MultiIndex.from_arrays(
        [
            [1, 1, 2, 2],
            pd.DatetimeIndex(["2000-12-31", "2001-01-01"] * 2, dtype="M8[ns]"),
        ],
        names=["A", None],
    )
    # every (group, day) bin contains exactly 3 hourly rows
    expected = Series(
        3,
        index=mi_exp,
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_index_with_list_of_keys():
    # GH 50840: list-of-columns selection from a grouped resampler driven
    # by the frame's own DatetimeIndex.
    df = DataFrame(
        {
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=date_range(start="2016-01-01", periods=8, name="date"),
    )
    result = df.groupby("group").resample("2D")[["val"]].mean()

    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
    )
    expected = DataFrame({"val": [2.0, 2.5, 7.0, 4.0]}, index=mi_exp)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
    # GH 50876
    # Selecting several columns from a grouped resampler keeps only those
    # columns in the aggregated result ("third_val" is dropped).
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
            "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
            "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
        },
        index=date_range(start="2016-01-01", periods=8, name="date"),
    )
    result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean()

    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
    )
    expected = DataFrame(
        data={
            "first_val": [2.0, 2.5, 7.0, 4.0],
            "second_val": [4.5, 4.5, 5.0, 4.5],
        },
        index=mi_exp,
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_index_with_list_of_keys_missing_column():
    # GH 50876
    # Selecting a column that does not exist from a grouped resampler
    # should raise KeyError rather than silently returning nothing.
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=Series(
            date_range(start="2016-01-01", periods=8),
            name="date",
        ),
    )
    gb = df.groupby("group")
    rs = gb.resample("2D")
    with pytest.raises(KeyError, match="Columns not found"):
        rs[["val_not_in_dataframe"]]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kind", ["datetime", "period"])
def test_groupby_resample_kind(kind):
    # GH 24103
    # The `kind` argument switches the resampled level between a
    # DatetimeIndex and a PeriodIndex.
    df = DataFrame(
        {
            "datetime": pd.to_datetime(
                ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"]
            ),
            "group": ["A", "B", "A", "B"],
            "value": [1, 2, 3, 4],
        }
    )
    df = df.set_index("datetime")
    result = df.groupby("group")["value"].resample("D", kind=kind).last()

    dt_level = pd.DatetimeIndex(["2018-11-01", "2018-11-02"])
    if kind == "period":
        dt_level = dt_level.to_period(freq="D")
    expected_index = pd.MultiIndex.from_product(
        [["A", "B"], dt_level],
        names=["group", "datetime"],
    )
    expected = Series([1, 3, 2, 4], index=expected_index, name="value")
    tm.assert_series_equal(result, expected)
|
@ -0,0 +1,390 @@
|
||||
from datetime import datetime
|
||||
from operator import methodcaller
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
|
||||
|
||||
@pytest.fixture
def test_series():
    # 1000 days of reproducible (seeded rng) random data from 2000-01-01.
    return Series(
        np.random.default_rng(2).standard_normal(1000),
        index=date_range("1/1/2000", periods=1000),
    )
|
||||
|
||||
|
||||
def test_apply(test_series):
    # Grouping by a yearly TimeGrouper should behave like grouping by the
    # year attribute once the group level is dropped.
    grouper = Grouper(freq="YE", label="right", closed="right")

    grouped = test_series.groupby(grouper)

    def f(x):
        # top-3 values within each year
        return x.sort_values()[-3:]

    applied = grouped.apply(f)
    expected = test_series.groupby(lambda x: x.year).apply(f)

    # drop the (differently labelled) group level before comparing
    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    tm.assert_series_equal(applied, expected)
|
||||
|
||||
|
||||
def test_count(test_series):
    # count() via TimeGrouper, via year-lambda groupby and via resample
    # must all agree in the presence of NaNs (indices differ in labels,
    # so they are aligned before comparing).
    test_series[::3] = np.nan

    expected = test_series.groupby(lambda x: x.year).count()

    grouper = Grouper(freq="YE", label="right", closed="right")
    result = test_series.groupby(grouper).count()
    expected.index = result.index
    tm.assert_series_equal(result, expected)

    result = test_series.resample("YE").count()
    expected.index = result.index
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_numpy_reduction(test_series):
    # agg(np.prod) — the deprecated numpy-callable spelling — must match
    # resample().prod().
    result = test_series.resample("YE", closed="right").prod()

    msg = "using SeriesGroupBy.prod"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        expected = test_series.groupby(lambda x: x.year).agg(np.prod)
    expected.index = result.index

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_iteration():
|
||||
# #2300
|
||||
N = 1000
|
||||
ind = date_range(start="2000-01-01", freq="D", periods=N)
|
||||
df = DataFrame({"open": 1, "close": 2}, index=ind)
|
||||
tg = Grouper(freq="ME")
|
||||
|
||||
grouper, _ = tg._get_grouper(df)
|
||||
|
||||
# Errors
|
||||
grouped = df.groupby(grouper, group_keys=False)
|
||||
|
||||
def f(df):
|
||||
return df["close"] / df["open"]
|
||||
|
||||
# it works!
|
||||
result = grouped.apply(f)
|
||||
tm.assert_index_equal(result.index, df.index)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        Index([1, 2]),
        Index(["a", "b"]),
        Index([1.1, 2.2]),
        pd.MultiIndex.from_arrays([[1, 2], ["a", "b"]]),
    ],
)
def test_fails_on_no_datetime_index(index):
    # A frequency-based Grouper requires a datetime-like index and must
    # raise a TypeError naming the offending index type.
    name = type(index).__name__
    df = DataFrame({"a": range(len(index))}, index=index)

    msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex "
        f"or PeriodIndex, but got an instance of '{name}'"
    )
    with pytest.raises(TypeError, match=msg):
        df.groupby(Grouper(freq="D"))
|
||||
|
||||
|
||||
def test_aaa_group_order():
    # GH 12840
    # check TimeGrouper perform stable sorts: each daily group must keep
    # the original row order, i.e. every 5th row starting at the offset.
    n = 20
    data = np.random.default_rng(2).standard_normal((n, 4))
    df = DataFrame(data, columns=["A", "B", "C", "D"])
    days = [datetime(2013, 1, d) for d in range(1, 6)]
    df["key"] = days * 4
    grouped = df.groupby(Grouper(key="key", freq="D"))

    for offset, day in enumerate(days):
        tm.assert_frame_equal(grouped.get_group(day), df[offset::5])
|
||||
|
||||
|
||||
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby."""
    # identical data keyed once by small ints, once by consecutive days
    data = np.random.default_rng(2).standard_normal((20, 4))
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = Index(
        [
            datetime(2013, 1, 1),
            datetime(2013, 1, 2),
            datetime(2013, 1, 3),
            datetime(2013, 1, 4),
            datetime(2013, 1, 5),
        ]
        * 4,
        dtype="M8[ns]",
    )

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    expected = getattr(normal_grouped, resample_method)()
    dt_result = getattr(dt_grouped, resample_method)()
    # align the integer key index with the daily DatetimeIndex
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    tm.assert_equal(expected, dt_result)
|
||||
|
||||
|
||||
@pytest.mark.xfail(reason="if TimeGrouper is used included, 'nth' doesn't work yet")
def test_aggregate_nth():
    """Check TimeGrouper's aggregation is identical as normal groupby."""
    # nth() through a TimeGrouper is known-broken; kept as an xfail.
    data = np.random.default_rng(2).standard_normal((20, 4))
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        datetime(2013, 1, 3),
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    expected = normal_grouped.nth(3)
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    dt_result = dt_grouped.nth(3)
    tm.assert_frame_equal(expected, dt_result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method, method_args, unit",
    [
        ("sum", {}, 0),
        ("sum", {"min_count": 0}, 0),
        ("sum", {"min_count": 1}, np.nan),
        ("prod", {}, 1),
        ("prod", {"min_count": 0}, 1),
        ("prod", {"min_count": 1}, np.nan),
    ],
)
def test_resample_entirely_nat_window(method, method_args, unit):
    # A window that is entirely NaN aggregates to the method's identity
    # (0 for sum, 1 for prod) unless min_count demands real values.
    ser = Series([0] * 2 + [np.nan] * 2, index=date_range("2017", periods=4))
    # methodcaller lets the aggregation be parametrized by name
    result = methodcaller(method, **method_args)(ser.resample("2d"))

    exp_dti = pd.DatetimeIndex(["2017-01-01", "2017-01-03"], dtype="M8[ns]", freq="2D")
    expected = Series([0.0, unit], index=exp_dti)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func, fill_value",
    [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)],
)
def test_aggregate_with_nat(func, fill_value):
    # check TimeGrouper's aggregation is identical as normal groupby
    # if NaT is included, 'var', 'std', 'mean', 'first','last'
    # and 'nth' doesn't work yet
    # NaT rows are dropped by the grouper, so the corresponding daily bin
    # must be padded with the aggregation's fill value.
    n = 20
    data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = Index(
        [
            datetime(2013, 1, 1),
            datetime(2013, 1, 2),
            pd.NaT,
            datetime(2013, 1, 4),
            datetime(2013, 1, 5),
        ]
        * 4,
        dtype="M8[ns]",
    )

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = getattr(normal_grouped, func)()
    dt_result = getattr(dt_grouped, func)()

    # insert the missing 2013-01-03 bin (position 3) with the fill value
    pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"])
    expected = pd.concat([normal_result, pad])
    expected = expected.sort_index()
    dti = date_range(
        start="2013-01-01",
        freq="D",
        periods=5,
        name="key",
        unit=dt_df["key"]._values.unit,
    )
    expected.index = dti._with_freq(None)  # TODO: is this desired?
    tm.assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_aggregate_with_nat_size():
    # GH 9925
    # size() counterpart of test_aggregate_with_nat: the NaT day's bin is
    # padded with a count of 0.
    n = 20
    data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = Index(
        [
            datetime(2013, 1, 1),
            datetime(2013, 1, 2),
            pd.NaT,
            datetime(2013, 1, 4),
            datetime(2013, 1, 5),
        ]
        * 4,
        dtype="M8[ns]",
    )

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = normal_grouped.size()
    dt_result = dt_grouped.size()

    # insert the missing 2013-01-03 bin (position 3) with size 0
    pad = Series([0], index=[3])
    expected = pd.concat([normal_result, pad])
    expected = expected.sort_index()
    expected.index = date_range(
        start="2013-01-01",
        freq="D",
        periods=5,
        name="key",
        unit=dt_df["key"]._values.unit,
    )._with_freq(None)
    tm.assert_series_equal(expected, dt_result)
    assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_repr():
    # GH18203
    # TimeGrouper.__repr__ should surface all resampling attributes.
    result = repr(Grouper(key="A", freq="h"))
    expected = (
        "TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, dropna=True, "
        "closed='left', label='left', how='mean', "
        "convention='e', origin='start_day')"
    )
    assert result == expected

    # a Timestamp origin is rendered via its own repr
    result = repr(Grouper(key="A", freq="h", origin="2000-01-01"))
    expected = (
        "TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, dropna=True, "
        "closed='left', label='left', how='mean', "
        "convention='e', origin=Timestamp('2000-01-01 00:00:00'))"
    )
    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method, method_args, expected_values",
    [
        ("sum", {}, [1, 0, 1]),
        ("sum", {"min_count": 0}, [1, 0, 1]),
        ("sum", {"min_count": 1}, [1, np.nan, 1]),
        ("sum", {"min_count": 2}, [np.nan, np.nan, np.nan]),
        ("prod", {}, [1, 1, 1]),
        ("prod", {"min_count": 0}, [1, 1, 1]),
        ("prod", {"min_count": 1}, [1, np.nan, 1]),
        ("prod", {"min_count": 2}, [np.nan, np.nan, np.nan]),
    ],
)
def test_upsample_sum(method, method_args, expected_values):
    # Upsampling hourly data to 30min creates an empty middle bin; sum and
    # prod fill it with their identity unless min_count demands real values.
    ser = Series(1, index=date_range("2017", periods=2, freq="h"))
    resampled = ser.resample("30min")
    index = pd.DatetimeIndex(
        ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"],
        dtype="M8[ns]",
        freq="30min",
    )
    # methodcaller lets the aggregation method be parametrized by name
    result = methodcaller(method, **method_args)(resampled)
    expected = Series(expected_values, index=index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_interpolate():
    # GH 35325
    # Linear interpolation within each volume group after upsampling the
    # weekly index to daily frequency.
    d = {"price": [10, 11, 9], "volume": [50, 60, 50]}

    df = DataFrame(d)

    df["week_starting"] = date_range("01/01/2018", periods=3, freq="W")

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = (
            df.set_index("week_starting")
            .groupby("volume")
            .resample("1D")
            .interpolate(method="linear")
        )

    # group 50 spans two weeks (15 daily rows); group 60 has a single row
    volume = [50] * 15 + [60]
    week_starting = list(date_range("2018-01-07", "2018-01-21")) + [
        Timestamp("2018-01-14")
    ]
    expected_ind = pd.MultiIndex.from_arrays(
        [volume, week_starting],
        names=["volume", "week_starting"],
    )

    expected = DataFrame(
        data={
            # 10 -> 9 interpolated linearly over 14 daily steps
            "price": [
                10.0,
                9.928571428571429,
                9.857142857142858,
                9.785714285714286,
                9.714285714285714,
                9.642857142857142,
                9.571428571428571,
                9.5,
                9.428571428571429,
                9.357142857142858,
                9.285714285714286,
                9.214285714285714,
                9.142857142857142,
                9.071428571428571,
                9.0,
                11.0,
            ],
            "volume": [50.0] * 15 + [60],
        },
        index=expected_ind,
    )
    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,220 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
|
||||
|
||||
def test_asfreq_bug():
    # Upsampling a TimedeltaIndex frame to 1min must insert NaN rows for
    # the missing intermediate bins.
    frame = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)])
    upsampled = frame.resample("1min").asfreq()
    expected = DataFrame(
        data=[1, np.nan, np.nan, 3],
        index=timedelta_range("0 day", periods=4, freq="1min"),
    )
    tm.assert_frame_equal(upsampled, expected)
|
||||
|
||||
|
||||
def test_resample_with_nat():
    # GH 13223
    tdi = pd.to_timedelta(["0s", pd.NaT, "2s"])
    frame = DataFrame({"value": [2, 3, 5]}, tdi)
    result = frame.resample("1s").mean()
    expected = DataFrame(
        {"value": [2.5, np.nan, 5.0]},
        index=timedelta_range("0 day", periods=3, freq="1s"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_as_freq_with_subperiod():
    # GH 13022: asfreq to a frequency that does not evenly divide the
    # original spacing leaves the unmatched bins as NaN.
    idx = timedelta_range("00:00:00", "00:10:00", freq="5min")
    frame = DataFrame(data={"value": [1, 5, 10]}, index=idx)
    result = frame.resample("2min").asfreq()
    expected = DataFrame(
        data={"value": [1, np.nan, np.nan, np.nan, np.nan, 10]},
        index=timedelta_range("00:00:00", "00:10:00", freq="2min"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_timedeltas():
    # 30-minute resample over a minutely TimedeltaIndex must agree with a
    # plain positional groupby over blocks of 30 rows.
    base = DataFrame({"A": np.arange(1480)})
    expected = base.groupby(base.index // 30).sum()
    expected.index = timedelta_range("0 days", freq="30min", periods=50)

    frame = DataFrame(
        {"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="min")
    )
    result = frame.resample("30min").sum()
    tm.assert_frame_equal(result, expected)

    # same check through the Series path
    series_result = frame["A"].resample("30min").sum()
    tm.assert_series_equal(series_result, expected["A"])
|
||||
|
||||
|
||||
def test_resample_single_period_timedelta():
    # Five seconds of data resampled to 2s bins: [0+1, 2+3, 4].
    ser = Series(list(range(5)), index=timedelta_range("1 day", freq="s", periods=5))
    binned = ser.resample("2s").sum()
    expected = Series([1, 5, 4], index=timedelta_range("1 day", freq="2s", periods=3))
    tm.assert_series_equal(binned, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_idempotency():
    # GH 12072: resampling at the index's own frequency is a no-op apart
    # from the float cast performed by mean().
    idx = timedelta_range("0", periods=9, freq="10ms")
    ser = Series(range(9), index=idx)
    result = ser.resample("10ms").mean()
    tm.assert_series_equal(result, ser.astype(float))
|
||||
|
||||
|
||||
def test_resample_offset_with_timedeltaindex():
    # GH 10530 & 31809: the `offset` argument shifts the bin edges.
    rng = timedelta_range(start="0s", periods=25, freq="s")
    ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)

    shifted = ts.resample("2s", offset="5s").mean()
    unshifted = ts.resample("2s").mean()

    tm.assert_index_equal(
        unshifted.index, timedelta_range(start="0s", end="25s", freq="2s")
    )
    tm.assert_index_equal(
        shifted.index, timedelta_range(start="5s", end="29s", freq="2s")
    )
|
||||
|
||||
|
||||
def test_resample_categorical_data_with_timedeltaindex():
|
||||
# GH #12169
|
||||
df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
|
||||
df["Group"] = df["Group_obj"].astype("category")
|
||||
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
|
||||
exp_tdi = pd.TimedeltaIndex(np.array([0, 10], dtype="m8[s]"), freq="10s").as_unit(
|
||||
"ns"
|
||||
)
|
||||
expected = DataFrame(
|
||||
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
|
||||
index=exp_tdi,
|
||||
)
|
||||
expected = expected.reindex(["Group_obj", "Group"], axis=1)
|
||||
expected["Group"] = expected["Group_obj"].astype("category")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_values():
    # GH 13119
    # timedelta64 dtype must survive the NaT fill values that resampling
    # introduces for empty bins.
    times = timedelta_range("1 day", "6 day", freq="4D")
    frame = DataFrame({"time": times}, index=times)

    times2 = timedelta_range("1 day", "6 day", freq="2D")
    expected = Series(times2, index=times2, name="time")
    expected.iloc[1] = pd.NaT

    tm.assert_series_equal(frame.resample("2D").first()["time"], expected)
    tm.assert_series_equal(frame["time"].resample("2D").first(), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "start, end, freq, resample_freq",
    [
        ("8h", "21h59min50s", "10s", "3h"),  # GH 30353 example
        ("3h", "22h", "1h", "5h"),
        ("527D", "5006D", "3D", "10D"),
        ("1D", "10D", "1D", "2D"),  # GH 13022 example
        # tests that worked before GH 33498:
        ("8h", "21h59min50s", "10s", "2h"),
        ("0h", "21h59min50s", "10s", "3h"),
        ("10D", "85D", "D", "2D"),
    ],
)
def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
    # GH 33498
    # check that the timedelta bins does not contains an extra bin
    idx = timedelta_range(start=start, end=end, freq=freq)
    s = Series(np.arange(len(idx)), index=idx)
    result = s.resample(resample_freq).min()
    expected_index = timedelta_range(freq=resample_freq, start=start, end=end)
    tm.assert_index_equal(result.index, expected_index)
    assert result.index.freq == expected_index.freq
    # the final bin must be populated — an extra trailing bin would be NaN
    assert not np.isnan(result.iloc[-1])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("duplicates", [True, False])
def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
    """GH 10603: resampling over a TimedeltaIndex must not emit empty groups."""
    frame = DataFrame(
        np.random.default_rng(2).normal(size=(10000, 4)),
        index=timedelta_range(start="0s", periods=10000, freq="3906250ns"),
    )
    if duplicates:
        # also exercise the non-unique-columns code path
        frame.columns = ["A", "B", "A", "C"]

    result = frame.loc["1s":, :].resample("3s").apply(lambda x: len(x))

    # twelve full 3s bins of 768 rows each, then a final partial bin of 528
    row_counts = [[768] * 4] * 12 + [[528] * 4]
    expected = DataFrame(
        row_counts,
        index=timedelta_range(start="1s", periods=13, freq="3s"),
    )
    expected.columns = frame.columns
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_resample_quantile_timedelta(unit):
    """GH 29485: quantile on a timedelta column preserves the column's resolution."""
    td_dtype = np.dtype(f"m8[{unit}]")
    frame = DataFrame(
        {"value": pd.to_timedelta(np.arange(4), unit="s").astype(td_dtype)},
        index=pd.date_range("20200101", periods=4, tz="UTC"),
    )

    result = frame.resample("2D").quantile(0.99)

    expected = DataFrame(
        {
            "value": [
                pd.Timedelta("0 days 00:00:00.990000"),
                pd.Timedelta("0 days 00:00:02.990000"),
            ]
        },
        index=pd.date_range("20200101", periods=2, tz="UTC", freq="2D"),
    ).astype(td_dtype)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_closed_right():
    """GH#45414: closed='right'/label='right' resampling on a TimedeltaIndex."""
    index = pd.Index([pd.Timedelta(seconds=120 + i * 30) for i in range(10)])
    ser = Series(range(10), index=index)

    result = ser.resample("min", closed="right", label="right").sum()

    expected_index = pd.TimedeltaIndex(
        [pd.Timedelta(seconds=120 + i * 60) for i in range(6)], freq="min"
    )
    expected = Series([0, 3, 7, 11, 15, 9], index=expected_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_if_no("pyarrow")
def test_arrow_duration_resample():
    """GH 56371: resample works on a pyarrow-backed duration index."""
    duration_index = pd.Index(
        timedelta_range("1 day", periods=5), dtype="duration[ns][pyarrow]"
    )
    expected = Series(np.arange(5, dtype=np.float64), index=duration_index)

    # a daily mean over an already-daily index is the identity transform
    result = expected.resample("1D").mean()
    tm.assert_series_equal(result, expected)
|
# End of file. (Trailing web-viewer residue "Reference in New Issue / Block a user"
# was extraction junk, not part of the source; neutralized as a comment.)