Updated script that can be controlled by a Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@ -0,0 +1,143 @@
from datetime import datetime
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
)
# Resampler method names covered by the parametrized fixtures, grouped by kind.
downsample_methods = [
    "min", "max", "first", "last", "sum", "mean", "sem",
    "median", "prod", "var", "std", "ohlc", "quantile",
]
upsample_methods = ["count", "size"]
series_methods = ["nunique"]
# Concatenation of the three lists above, in that order.
resample_methods = [*downsample_methods, *upsample_methods, *series_methods]
@pytest.fixture(params=downsample_methods)
def downsample_method(request):
    """Parametrized fixture: one value per entry of ``downsample_methods``."""
    method_name = request.param
    return method_name
@pytest.fixture(params=resample_methods)
def resample_method(request):
    """Parametrized fixture: one value per entry of ``resample_methods``."""
    method_name = request.param
    return method_name
@pytest.fixture
def _index_start():
    """Default start value handed to the index factory (2005-01-01)."""
    start = datetime(2005, 1, 1)
    return start
@pytest.fixture
def _index_end():
    """Default end value handed to the index factory (2005-01-10)."""
    end = datetime(2005, 1, 10)
    return end
@pytest.fixture
def _index_freq():
    """Default ``freq`` handed to the index factory ("D")."""
    freq = "D"
    return freq
@pytest.fixture
def _index_name():
    """Default ``name`` handed to the index factory (``None``)."""
    name = None
    return name
@pytest.fixture
def index(_index_factory, _index_start, _index_end, _index_freq, _index_name):
    """
    Build the index under test by calling ``_index_factory`` with the start,
    end, freq and name fixtures (date_range, period_range or timedelta_range
    indexes).
    """
    options = {"freq": _index_freq, "name": _index_name}
    return _index_factory(_index_start, _index_end, **options)
@pytest.fixture
def _static_values(index):
    """
    Values used to fill the Series and DataFrame fixtures: one consecutive
    integer (starting at 0) per element of ``index``.
    """
    n_points = len(index)
    return np.arange(n_points)
@pytest.fixture
def _series_name():
    """
    Default name of the Series built on the date_range, period_range and
    timedelta_range indexes (``None``).
    """
    name = None
    return name
@pytest.fixture
def series(index, _series_name, _static_values):
    """
    Series holding ``_static_values`` on ``index``, named ``_series_name``
    (date_range, period_range and timedelta_range indexes).
    """
    return Series(data=_static_values, index=index, name=_series_name)
@pytest.fixture
def empty_series_dti(series):
    """
    Zero-length slice of the ``series`` fixture (date_range, period_range
    and timedelta_range indexes).
    """
    empty = series[:0]
    return empty
@pytest.fixture
def frame(index, _series_name, _static_values):
    """
    Single-column ("value") DataFrame holding ``_static_values`` on ``index``
    (date_range, period_range and timedelta_range indexes).
    """
    # _series_name is intentionally unused
    columns = {"value": _static_values}
    return DataFrame(columns, index=index)
@pytest.fixture
def empty_frame_dti(series):
    """
    Column-less DataFrame indexed by a zero-length slice of the ``series``
    fixture's index (date_range, period_range and timedelta_range indexes).
    """
    return DataFrame(index=series.index[:0])
@pytest.fixture
def series_and_frame(frame_or_series, series, frame):
    """
    Return the ``series`` or the ``frame`` fixture, selected by the
    ``frame_or_series`` class (date_range, period_range and timedelta_range
    indexes).

    Raises
    ------
    ValueError
        If ``frame_or_series`` is neither ``Series`` nor ``DataFrame``.
    """
    # Classes are compared by identity.
    if frame_or_series is Series:
        return series
    if frame_or_series is DataFrame:
        return frame
    # Previously this fell through and handed ``None`` to the test.
    raise ValueError(
        f"frame_or_series must be Series or DataFrame, got {frame_or_series!r}"
    )

View File

@ -0,0 +1,460 @@
from datetime import datetime
import numpy as np
import pytest
from pandas.core.dtypes.common import is_extension_array_dtype
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
NaT,
PeriodIndex,
Series,
TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.groupby.groupby import DataError
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.period import period_range
from pandas.core.indexes.timedeltas import timedelta_range
from pandas.core.resample import _asfreq_compat
# A fixture value can be overridden by a test's parametrize value, even when
# the test does not use that fixture directly (it is not named in the test
# function's signature).
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa: E501
# This module overrides the fixture values defined in conftest.py that way.

# tuples of '_index_factory,_series_name,_index_start,_index_end'
DATE_RANGE = (
    date_range,
    "dti",
    datetime(2005, 1, 1),
    datetime(2005, 1, 10),
)
PERIOD_RANGE = (
    period_range,
    "pi",
    datetime(2005, 1, 1),
    datetime(2005, 1, 10),
)
TIMEDELTA_RANGE = (
    timedelta_range,
    "tdi",
    "1 day",
    "10 day",
)

# Decorator that runs a test once per index flavour above.
all_ts = pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end",
    [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE],
)
@pytest.fixture
def create_index(_index_factory):
    """Return a callable that forwards its arguments to ``_index_factory``."""

    def _create_index(*args, **kwargs):
        """return the _index_factory created using the args, kwargs"""
        built = _index_factory(*args, **kwargs)
        return built

    return _create_index
@pytest.mark.parametrize("freq", ["2D", "1h"])
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq(series_and_frame, freq, create_index):
    """``resample(freq).asfreq()`` matches reindexing onto a ``freq`` index."""
    obj = series_and_frame
    first, last = obj.index[0], obj.index[-1]
    expected = obj.reindex(create_index(first, last, freq=freq))

    result = obj.resample(freq).asfreq()
    tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq_fill_value(series, create_index):
    """``asfreq`` with and without ``fill_value`` matches ``reindex``."""
    # test for fill value during resampling, issue 3715
    ser = series
    hourly_index = create_index(ser.index[0], ser.index[-1], freq="1h")
    tm.assert_series_equal(ser.resample("1h").asfreq(), ser.reindex(hourly_index))

    # Explicit cast to float to avoid implicit cast when setting None
    frame = ser.astype("float").to_frame("value")
    frame.iloc[1] = None
    hourly_index = create_index(frame.index[0], frame.index[-1], freq="1h")
    expected = frame.reindex(hourly_index, fill_value=4.0)
    result = frame.resample("1h").asfreq(fill_value=4.0)
    tm.assert_frame_equal(result, expected)
@all_ts
def test_resample_interpolate(frame):
    """``Resampler.interpolate`` equals ``asfreq().interpolate()``."""
    # GH#12925
    # A FutureWarning is expected only for PeriodIndex input.
    warn = FutureWarning if isinstance(frame.index, PeriodIndex) else None
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        result = frame.resample("1min").asfreq().interpolate()
        expected = frame.resample("1min").interpolate()
    tm.assert_frame_equal(result, expected)
def test_raises_on_non_datetimelike_index():
    """Resampling a frame with a non-datetimelike index raises TypeError."""
    msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
        "but got an instance of 'RangeIndex'"
    )
    # this is a non datetimelike index
    non_datetimelike = DataFrame()
    with pytest.raises(TypeError, match=msg):
        non_datetimelike.resample("YE")
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_series(freq, empty_series_dti, resample_method):
    """Each resample method on an empty Series yields an empty result."""
    # GH12771 & GH12868
    ser = empty_series_dti
    if freq == "ME":
        if isinstance(ser.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                ser.resample(freq)
            return
        if isinstance(ser.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    warn = FutureWarning if isinstance(ser.index, PeriodIndex) else None
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        rs = ser.resample(freq)
    result = getattr(rs, resample_method)()

    if resample_method == "ohlc":
        ohlc_columns = ["open", "high", "low", "close"]
        expected = DataFrame([], index=ser.index[:0].copy(), columns=ohlc_columns)
        expected.index = _asfreq_compat(ser.index, freq)
        tm.assert_frame_equal(result, expected, check_dtype=False)
    else:
        expected = ser.copy()
        expected.index = _asfreq_compat(ser.index, freq)
        tm.assert_series_equal(result, expected, check_dtype=False)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
@all_ts
@pytest.mark.parametrize(
    "freq",
    [
        pytest.param("ME", marks=pytest.mark.xfail(reason="Don't know why this fails")),
        "D",
        "h",
    ],
)
def test_resample_nat_index_series(freq, series, resample_method):
    """Resampling a Series whose PeriodIndex is all NaT gives an empty result."""
    # GH39227
    ser = series.copy()
    all_nat = [NaT] * len(ser)
    ser.index = PeriodIndex(all_nat, freq=freq)

    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.resample(freq)
    result = getattr(rs, resample_method)()

    if resample_method != "ohlc":
        expected = ser[:0].copy()
        tm.assert_series_equal(result, expected, check_dtype=False)
    else:
        expected = DataFrame(
            [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
        )
        tm.assert_frame_equal(result, expected, check_dtype=False)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, empty_series_dti, resample_method):
    """``count``/``size`` on an empty Series return an empty int64 Series."""
    # GH28427
    ser = empty_series_dti
    if freq == "ME":
        if isinstance(ser.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                ser.resample(freq)
            return
        if isinstance(ser.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    warn = FutureWarning if isinstance(ser.index, PeriodIndex) else None
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        rs = ser.resample(freq)
    result = getattr(rs, resample_method)()

    expected = Series(
        [], dtype="int64", index=_asfreq_compat(ser.index, freq), name=ser.name
    )
    tm.assert_series_equal(result, expected)
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method):
    """Each resample method on an empty DataFrame yields an empty result."""
    # GH13212
    df = empty_frame_dti
    # count retains dimensions too
    if freq == "ME":
        if isinstance(df.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                df.resample(freq, group_keys=False)
            return
        if isinstance(df.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    warn = FutureWarning if isinstance(df.index, PeriodIndex) else None
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        rs = df.resample(freq, group_keys=False)
    result = getattr(rs, resample_method)()

    if resample_method == "ohlc":
        # TODO: no tests with len(df.columns) > 0
        mi = MultiIndex.from_product([df.columns, ["open", "high", "low", "close"]])
        expected = DataFrame(
            [], index=df.index[:0].copy(), columns=mi, dtype=np.float64
        )
    elif resample_method == "size":
        # GH14962
        expected = Series([], dtype=np.int64)
    else:
        expected = df.copy()

    # Every branch gets the same expected index.
    expected.index = _asfreq_compat(df.index, freq)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    tm.assert_almost_equal(result, expected)

    # test size for GH13212 (currently stays as df)
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_count_empty_dataframe(freq, empty_frame_dti):
    """``count`` on an empty one-column frame gives an empty int64 frame."""
    # GH28427
    df = empty_frame_dti
    df["a"] = []

    if freq == "ME":
        if isinstance(df.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                df.resample(freq)
            return
        if isinstance(df.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    warn = FutureWarning if isinstance(df.index, PeriodIndex) else None
    msg = "Resampling with a PeriodIndex is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        rs = df.resample(freq)
    result = rs.count()

    expected = DataFrame(
        dtype="int64",
        index=_asfreq_compat(df.index, freq),
        columns=Index(["a"], dtype=object),
    )
    tm.assert_frame_equal(result, expected)
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_resample_size_empty_dataframe(freq, empty_frame_dti):
    """``size`` on an empty one-column frame gives an empty int64 Series."""
    # GH28427
    df = empty_frame_dti
    df["a"] = []

    if freq == "ME":
        if isinstance(df.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                df.resample(freq)
            return
        if isinstance(df.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    msg = "Resampling with a PeriodIndex"
    warn = FutureWarning if isinstance(df.index, PeriodIndex) else None
    with tm.assert_produces_warning(warn, match=msg):
        rs = df.resample(freq)
    result = rs.size()

    expected = Series([], dtype="int64", index=_asfreq_compat(df.index, freq))
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "index",
    [
        PeriodIndex([], freq="M", name="a"),
        DatetimeIndex([], name="a"),
        TimedeltaIndex([], name="a"),
    ],
)
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_resample_empty_dtypes(index, dtype, resample_method):
    """Smoke test: resample methods run on empty Series of several dtypes."""
    # Empty series were sometimes causing a segfault (for the functions
    # with Cython bounds-checking disabled) or an IndexError. We just run
    # them to ensure they no longer do. (GH #10228)
    is_period = isinstance(index, PeriodIndex)
    if is_period:
        # GH#53511
        index = PeriodIndex([], freq="B", name=index.name)
    warn = FutureWarning if is_period else None
    msg = "Resampling with a PeriodIndex is deprecated"

    empty_series_dti = Series([], index=index, dtype=dtype)
    with tm.assert_produces_warning(warn, match=msg):
        rs = empty_series_dti.resample("d", group_keys=False)
    try:
        getattr(rs, resample_method)()
    except DataError:
        # Ignore these since some combinations are invalid
        # (ex: doing mean with dtype of np.object_)
        pass
@all_ts
@pytest.mark.parametrize("freq", ["ME", "D", "h"])
def test_apply_to_empty_series(empty_series_dti, freq):
    """``apply`` with a callable on an empty Series matches ``apply("sum")``."""
    # GH 14313
    ser = empty_series_dti

    if freq == "ME":
        if isinstance(ser.index, TimedeltaIndex):
            msg = (
                "Resampling on a TimedeltaIndex requires fixed-duration `freq`, "
                "e.g. '24h' or '3D', not <MonthEnd>"
            )
            with pytest.raises(ValueError, match=msg):
                ser.resample(freq)
            return
        if isinstance(ser.index, PeriodIndex):
            # index is PeriodIndex, so convert to corresponding Period freq
            freq = "M"

    msg = "Resampling with a PeriodIndex"
    warn = FutureWarning if isinstance(ser.index, PeriodIndex) else None

    with tm.assert_produces_warning(warn, match=msg):
        rs = ser.resample(freq, group_keys=False)
    result = rs.apply(lambda x: 1)

    with tm.assert_produces_warning(warn, match=msg):
        expected = ser.resample(freq).apply("sum")

    tm.assert_series_equal(result, expected, check_dtype=False)
@all_ts
def test_resampler_is_iterable(series):
    """Iterating a Resampler yields the same groups as the matching groupby."""
    # GH 15314
    freq = "h"
    msg = "Resampling with a PeriodIndex"
    warn = FutureWarning if isinstance(series.index, PeriodIndex) else None

    tg = Grouper(freq=freq, convention="start")
    with tm.assert_produces_warning(warn, match=msg):
        grouped = series.groupby(tg)

    with tm.assert_produces_warning(warn, match=msg):
        resampled = series.resample(freq)

    for resampled_item, grouped_item in zip(resampled, grouped):
        rk, rv = resampled_item
        gk, gv = grouped_item
        assert rk == gk
        tm.assert_series_equal(rv, gv)
@all_ts
def test_resample_quantile(series):
    """``Resampler.quantile`` equals aggregating with ``Series.quantile``."""
    # GH 15023
    ser = series
    q = 0.75
    freq = "h"

    msg = "Resampling with a PeriodIndex"
    warn = FutureWarning if isinstance(series.index, PeriodIndex) else None
    with tm.assert_produces_warning(warn, match=msg):
        result = ser.resample(freq).quantile(q)
        aggregated = ser.resample(freq).agg(lambda x: x.quantile(q))
        expected = aggregated.rename(ser.name)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("how", ["first", "last"])
def test_first_last_skipna(any_real_nullable_dtype, skipna, how):
    """Resampler ``first``/``last`` with ``skipna`` match the groupby result."""
    # GH#57019
    dtype = any_real_nullable_dtype
    if is_extension_array_dtype(dtype):
        na_value = Series(dtype=dtype).dtype.na_value
    else:
        na_value = np.nan

    columns = {
        "a": [2, 1, 1, 2],
        "b": [na_value, 3.0, na_value, 4.0],
        "c": [na_value, 3.0, na_value, 4.0],
    }
    df = DataFrame(
        columns,
        index=date_range("2020-01-01", periods=4, freq="D"),
        dtype=dtype,
    )

    result = getattr(df.resample("ME"), how)(skipna=skipna)

    # Put every row in one group keyed by the month-end stamp.
    month_end = pd.to_datetime("2020-01-31")
    gb = df.groupby([month_end] * df.shape[0])
    expected = getattr(gb, how)(skipna=skipna)
    expected.index.freq = "ME"
    tm.assert_frame_equal(result, expected)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,715 @@
from textwrap import dedent
import numpy as np
import pytest
from pandas.compat import is_platform_windows
import pandas as pd
from pandas import (
DataFrame,
Index,
Series,
TimedeltaIndex,
Timestamp,
)
import pandas._testing as tm
from pandas.core.indexes.datetimes import date_range
@pytest.fixture
def test_frame():
    """40-row frame on a 1-second index: group labels "A", running ints "B"."""
    group_labels = [1] * 20 + [2] * 12 + [3] * 8
    second_index = date_range("1/1/2000", freq="s", periods=40)
    return DataFrame({"A": group_labels, "B": np.arange(40)}, index=second_index)
def test_tab_complete_ipython6_warning(ip):
    """Tab-completing on a Resampler in IPython runs under a warning check."""
    from IPython.core.completer import provisionalcompleter

    # Source run in the IPython session to define the Resampler ``rs``.
    code = dedent(
        """\
import numpy as np
from pandas import Series, date_range
data = np.arange(10, dtype=np.float64)
index = date_range("2020-01-01", periods=len(data))
s = Series(data, index=index)
rs = s.resample("D")
"""
    )
    ip.run_cell(code)

    # GH 31324 newer jedi version raises Deprecation warning;
    # appears resolved 2021-02-02
    with tm.assert_produces_warning(
        None, raise_on_extra_warnings=False
    ), provisionalcompleter("ignore"):
        list(ip.Completer.completions("rs.", 1))
def test_deferred_with_groupby():
# GH 12486
# support deferred resample ops with groupby
data = [
["2010-01-01", "A", 2],
["2010-01-02", "A", 3],
["2010-01-05", "A", 8],
["2010-01-10", "A", 7],
["2010-01-13", "A", 3],
["2010-01-01", "B", 5],
["2010-01-03", "B", 2],
["2010-01-04", "B", 1],
["2010-01-11", "B", 7],
["2010-01-14", "B", 3],
]
df = DataFrame(data, columns=["date", "id", "score"])
df.date = pd.to_datetime(df.date)
def f_0(x):
return x.set_index("date").resample("D").asfreq()
msg = "DataFrameGroupBy.apply operated on the grouping columns"
with tm.assert_produces_warning(DeprecationWarning, match=msg):
expected = df.groupby("id").apply(f_0)
msg = "DataFrameGroupBy.resample operated on the grouping columns"
with tm.assert_produces_warning(DeprecationWarning, match=msg):
result = df.set_index("date").groupby("id").resample("D").asfreq()
tm.assert_frame_equal(result, expected)
df = DataFrame(
{
"date": date_range(start="2016-01-01", periods=4, freq="W"),
"group": [1, 1, 2, 2],
"val": [5, 6, 7, 8],
}
).set_index("date")
def f_1(x):
return x.resample("1D").ffill()
msg = "DataFrameGroupBy.apply operated on the grouping columns"
with tm.assert_produces_warning(DeprecationWarning, match=msg):
expected = df.groupby("group").apply(f_1)
msg = "DataFrameGroupBy.resample operated on the grouping columns"
with tm.assert_produces_warning(DeprecationWarning, match=msg):
result = df.groupby("group").resample("1D").ffill()
tm.assert_frame_equal(result, expected)
def test_getitem(test_frame):
    """Selecting column "B" before or after resampling gives the same mean."""
    grouped = test_frame.groupby("A")
    expected = grouped.B.apply(lambda x: x.resample("2s").mean())

    # Column selected on the resampler.
    tm.assert_series_equal(grouped.resample("2s").B.mean(), expected)

    # Column selected on the groupby.
    tm.assert_series_equal(grouped.B.resample("2s").mean(), expected)

    # Column selected on the aggregated frame.
    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = grouped.resample("2s").mean().B
    tm.assert_series_equal(result, expected)
def test_getitem_multiple():
    """Selecting the same column twice from one resampler gives equal results."""
    # GH 13174
    # multiple calls after selection causing an issue with aliasing
    records = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
    df = DataFrame(records, index=date_range("2016-01-01", periods=2))
    resampler = df.groupby("id").resample("1D")

    exp_mi = pd.MultiIndex.from_arrays([[1, 2], df.index], names=("id", None))
    expected = Series([1, 1], index=exp_mi, name="buyer")

    # Two identical selections, checked one after the other.
    first = resampler["buyer"].count()
    tm.assert_series_equal(first, expected)

    second = resampler["buyer"].count()
    tm.assert_series_equal(second, expected)
def test_groupby_resample_on_api_with_getitem():
    """``resample(on="date")`` matches resampling after ``set_index("date")``."""
    # GH 17813
    df = DataFrame(
        {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}
    )
    via_on = df.groupby("id").resample("2D", on="date")["data"].sum()
    via_index = df.set_index("date").groupby("id").resample("2D")["data"].sum()
    tm.assert_series_equal(via_on, via_index)
def test_groupby_with_origin():
    """A Grouper with a fixed ``origin`` bins a series and its slice alike."""
    # GH 31809
    freq = "1399min"  # prime number that is smaller than 24h
    start, end = "1/1/2000 00:00:00", "1/31/2000 00:00"
    middle = "1/15/2000 00:00:00"

    rng = date_range(start, end, freq="1231min")  # prime number
    ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
    ts2 = ts[middle:end]

    def counts(grouper):
        # Counts for the full series (cut to middle:end) and for the slice.
        full = ts.groupby(grouper).agg("count")
        full = full[middle:end]
        partial = ts2.groupby(grouper).agg("count")
        return full, partial

    # proves that grouper without a fixed origin does not work
    # when dealing with unusual frequencies
    count_ts, count_ts2 = counts(pd.Grouper(freq=freq))
    with pytest.raises(AssertionError, match="Index are different"):
        tm.assert_index_equal(count_ts.index, count_ts2.index)

    # test origin on 1970-01-01 00:00:00
    origin = Timestamp(0)
    adjusted_count_ts, adjusted_count_ts2 = counts(
        pd.Grouper(freq=freq, origin=origin)
    )
    tm.assert_series_equal(adjusted_count_ts, adjusted_count_ts2)

    # test origin on 2049-10-18 20:00:00
    origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000
    adjusted2_count_ts, adjusted2_count_ts2 = counts(
        pd.Grouper(freq=freq, origin=origin_future)
    )
    tm.assert_series_equal(adjusted2_count_ts, adjusted2_count_ts2)

    # both grouper use an adjusted timestamp that is a multiple of 1399 min
    # they should be equals even if the adjusted_timestamp is in the future
    tm.assert_series_equal(adjusted_count_ts, adjusted2_count_ts2)
def test_nearest():
    """Upsampling a 1-minute series to 20s with ``nearest``."""
    # GH 17496
    # Resample nearest
    minute_index = date_range("1/1/2000", periods=3, freq="min")
    result = Series(range(3), index=minute_index).resample("20s").nearest()

    stamps = [
        "2000-01-01 00:00:00",
        "2000-01-01 00:00:20",
        "2000-01-01 00:00:40",
        "2000-01-01 00:01:00",
        "2000-01-01 00:01:20",
        "2000-01-01 00:01:40",
        "2000-01-01 00:02:00",
    ]
    expected_index = pd.DatetimeIndex(stamps, dtype="datetime64[ns]", freq="20s")
    expected = Series([0, 0, 1, 1, 1, 2, 2], index=expected_index)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "f",
    [
        "first",
        "last",
        "median",
        "sem",
        "sum",
        "mean",
        "min",
        "max",
        "size",
        "count",
        "nearest",
        "bfill",
        "ffill",
        "asfreq",
        "ohlc",
    ],
)
def test_methods(f, test_frame):
    """Each method on a groupby-resampler matches the ``apply`` equivalent."""
    grouped = test_frame.groupby("A")
    resampler = grouped.resample("2s")

    resample_msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=resample_msg):
        result = getattr(resampler, f)()

    apply_msg = "DataFrameGroupBy.apply operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=apply_msg):
        expected = grouped.apply(lambda x: getattr(x.resample("2s"), f)())

    tm.assert_equal(result, expected)
def test_methods_nunique(test_frame):
    """``nunique`` on column "B" of a groupby-resampler matches ``apply``."""
    # series only
    grouped = test_frame.groupby("A")
    expected = grouped.B.apply(lambda x: x.resample("2s").nunique())
    result = grouped.resample("2s").B.nunique()
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("f", ["std", "var"])
def test_methods_std_var(f, test_frame):
    """``std``/``var`` with ``ddof=1`` match the ``apply`` equivalent."""
    grouped = test_frame.groupby("A")
    resampler = grouped.resample("2s")

    resample_msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=resample_msg):
        result = getattr(resampler, f)(ddof=1)

    apply_msg = "DataFrameGroupBy.apply operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=apply_msg):
        expected = grouped.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))

    tm.assert_frame_equal(result, expected)
def test_apply(test_frame):
    """``apply`` on a groupby-resampler and on the groupby match ``sum``."""
    resample_msg = "DataFrameGroupBy.resample operated on the grouping columns"
    apply_msg = "DataFrameGroupBy.apply operated on the grouping columns"

    grouped = test_frame.groupby("A")
    resampler = grouped.resample("2s")

    # reduction
    with tm.assert_produces_warning(DeprecationWarning, match=resample_msg):
        expected = grouped.resample("2s").sum()

    def resample_sum(x):
        return x.resample("2s").sum()

    with tm.assert_produces_warning(DeprecationWarning, match=resample_msg):
        result = resampler.apply(resample_sum)
    tm.assert_frame_equal(result, expected)

    def resample_apply_sum(x):
        return x.resample("2s").apply(lambda y: y.sum())

    with tm.assert_produces_warning(DeprecationWarning, match=apply_msg):
        result = grouped.apply(resample_apply_sum)
    # y.sum() results in int64 instead of int32 on 32-bit architectures
    expected = expected.astype("int64")
    tm.assert_frame_equal(result, expected)
def test_apply_with_mutated_index():
# GH 15169
index = date_range("1-1-2015", "12-31-15", freq="D")
df = DataFrame(
data={"col1": np.random.default_rng(2).random(len(index))}, index=index
)
def f(x):
s = Series([1, 2], index=["a", "b"])
return s
expected = df.groupby(pd.Grouper(freq="ME")).apply(f)
result = df.resample("ME").apply(f)
tm.assert_frame_equal(result, expected)
# A case for series
expected = df["col1"].groupby(pd.Grouper(freq="ME"), group_keys=False).apply(f)
result = df["col1"].resample("ME").apply(f)
tm.assert_series_equal(result, expected)
def test_apply_columns_multilevel():
    """``resample.apply`` can pick the aggregation from a MultiIndex column."""
    # GH 16231
    column_tuples = [("A", "a", "", "one"), ("B", "b", "i", "two")]
    cols = pd.MultiIndex.from_tuples(column_tuples)
    ind = date_range(start="2017-01-01", freq="15Min", periods=8)
    df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols)

    # Sum the columns whose last level is "one", average the others.
    agg_dict = {}
    for col in df.columns:
        agg_dict[col] = np.sum if col[3] == "one" else np.mean

    result = df.resample("h").apply(lambda x: agg_dict[x.name](x))
    expected = DataFrame(
        2 * [[0, 0.0]],
        index=date_range(start="2017-01-01", freq="1h", periods=2),
        columns=pd.MultiIndex.from_tuples(column_tuples),
    )
    tm.assert_frame_equal(result, expected)
def test_apply_non_naive_index():
    """``resample.apply`` forwards keyword args on a UTC-localized index."""

    def weighted_quantile(series, weights, q):
        ordered = series.sort_values()
        cumsum = weights.reindex(ordered.index).fillna(0).cumsum()
        cutoff = cumsum.iloc[-1] * q
        # First value whose cumulative weight reaches the cutoff.
        return ordered[cumsum >= cutoff].iloc[0]

    times = date_range("2017-6-23 18:00", periods=8, freq="15min", tz="UTC")
    data = Series([1.0, 1, 1, 1, 1, 2, 2, 0], index=times)
    weights = Series([160.0, 91, 65, 43, 24, 10, 1, 0], index=times)

    result = data.resample("D").apply(weighted_quantile, weights=weights, q=0.5)

    day = "2017-06-23 00:00:00+00:00"
    expected = Series([1.0], index=date_range(day, day, freq="D", tz="UTC"))
    tm.assert_series_equal(result, expected)
def test_resample_groupby_with_label(unit):
    """Groupby-resample to "1W" with ``label="left"`` sums per group."""
    # GH 13235
    index = date_range("2000-01-01", freq="2D", periods=5, unit=unit)
    df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]})

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby("col0").resample("1W", label="left").sum()

    group_level = np.array([0, 0, 1, 2], dtype=np.int64)
    date_level = np.array(
        ["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"],
        dtype=f"M8[{unit}]",
    )
    mindex = pd.MultiIndex.from_arrays([group_level, date_level], names=["col0", None])
    expected = DataFrame(
        data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex
    )
    tm.assert_frame_equal(result, expected)
def test_consistency_with_window(test_frame):
    """Groupby-resample and groupby-rolling give the same two-level index shape."""
    # consistent return values with window
    grouped = test_frame.groupby("A")
    expected = Index([1, 2, 3], name="A")

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        resampled = grouped.resample("2s").mean()
    assert resampled.index.nlevels == 2
    tm.assert_index_equal(resampled.index.levels[0], expected)

    rolled = test_frame.groupby("A").rolling(20).mean()
    assert rolled.index.nlevels == 2
    tm.assert_index_equal(rolled.index.levels[0], expected)
def test_median_duplicate_columns():
    """``median`` with duplicate column names matches unique-name columns."""
    # GH 14233
    values = np.random.default_rng(2).standard_normal((20, 3))
    df = DataFrame(
        values,
        columns=list("aaa"),
        index=date_range("2012-01-01", periods=20, freq="s"),
    )
    result = df.resample("5s").median()

    # Same data under unique names, relabelled afterwards for comparison.
    unique_named = df.copy()
    unique_named.columns = ["a", "b", "c"]
    expected = unique_named.resample("5s").median()
    expected.columns = result.columns
    tm.assert_frame_equal(result, expected)
def test_apply_to_one_column_of_df():
    """``resample.apply`` works when the func reads a single column."""
    # GH: 36951
    df = DataFrame(
        {"col": range(10), "col1": range(10, 20)},
        index=date_range("2012-01-01", periods=10, freq="20min"),
    )
    hourly = date_range("2012-01-01", periods=4, freq="h")
    expected = Series([3, 12, 21, 9], index=hourly)

    # access "col" via getattr -> make sure we handle AttributeError
    via_getattr = df.resample("h").apply(lambda group: group.col.sum())
    tm.assert_series_equal(via_getattr, expected)

    # access "col" via __getitem__ -> make sure we handle KeyError
    via_getitem = df.resample("h").apply(lambda group: group["col"].sum())
    tm.assert_series_equal(via_getitem, expected)
def test_resample_groupby_agg():
# GH: 33548
df = DataFrame(
{
"cat": [
"cat_1",
"cat_1",
"cat_2",
"cat_1",
"cat_2",
"cat_1",
"cat_2",
"cat_1",
],
"num": [5, 20, 22, 3, 4, 30, 10, 50],
"date": [
"2019-2-1",
"2018-02-03",
"2020-3-11",
"2019-2-2",
"2019-2-2",
"2018-12-4",
"2020-3-11",
"2020-12-12",
],
}
)
df["date"] = pd.to_datetime(df["date"])
resampled = df.groupby("cat").resample("YE", on="date")
expected = resampled[["num"]].sum()
result = resampled.agg({"num": "sum"})
tm.assert_frame_equal(result, expected)
def test_resample_groupby_agg_listlike():
# GH 42905
ts = Timestamp("2021-02-28 00:00:00")
df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date"))
resampled = df.groupby("class").resample("ME")["value"]
result = resampled.agg(["sum", "size"])
expected = DataFrame(
[[69, 1]],
index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]),
columns=["sum", "size"],
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_empty(keys):
    """Groupby-resample of an empty TimedeltaIndex frame gives an empty frame."""
    # GH 26411
    df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()

    expected = DataFrame(columns=["a", "b"]).set_index(keys, drop=False)
    expected = expected.set_index(TimedeltaIndex([]), append=True)
    if len(keys) == 1:
        expected.index.name = keys[0]

    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("consolidate", [True, False])
def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
    """Weekly ``min`` when an object column is all-NaN for one group."""
    # https://github.com/pandas-dev/pandas/issues/39329
    dates = date_range("2020-01-01", periods=15, freq="D")
    df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
    df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
    df = pd.concat([df1, df2], ignore_index=True)
    if consolidate:
        df = df._consolidate()

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        result = df.groupby(["key"]).resample("W", on="date").min()

    group_keys = ["A"] * 3 + ["B"] * 3
    week_ends = pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2)
    idx = pd.MultiIndex.from_arrays(
        [group_keys, week_ends.as_unit("ns")],
        names=["key", "date"],
    )
    expected = DataFrame(
        {
            "key": ["A"] * 3 + ["B"] * 3,
            "col1": [0, 5, 12] * 2,
            "col_object": ["val"] * 3 + [np.nan] * 3,
        },
        index=idx,
    )
    tm.assert_frame_equal(result, expected)
def test_groupby_resample_with_list_of_keys():
    """Select columns with a list after ``groupby().resample(on=...)`` (GH 47362)."""
    raw = {
        "date": date_range(start="2016-01-01", periods=8),
        "group": [0, 0, 0, 0, 1, 1, 1, 1],
        "val": [1, 7, 5, 2, 3, 10, 5, 1],
    }
    df = DataFrame(data=raw)

    resampler = df.groupby("group").resample("2D", on="date")
    result = resampler[["val"]].mean()

    # Each two-day bin is labelled by every second input date.
    bin_labels = df["date"]._values[::2]
    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], bin_labels], names=["group", "date"]
    )
    expected = DataFrame(data={"val": [4.0, 3.5, 6.5, 3.0]}, index=mi_exp)
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_resample_no_index(keys):
    """Group and resample an empty frame indexed by an empty date column (GH 47705)."""
    empty_frame = DataFrame([], columns=["a", "b", "date"])
    empty_frame["date"] = pd.to_datetime(empty_frame["date"])
    empty_frame = empty_frame.set_index("date")

    msg = "DataFrameGroupBy.resample operated on the grouping columns"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        grouped = empty_frame.groupby(keys)
        result = grouped.resample(rule=pd.to_timedelta("00:00:01")).mean()

    # Expected index is (keys..., date); the key columns stay in the frame.
    expected = DataFrame(columns=["a", "b", "date"])
    expected = expected.set_index(keys, drop=False)
    expected["date"] = pd.to_datetime(expected["date"])
    expected = expected.set_index("date", append=True, drop=True)
    if len(keys) == 1:
        expected.index.name = keys[0]
    tm.assert_frame_equal(result, expected)
def test_resample_no_columns():
    """Resample a grouped frame that has an index but no columns (GH#52484)."""
    observed = pd.to_datetime(
        ["2018-01-01 00:00:00", "2018-01-01 12:00:00", "2018-01-02 00:00:00"]
    )
    df = DataFrame(index=Index(observed, name="date"))
    grouped = df.groupby([0, 0, 1])
    result = grouped.resample(rule=pd.to_timedelta("06:00:00")).mean()

    bin_labels = pd.to_datetime(
        [
            "2018-01-01 00:00:00",
            "2018-01-01 06:00:00",
            "2018-01-01 12:00:00",
            "2018-01-02 00:00:00",
        ]
    )
    group_level = np.array([0, 1], dtype=np.intp)
    expected_index = pd.MultiIndex(
        levels=[group_level, bin_labels],
        codes=[[0, 0, 0, 1], [0, 1, 2, 3]],
        names=[None, "date"],
    )
    expected = DataFrame(index=expected_index)

    # GH#52710 - Index comes out as 32-bit on 64-bit Windows
    strict_index_type = not is_platform_windows()
    tm.assert_frame_equal(result, expected, check_index_type=strict_index_type)
def test_groupby_resample_size_all_index_same():
# GH 46826
df = DataFrame(
{"A": [1] * 3 + [2] * 3 + [1] * 3 + [2] * 3, "B": np.arange(12)},
index=date_range("31/12/2000 18:00", freq="h", periods=12),
)
msg = "DataFrameGroupBy.resample operated on the grouping columns"
with tm.assert_produces_warning(DeprecationWarning, match=msg):
result = df.groupby("A").resample("D").size()
mi_exp = pd.MultiIndex.from_arrays(
[
[1, 1, 2, 2],
pd.DatetimeIndex(["2000-12-31", "2001-01-01"] * 2, dtype="M8[ns]"),
],
names=["A", None],
)
expected = Series(
3,
index=mi_exp,
)
tm.assert_series_equal(result, expected)
def test_groupby_resample_on_index_with_list_of_keys():
    """List column selection after resampling a grouped frame on its index (GH 50840)."""
    date_index = date_range(start="2016-01-01", periods=8, name="date")
    raw = {
        "group": [0, 0, 0, 0, 1, 1, 1, 1],
        "val": [3, 1, 4, 1, 5, 9, 2, 6],
    }
    df = DataFrame(data=raw, index=date_index)

    resampler = df.groupby("group").resample("2D")
    result = resampler[["val"]].mean()

    # Two-day bins are labelled by every second date of the input index.
    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
    )
    expected = DataFrame(data={"val": [2.0, 2.5, 7.0, 4.0]}, index=mi_exp)
    tm.assert_frame_equal(result, expected)
def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
    """Selecting several columns from a groupby-resample keeps only those (GH 50876)."""
    date_index = date_range(start="2016-01-01", periods=8, name="date")
    raw = {
        "group": [0, 0, 0, 0, 1, 1, 1, 1],
        "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
        "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
        "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
    }
    df = DataFrame(data=raw, index=date_index)

    resampler = df.groupby("group").resample("2D")
    result = resampler[["first_val", "second_val"]].mean()

    mi_exp = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], df.index[::2]], names=["group", "date"]
    )
    # "third_val" was not selected, so it must not appear in the result.
    expected_means = {
        "first_val": [2.0, 2.5, 7.0, 4.0],
        "second_val": [4.5, 4.5, 5.0, 4.5],
    }
    expected = DataFrame(data=expected_means, index=mi_exp)
    tm.assert_frame_equal(result, expected)
def test_groupby_resample_on_index_with_list_of_keys_missing_column():
    """Selecting an absent column from a groupby-resample raises KeyError (GH 50876)."""
    date_index = Series(date_range(start="2016-01-01", periods=8), name="date")
    raw = {
        "group": [0, 0, 0, 0, 1, 1, 1, 1],
        "val": [3, 1, 4, 1, 5, 9, 2, 6],
    }
    df = DataFrame(data=raw, index=date_index)

    resampler = df.groupby("group").resample("2D")
    with pytest.raises(KeyError, match="Columns not found"):
        resampler[["val_not_in_dataframe"]]
@pytest.mark.parametrize("kind", ["datetime", "period"])
def test_groupby_resample_kind(kind):
    """``kind`` selects a datetime or period level in the result index (GH 24103)."""
    stamps = pd.to_datetime(
        ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"]
    )
    df = DataFrame(
        {"datetime": stamps, "group": ["A", "B", "A", "B"], "value": [1, 2, 3, 4]}
    )
    df = df.set_index("datetime")

    result = df.groupby("group")["value"].resample("D", kind=kind).last()

    day_level = pd.DatetimeIndex(["2018-11-01", "2018-11-02"])
    dt_level = day_level.to_period(freq="D") if kind == "period" else day_level
    expected_index = pd.MultiIndex.from_product(
        [["A", "B"], dt_level], names=["group", "datetime"]
    )
    expected = Series([1, 3, 2, 4], index=expected_index, name="value")
    tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,390 @@
from datetime import datetime
from operator import methodcaller
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
Series,
Timestamp,
)
import pandas._testing as tm
from pandas.core.groupby.grouper import Grouper
from pandas.core.indexes.datetimes import date_range
@pytest.fixture
def test_series():
    """Series of 1000 seeded standard-normal values indexed from 1/1/2000."""
    values = np.random.default_rng(2).standard_normal(1000)
    index = date_range("1/1/2000", periods=1000)
    return Series(values, index=index)
def test_apply(test_series):
    """``apply`` through a year-end Grouper matches grouping by calendar year."""

    def largest_three(x):
        return x.sort_values()[-3:]

    year_end = Grouper(freq="YE", label="right", closed="right")
    applied = test_series.groupby(year_end).apply(largest_three)
    expected = test_series.groupby(lambda x: x.year).apply(largest_three)

    # Drop the outer group-key level on both sides before comparing.
    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    tm.assert_series_equal(applied, expected)
def test_count(test_series):
    """Yearly ``count`` agrees between plain groupby, Grouper and ``resample``."""
    test_series[::3] = np.nan
    expected = test_series.groupby(lambda x: x.year).count()

    year_end = Grouper(freq="YE", label="right", closed="right")
    via_grouper = test_series.groupby(year_end).count()
    # Only the values are compared, so adopt the result's index.
    expected.index = via_grouper.index
    tm.assert_series_equal(via_grouper, expected)

    via_resample = test_series.resample("YE").count()
    expected.index = via_resample.index
    tm.assert_series_equal(via_resample, expected)
def test_numpy_reduction(test_series):
    """Yearly ``resample().prod()`` matches ``groupby(year).agg(np.prod)``."""
    result = test_series.resample("YE", closed="right").prod()

    by_year = test_series.groupby(lambda x: x.year)
    msg = "using SeriesGroupBy.prod"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        expected = by_year.agg(np.prod)
    # Only the values are compared, so adopt the result's index.
    expected.index = result.index

    tm.assert_series_equal(result, expected)
def test_apply_iteration():
# #2300
N = 1000
ind = date_range(start="2000-01-01", freq="D", periods=N)
df = DataFrame({"open": 1, "close": 2}, index=ind)
tg = Grouper(freq="ME")
grouper, _ = tg._get_grouper(df)
# Errors
grouped = df.groupby(grouper, group_keys=False)
def f(df):
return df["close"] / df["open"]
# it works!
result = grouped.apply(f)
tm.assert_index_equal(result.index, df.index)
@pytest.mark.parametrize(
    "index",
    [
        Index([1, 2]),
        Index(["a", "b"]),
        Index([1.1, 2.2]),
        pd.MultiIndex.from_arrays([[1, 2], ["a", "b"]]),
    ],
)
def test_fails_on_no_datetime_index(index):
    """Grouping by a frequency Grouper on a non-datetime-like index raises."""
    index_type = type(index).__name__
    df = DataFrame({"a": range(len(index))}, index=index)
    freq_grouper = Grouper(freq="D")
    msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex "
        f"or PeriodIndex, but got an instance of '{index_type}'"
    )
    with pytest.raises(TypeError, match=msg):
        df.groupby(freq_grouper)
def test_aaa_group_order():
    """Check that TimeGrouper performs stable sorts (GH 12840)."""
    n = 20
    data = np.random.default_rng(2).standard_normal((n, 4))
    df = DataFrame(data, columns=["A", "B", "C", "D"])
    # The five days repeat four times, so day k owns rows k, k + 5, k + 10, ...
    df["key"] = [datetime(2013, 1, day) for day in range(1, 6)] * 4
    grouped = df.groupby(Grouper(key="key", freq="D"))

    for offset in range(5):
        day = datetime(2013, 1, offset + 1)
        tm.assert_frame_equal(grouped.get_group(day), df[offset::5])
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation matches a normal groupby's."""
    columns = ["A", "B", "C", "D"]
    data = np.random.default_rng(2).standard_normal((20, 4))

    # Same values twice: once keyed by integers, once by the matching days.
    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=columns)
    days = [datetime(2013, 1, day) for day in range(1, 6)]
    dt_df["key"] = Index(days * 4, dtype="M8[ns]")

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    expected = getattr(normal_grouped, resample_method)()
    dt_result = getattr(dt_grouped, resample_method)()
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    tm.assert_equal(expected, dt_result)
@pytest.mark.xfail(reason="if TimeGrouper is used included, 'nth' doesn't work yet")
def test_aggregate_nth():
    """Check TimeGrouper's ``nth`` matches a normal groupby's (expected to fail)."""
    columns = ["A", "B", "C", "D"]
    data = np.random.default_rng(2).standard_normal((20, 4))

    # Same values twice: once keyed by integers, once by the matching days.
    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    dt_df = DataFrame(data, columns=columns)
    dt_df["key"] = [datetime(2013, 1, day) for day in range(1, 6)] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    expected = normal_grouped.nth(3)
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")

    dt_result = dt_grouped.nth(3)
    tm.assert_frame_equal(expected, dt_result)
@pytest.mark.parametrize(
    "method, method_args, unit",
    [
        ("sum", {}, 0),
        ("sum", {"min_count": 0}, 0),
        ("sum", {"min_count": 1}, np.nan),
        ("prod", {}, 1),
        ("prod", {"min_count": 0}, 1),
        ("prod", {"min_count": 1}, np.nan),
    ],
)
def test_resample_entirely_nat_window(method, method_args, unit):
    """A bin holding only NaN reduces to ``unit`` for the given ``min_count``."""
    values = [0] * 2 + [np.nan] * 2
    ser = Series(values, index=date_range("2017", periods=4))

    reduce = methodcaller(method, **method_args)
    result = reduce(ser.resample("2d"))

    # First bin holds the two zeros, second bin holds the two NaNs.
    exp_dti = pd.DatetimeIndex(["2017-01-01", "2017-01-03"], dtype="M8[ns]", freq="2D")
    expected = Series([0.0, unit], index=exp_dti)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "func, fill_value",
    [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)],
)
def test_aggregate_with_nat(func, fill_value):
    """Check TimeGrouper's aggregation matches a normal groupby's with NaT keys."""
    # if NaT is included, 'var', 'std', 'mean', 'first','last'
    # and 'nth' doesn't work yet
    columns = ["A", "B", "C", "D"]
    n = 20
    data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")

    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=columns)
    days = [pd.NaT if day == 3 else datetime(2013, 1, day) for day in range(1, 6)]
    dt_df["key"] = Index(days * 4, dtype="M8[ns]")

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = getattr(normal_grouped, func)()
    dt_result = getattr(dt_grouped, func)()

    # The normal groupby has no key 3; add a row of fill values in its place.
    pad = DataFrame([[fill_value] * 4], index=[3], columns=columns)
    expected = pd.concat([normal_result, pad]).sort_index()

    dti = date_range(
        start="2013-01-01",
        freq="D",
        periods=5,
        name="key",
        unit=dt_df["key"]._values.unit,
    )
    expected.index = dti._with_freq(None)  # TODO: is this desired?
    tm.assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == "key"
def test_aggregate_with_nat_size():
    """``size`` through a daily Grouper matches a normal groupby with NaT keys (GH 9925)."""
    columns = ["A", "B", "C", "D"]
    n = 20
    data = np.random.default_rng(2).standard_normal((n, 4)).astype("int64")

    normal_df = DataFrame(data, columns=columns)
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    dt_df = DataFrame(data, columns=columns)
    days = [pd.NaT if day == 3 else datetime(2013, 1, day) for day in range(1, 6)]
    dt_df["key"] = Index(days * 4, dtype="M8[ns]")

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = normal_grouped.size()
    dt_result = dt_grouped.size()

    # The normal groupby has no key 3; add a zero-size entry in its place.
    pad = Series([0], index=[3])
    expected = pd.concat([normal_result, pad]).sort_index()

    dti = date_range(
        start="2013-01-01",
        freq="D",
        periods=5,
        name="key",
        unit=dt_df["key"]._values.unit,
    )
    expected.index = dti._with_freq(None)
    tm.assert_series_equal(expected, dt_result)
    assert dt_result.index.name == "key"
def test_repr():
    """``repr`` of a frequency Grouper lists its settings (GH18203)."""
    default_origin = Grouper(key="A", freq="h")
    assert repr(default_origin) == (
        "TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, dropna=True, "
        "closed='left', label='left', how='mean', "
        "convention='e', origin='start_day')"
    )

    # An explicit origin string is shown as a Timestamp.
    explicit_origin = Grouper(key="A", freq="h", origin="2000-01-01")
    assert repr(explicit_origin) == (
        "TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, dropna=True, "
        "closed='left', label='left', how='mean', "
        "convention='e', origin=Timestamp('2000-01-01 00:00:00'))"
    )
@pytest.mark.parametrize(
    "method, method_args, expected_values",
    [
        ("sum", {}, [1, 0, 1]),
        ("sum", {"min_count": 0}, [1, 0, 1]),
        ("sum", {"min_count": 1}, [1, np.nan, 1]),
        ("sum", {"min_count": 2}, [np.nan, np.nan, np.nan]),
        ("prod", {}, [1, 1, 1]),
        ("prod", {"min_count": 0}, [1, 1, 1]),
        ("prod", {"min_count": 1}, [1, np.nan, 1]),
        ("prod", {"min_count": 2}, [np.nan, np.nan, np.nan]),
    ],
)
def test_upsample_sum(method, method_args, expected_values):
    """Upsample hourly data to 30 minutes and reduce with ``sum`` or ``prod``."""
    hourly = Series(1, index=date_range("2017", periods=2, freq="h"))
    resampled = hourly.resample("30min")

    # The middle half-hour bin contains no observations.
    half_hours = pd.DatetimeIndex(
        ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"],
        dtype="M8[ns]",
        freq="30min",
    )
    reduce = methodcaller(method, **method_args)
    result = reduce(resampled)
    expected = Series(expected_values, index=half_hours)
    tm.assert_series_equal(result, expected)
def test_groupby_resample_interpolate():
# GH 35325
d = {"price": [10, 11, 9], "volume": [50, 60, 50]}
df = DataFrame(d)
df["week_starting"] = date_range("01/01/2018", periods=3, freq="W")
msg = "DataFrameGroupBy.resample operated on the grouping columns"
with tm.assert_produces_warning(DeprecationWarning, match=msg):
result = (
df.set_index("week_starting")
.groupby("volume")
.resample("1D")
.interpolate(method="linear")
)
volume = [50] * 15 + [60]
week_starting = list(date_range("2018-01-07", "2018-01-21")) + [
Timestamp("2018-01-14")
]
expected_ind = pd.MultiIndex.from_arrays(
[volume, week_starting],
names=["volume", "week_starting"],
)
expected = DataFrame(
data={
"price": [
10.0,
9.928571428571429,
9.857142857142858,
9.785714285714286,
9.714285714285714,
9.642857142857142,
9.571428571428571,
9.5,
9.428571428571429,
9.357142857142858,
9.285714285714286,
9.214285714285714,
9.142857142857142,
9.071428571428571,
9.0,
11.0,
],
"volume": [50.0] * 15 + [60],
},
index=expected_ind,
)
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,220 @@
from datetime import timedelta
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
from pandas.core.indexes.timedeltas import timedelta_range
def test_asfreq_bug():
    """``asfreq`` on a two-row timedelta frame fills the gap minutes with NaN."""
    endpoints = [timedelta(), timedelta(minutes=3)]
    df = DataFrame(data=[1, 3], index=endpoints)

    result = df.resample("1min").asfreq()

    minute_index = timedelta_range("0 day", periods=4, freq="1min")
    expected = DataFrame(data=[1, np.nan, np.nan, 3], index=minute_index)
    tm.assert_frame_equal(result, expected)
def test_resample_with_nat():
    """Resample a timedelta-indexed frame whose index contains NaT (GH 13223)."""
    index = pd.to_timedelta(["0s", pd.NaT, "2s"])
    df = DataFrame({"value": [2, 3, 5]}, index)

    result = df.resample("1s").mean()

    second_index = timedelta_range("0 day", periods=3, freq="1s")
    expected = DataFrame({"value": [2.5, np.nan, 5.0]}, index=second_index)
    tm.assert_frame_equal(result, expected)
def test_resample_as_freq_with_subperiod():
    """``asfreq`` from 5-minute to 2-minute timedelta bins (GH 13022)."""
    five_minute_index = timedelta_range("00:00:00", "00:10:00", freq="5min")
    df = DataFrame(data={"value": [1, 5, 10]}, index=five_minute_index)

    result = df.resample("2min").asfreq()

    # Only the 0 and 10 minute labels coincide with the source index.
    two_minute_index = timedelta_range("00:00:00", "00:10:00", freq="2min")
    expected = DataFrame(
        data={"value": [1, np.nan, np.nan, np.nan, np.nan, 10]},
        index=two_minute_index,
    )
    tm.assert_frame_equal(result, expected)
def test_resample_with_timedeltas():
    """30-minute sums over a minute TimedeltaIndex match integer-bucket sums."""
    n_minutes = 1480

    # Reference: bucket the positional index into groups of 30 rows.
    expected = DataFrame({"A": np.arange(n_minutes)})
    expected = expected.groupby(expected.index // 30).sum()
    expected.index = timedelta_range("0 days", freq="30min", periods=50)

    minute_index = pd.to_timedelta(np.arange(n_minutes), unit="min")
    df = DataFrame({"A": np.arange(n_minutes)}, index=minute_index)

    frame_result = df.resample("30min").sum()
    tm.assert_frame_equal(frame_result, expected)

    series_result = df["A"].resample("30min").sum()
    tm.assert_series_equal(series_result, expected["A"])
def test_resample_single_period_timedelta():
    """Two-second sums over five one-second timedelta observations."""
    second_index = timedelta_range("1 day", freq="s", periods=5)
    ser = Series(list(range(5)), index=second_index)

    result = ser.resample("2s").sum()

    two_second_index = timedelta_range("1 day", freq="2s", periods=3)
    expected = Series([1, 5, 4], index=two_second_index)
    tm.assert_series_equal(result, expected)
def test_resample_timedelta_idempotency():
    """Resampling at the index's own frequency only casts to float (GH 12072)."""
    ten_ms_index = timedelta_range("0", periods=9, freq="10ms")
    series = Series(range(9), index=ten_ms_index)

    result = series.resample("10ms").mean()

    tm.assert_series_equal(result, series.astype(float))
def test_resample_offset_with_timedeltaindex():
    """``offset`` shifts the bin labels of a TimedeltaIndex resample (GH 10530 & 31809)."""
    rng = timedelta_range(start="0s", periods=25, freq="s")
    values = np.random.default_rng(2).standard_normal(len(rng))
    ts = Series(values, index=rng)

    with_base = ts.resample("2s", offset="5s").mean()
    without_base = ts.resample("2s").mean()

    # Only the resulting indexes are checked, not the aggregated values.
    tm.assert_index_equal(
        without_base.index, timedelta_range(start="0s", end="25s", freq="2s")
    )
    tm.assert_index_equal(
        with_base.index, timedelta_range(start="5s", end="29s", freq="2s")
    )
def test_resample_categorical_data_with_timedeltaindex():
# GH #12169
df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
df["Group"] = df["Group_obj"].astype("category")
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
exp_tdi = pd.TimedeltaIndex(np.array([0, 10], dtype="m8[s]"), freq="10s").as_unit(
"ns"
)
expected = DataFrame(
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
index=exp_tdi,
)
expected = expected.reindex(["Group_obj", "Group"], axis=1)
expected["Group"] = expected["Group_obj"].astype("category")
tm.assert_frame_equal(result, expected)
def test_resample_timedelta_values():
    """
    Timedelta dtype is preserved when resampling introduces NaT (GH 13119).
    """
    times = timedelta_range("1 day", "6 day", freq="4D")
    df = DataFrame({"time": times}, index=times)

    # Resampling at 2D adds a middle bin with no data, which becomes NaT.
    times2 = timedelta_range("1 day", "6 day", freq="2D")
    exp = Series(times2, index=times2, name="time")
    exp.iloc[1] = pd.NaT

    from_frame = df.resample("2D").first()["time"]
    tm.assert_series_equal(from_frame, exp)

    from_series = df["time"].resample("2D").first()
    tm.assert_series_equal(from_series, exp)
@pytest.mark.parametrize(
    "start, end, freq, resample_freq",
    [
        ("8h", "21h59min50s", "10s", "3h"),  # GH 30353 example
        ("3h", "22h", "1h", "5h"),
        ("527D", "5006D", "3D", "10D"),
        ("1D", "10D", "1D", "2D"),  # GH 13022 example
        # tests that worked before GH 33498:
        ("8h", "21h59min50s", "10s", "2h"),
        ("0h", "21h59min50s", "10s", "3h"),
        ("10D", "85D", "D", "2D"),
    ],
)
def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
    """Resampled timedelta bins must not contain an extra trailing bin (GH 33498)."""
    source_index = timedelta_range(start=start, end=end, freq=freq)
    ser = Series(np.arange(len(source_index)), index=source_index)

    result = ser.resample(resample_freq).min()

    expected_index = timedelta_range(start=start, end=end, freq=resample_freq)
    tm.assert_index_equal(result.index, expected_index)
    assert result.index.freq == expected_index.freq
    # The last bin must hold a real value, not NaN.
    last_value = result.iloc[-1]
    assert not np.isnan(last_value)
@pytest.mark.parametrize("duplicates", [True, False])
def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
    """Row counts per 3s bin after slicing a TimedeltaIndex frame (GH 10603)."""
    n_rows = 10000
    values = np.random.default_rng(2).normal(size=(n_rows, 4))
    fine_index = timedelta_range(start="0s", periods=n_rows, freq="3906250ns")
    df = DataFrame(values, index=fine_index)
    if duplicates:
        # case with non-unique columns
        df.columns = ["A", "B", "A", "C"]

    from_one_second = df.loc["1s":, :]
    result = from_one_second.resample("3s").apply(lambda x: len(x))

    # Twelve full bins of 768 rows followed by a final bin of 528 rows.
    counts = [[768] * 4] * 12 + [[528] * 4]
    bin_index = timedelta_range(start="1s", periods=13, freq="3s")
    expected = DataFrame(counts, index=bin_index)
    expected.columns = df.columns
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_resample_quantile_timedelta(unit):
    """``quantile`` on timedelta values keeps the column's ``m8`` unit (GH: 29485)."""
    dtype = np.dtype(f"m8[{unit}]")
    seconds = pd.to_timedelta(np.arange(4), unit="s").astype(dtype)
    daily_utc = pd.date_range("20200101", periods=4, tz="UTC")
    df = DataFrame({"value": seconds}, index=daily_utc)

    result = df.resample("2D").quantile(0.99)

    quantiles = [
        pd.Timedelta("0 days 00:00:00.990000"),
        pd.Timedelta("0 days 00:00:02.990000"),
    ]
    two_day_utc = pd.date_range("20200101", periods=2, tz="UTC", freq="2D")
    expected = DataFrame({"value": quantiles}, index=two_day_utc).astype(dtype)
    tm.assert_frame_equal(result, expected)
def test_resample_closed_right():
    """Right-closed, right-labelled minute sums on a Timedelta index (GH#45414)."""
    half_minute_steps = [pd.Timedelta(seconds=120 + i * 30) for i in range(10)]
    ser = Series(range(10), index=pd.Index(half_minute_steps))

    result = ser.resample("min", closed="right", label="right").sum()

    minute_labels = [pd.Timedelta(seconds=120 + i * 60) for i in range(6)]
    expected_index = pd.TimedeltaIndex(minute_labels, freq="min")
    expected = Series([0, 3, 7, 11, 15, 9], index=expected_index)
    tm.assert_series_equal(result, expected)
@td.skip_if_no("pyarrow")
def test_arrow_duration_resample():
    """Daily ``mean`` on a pyarrow duration index returns the input (GH 56371)."""
    daily = timedelta_range("1 day", periods=5)
    arrow_index = pd.Index(daily, dtype="duration[ns][pyarrow]")
    expected = Series(np.arange(5, dtype=np.float64), index=arrow_index)

    result = expected.resample("1D").mean()

    tm.assert_series_equal(result, expected)