Updated script so it can be controlled by a Node.js web app

Author: mac OS
Date: 2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

@@ -0,0 +1,30 @@
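# Tests for DatetimeIndex.asof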
from datetime import timedelta
from pandas import (
Index,
Timestamp,
date_range,
isna,
)
class TestAsOf:
def test_asof_partial(self):
index = date_range("2010-01-01", periods=2, freq="ME")
expected = Timestamp("2010-02-28")
result = index.asof("2010-02")
assert result == expected
assert not isinstance(result, Index)
def test_asof(self):
index = date_range("2020-01-01", periods=10)
dt = index[0]
assert index.asof(dt) == dt
assert isna(index.asof(dt - timedelta(1)))
dt = index[-1]
assert index.asof(dt + timedelta(1)) == dt
dt = index[0].to_pydatetime()
assert isinstance(index.asof(dt), Timestamp)

@@ -0,0 +1,335 @@
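# Tests for DatetimeIndex.astype (dtype conversions)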
from datetime import datetime
import dateutil
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
NaT,
PeriodIndex,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDatetimeIndex:
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_astype_asobject_around_dst_transition(self, tzstr):
# GH#1345
# dates around a dst transition
rng = date_range("2/13/2010", "5/6/2010", tz=tzstr)
objs = rng.astype(object)
for i, x in enumerate(objs):
exval = rng[i]
assert x == exval
assert x.tzinfo == exval.tzinfo
objs = rng.astype(object)
for i, x in enumerate(objs):
exval = rng[i]
assert x == exval
assert x.tzinfo == exval.tzinfo
def test_astype(self):
# GH 13149, GH 13209
idx = DatetimeIndex(
["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx"
)
result = idx.astype(object)
expected = Index(
[Timestamp("2016-05-16")] + [NaT] * 3, dtype=object, name="idx"
)
tm.assert_index_equal(result, expected)
result = idx.astype(np.int64)
expected = Index(
[1463356800000000000] + [-9223372036854775808] * 3,
dtype=np.int64,
name="idx",
)
tm.assert_index_equal(result, expected)
def test_astype2(self):
rng = date_range("1/1/2000", periods=10, name="idx")
result = rng.astype("i8")
tm.assert_index_equal(result, Index(rng.asi8, name="idx"))
tm.assert_numpy_array_equal(result.values, rng.asi8)
def test_astype_uint(self):
arr = date_range("2000", periods=2, name="idx")
with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
arr.astype("uint64")
with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
arr.astype("uint32")
def test_astype_with_tz(self):
# with tz
rng = date_range("1/1/2000", periods=10, tz="US/Eastern")
msg = "Cannot use .astype to convert from timezone-aware"
with pytest.raises(TypeError, match=msg):
# deprecated
rng.astype("datetime64[ns]")
with pytest.raises(TypeError, match=msg):
# also check DatetimeArray while we're here (same deprecation)
rng._data.astype("datetime64[ns]")
def test_astype_tzaware_to_tzaware(self):
# GH 18951: tz-aware to tz-aware
idx = date_range("20170101", periods=4, tz="US/Pacific")
result = idx.astype("datetime64[ns, US/Eastern]")
expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern")
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
def test_astype_tznaive_to_tzaware(self):
# GH 18951: tz-naive to tz-aware
idx = date_range("20170101", periods=4)
idx = idx._with_freq(None) # tz_localize does not preserve freq
msg = "Cannot use .astype to convert from timezone-naive"
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz deprecated
idx.astype("datetime64[ns, US/Eastern]")
with pytest.raises(TypeError, match=msg):
# dt64->dt64tz deprecated
idx._data.astype("datetime64[ns, US/Eastern]")
def test_astype_str_nat(self):
# GH 13149, GH 13209
# verify that we are returning NaT as a string (and not unicode)
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
result = idx.astype(str)
expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object)
tm.assert_index_equal(result, expected)
def test_astype_str(self):
# test astype string - #10442
dti = date_range("2012-01-01", periods=4, name="test_name")
result = dti.astype(str)
expected = Index(
["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"],
name="test_name",
dtype=object,
)
tm.assert_index_equal(result, expected)
def test_astype_str_tz_and_name(self):
# test astype string with tz and name
dti = date_range("2012-01-01", periods=3, name="test_name", tz="US/Eastern")
result = dti.astype(str)
expected = Index(
[
"2012-01-01 00:00:00-05:00",
"2012-01-02 00:00:00-05:00",
"2012-01-03 00:00:00-05:00",
],
name="test_name",
dtype=object,
)
tm.assert_index_equal(result, expected)
def test_astype_str_freq_and_name(self):
# test astype to string with hourly freq and name
dti = date_range("1/1/2011", periods=3, freq="h", name="test_name")
result = dti.astype(str)
expected = Index(
["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"],
name="test_name",
dtype=object,
)
tm.assert_index_equal(result, expected)
def test_astype_str_freq_and_tz(self):
# test astype to string with hourly freq and timezone
dti = date_range(
"3/6/2012 00:00", periods=2, freq="h", tz="Europe/London", name="test_name"
)
result = dti.astype(str)
expected = Index(
["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"],
dtype=object,
name="test_name",
)
tm.assert_index_equal(result, expected)
def test_astype_datetime64(self):
# GH 13149, GH 13209
idx = DatetimeIndex(
["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx"
)
result = idx.astype("datetime64[ns]")
tm.assert_index_equal(result, idx)
assert result is not idx
result = idx.astype("datetime64[ns]", copy=False)
tm.assert_index_equal(result, idx)
assert result is idx
idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan], tz="EST", name="idx")
msg = "Cannot use .astype to convert from timezone-aware"
with pytest.raises(TypeError, match=msg):
# dt64tz->dt64 deprecated
result = idx_tz.astype("datetime64[ns]")
def test_astype_object(self):
rng = date_range("1/1/2000", periods=20)
casted = rng.astype("O")
exp_values = list(rng)
tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_))
assert casted.tolist() == exp_values
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
def test_astype_object_tz(self, tz):
idx = date_range(start="2013-01-01", periods=4, freq="ME", name="idx", tz=tz)
expected_list = [
Timestamp("2013-01-31", tz=tz),
Timestamp("2013-02-28", tz=tz),
Timestamp("2013-03-31", tz=tz),
Timestamp("2013-04-30", tz=tz),
]
expected = Index(expected_list, dtype=object, name="idx")
result = idx.astype(object)
tm.assert_index_equal(result, expected)
assert idx.tolist() == expected_list
def test_astype_object_with_nat(self):
idx = DatetimeIndex(
[datetime(2013, 1, 1), datetime(2013, 1, 2), NaT, datetime(2013, 1, 4)],
name="idx",
)
expected_list = [
Timestamp("2013-01-01"),
Timestamp("2013-01-02"),
NaT,
Timestamp("2013-01-04"),
]
expected = Index(expected_list, dtype=object, name="idx")
result = idx.astype(object)
tm.assert_index_equal(result, expected)
assert idx.tolist() == expected_list
@pytest.mark.parametrize(
"dtype",
[float, "timedelta64", "timedelta64[ns]", "datetime64", "datetime64[D]"],
)
def test_astype_raises(self, dtype):
# GH 13149, GH 13209
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
msg = "Cannot cast DatetimeIndex to dtype"
if dtype == "datetime64":
msg = "Casting to unit-less dtype 'datetime64' is not supported"
with pytest.raises(TypeError, match=msg):
idx.astype(dtype)
def test_index_convert_to_datetime_array(self):
def _check_rng(rng):
converted = rng.to_pydatetime()
assert isinstance(converted, np.ndarray)
for x, stamp in zip(converted, rng):
assert isinstance(x, datetime)
assert x == stamp.to_pydatetime()
assert x.tzinfo == stamp.tzinfo
rng = date_range("20090415", "20090519")
rng_eastern = date_range("20090415", "20090519", tz="US/Eastern")
rng_utc = date_range("20090415", "20090519", tz="utc")
_check_rng(rng)
_check_rng(rng_eastern)
_check_rng(rng_utc)
def test_index_convert_to_datetime_array_explicit_pytz(self):
def _check_rng(rng):
converted = rng.to_pydatetime()
assert isinstance(converted, np.ndarray)
for x, stamp in zip(converted, rng):
assert isinstance(x, datetime)
assert x == stamp.to_pydatetime()
assert x.tzinfo == stamp.tzinfo
rng = date_range("20090415", "20090519")
rng_eastern = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))
rng_utc = date_range("20090415", "20090519", tz=pytz.utc)
_check_rng(rng)
_check_rng(rng_eastern)
_check_rng(rng_utc)
def test_index_convert_to_datetime_array_dateutil(self):
def _check_rng(rng):
converted = rng.to_pydatetime()
assert isinstance(converted, np.ndarray)
for x, stamp in zip(converted, rng):
assert isinstance(x, datetime)
assert x == stamp.to_pydatetime()
assert x.tzinfo == stamp.tzinfo
rng = date_range("20090415", "20090519")
rng_eastern = date_range("20090415", "20090519", tz="dateutil/US/Eastern")
rng_utc = date_range("20090415", "20090519", tz=dateutil.tz.tzutc())
_check_rng(rng)
_check_rng(rng_eastern)
_check_rng(rng_utc)
@pytest.mark.parametrize(
"tz, dtype",
[["US/Pacific", "datetime64[ns, US/Pacific]"], [None, "datetime64[ns]"]],
)
def test_integer_index_astype_datetime(self, tz, dtype):
# GH 20997, 20964, 24559
val = [Timestamp("2018-01-01", tz=tz).as_unit("ns")._value]
result = Index(val, name="idx").astype(dtype)
expected = DatetimeIndex(["2018-01-01"], tz=tz, name="idx").as_unit("ns")
tm.assert_index_equal(result, expected)
def test_dti_astype_period(self):
idx = DatetimeIndex([NaT, "2011-01-01", "2011-02-01"], name="idx")
res = idx.astype("period[M]")
exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx")
tm.assert_index_equal(res, exp)
res = idx.astype("period[3M]")
exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx")
tm.assert_index_equal(res, exp)
class TestAstype:
@pytest.mark.parametrize("tz", [None, "US/Central"])
def test_astype_category(self, tz):
obj = date_range("2000", periods=2, tz=tz, name="idx")
result = obj.astype("category")
dti = DatetimeIndex(["2000-01-01", "2000-01-02"], tz=tz).as_unit("ns")
expected = pd.CategoricalIndex(
dti,
name="idx",
)
tm.assert_index_equal(result, expected)
result = obj._data.astype("category")
expected = expected.values
tm.assert_categorical_equal(result, expected)
@pytest.mark.parametrize("tz", [None, "US/Central"])
def test_astype_array_fallback(self, tz):
obj = date_range("2000", periods=2, tz=tz, name="idx")
result = obj.astype(bool)
expected = Index(np.array([True, True]), name="idx")
tm.assert_index_equal(result, expected)
result = obj._data.astype(bool)
expected = np.array([True, True])
tm.assert_numpy_array_equal(result, expected)

@@ -0,0 +1,141 @@
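# Tests for DatetimeIndex.delete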
import pytest
from pandas import (
DatetimeIndex,
Series,
date_range,
)
import pandas._testing as tm
class TestDelete:
def test_delete(self, unit):
idx = date_range(
start="2000-01-01", periods=5, freq="ME", name="idx", unit=unit
)
# preserve freq
expected_0 = date_range(
start="2000-02-01", periods=4, freq="ME", name="idx", unit=unit
)
expected_4 = date_range(
start="2000-01-01", periods=4, freq="ME", name="idx", unit=unit
)
# reset freq to None
expected_1 = DatetimeIndex(
["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"],
freq=None,
name="idx",
).as_unit(unit)
cases = {
0: expected_0,
-5: expected_0,
-1: expected_4,
4: expected_4,
1: expected_1,
}
for n, expected in cases.items():
result = idx.delete(n)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
with pytest.raises((IndexError, ValueError), match="out of bounds"):
# raises either IndexError or ValueError, depending on the numpy version
idx.delete(5)
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Pacific"])
def test_delete2(self, tz):
idx = date_range(
start="2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz
)
expected = date_range(
start="2000-01-01 10:00", periods=9, freq="h", name="idx", tz=tz
)
result = idx.delete(0)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freqstr == "h"
assert result.tz == expected.tz
expected = date_range(
start="2000-01-01 09:00", periods=9, freq="h", name="idx", tz=tz
)
result = idx.delete(-1)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freqstr == "h"
assert result.tz == expected.tz
def test_delete_slice(self, unit):
idx = date_range(
start="2000-01-01", periods=10, freq="D", name="idx", unit=unit
)
# preserve freq
expected_0_2 = date_range(
start="2000-01-04", periods=7, freq="D", name="idx", unit=unit
)
expected_7_9 = date_range(
start="2000-01-01", periods=7, freq="D", name="idx", unit=unit
)
# reset freq to None
expected_3_5 = DatetimeIndex(
[
"2000-01-01",
"2000-01-02",
"2000-01-03",
"2000-01-07",
"2000-01-08",
"2000-01-09",
"2000-01-10",
],
freq=None,
name="idx",
).as_unit(unit)
cases = {
(0, 1, 2): expected_0_2,
(7, 8, 9): expected_7_9,
(3, 4, 5): expected_3_5,
}
for n, expected in cases.items():
result = idx.delete(n)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
result = idx.delete(slice(n[0], n[-1] + 1))
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
# TODO: belongs in Series.drop tests?
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Pacific"])
def test_delete_slice2(self, tz, unit):
dti = date_range(
"2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz, unit=unit
)
ts = Series(
1,
index=dti,
)
# preserve freq
result = ts.drop(ts.index[:5]).index
expected = dti[5:]
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
# reset freq to None
result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
expected = dti[::2]._with_freq(None)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz

@@ -0,0 +1,125 @@
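# Tests for DatetimeIndex.factorize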
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
Index,
date_range,
factorize,
)
import pandas._testing as tm
class TestDatetimeIndexFactorize:
def test_factorize(self):
idx1 = DatetimeIndex(
["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]
)
exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
arr, idx = idx1.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
assert idx.freq == exp_idx.freq
arr, idx = idx1.factorize(sort=True)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
assert idx.freq == exp_idx.freq
# tz must be preserved
idx1 = idx1.tz_localize("Asia/Tokyo")
exp_idx = exp_idx.tz_localize("Asia/Tokyo")
arr, idx = idx1.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
assert idx.freq == exp_idx.freq
idx2 = DatetimeIndex(
["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
)
exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
arr, idx = idx2.factorize(sort=True)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
assert idx.freq == exp_idx.freq
exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
arr, idx = idx2.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
assert idx.freq == exp_idx.freq
def test_factorize_preserves_freq(self):
# GH#38120 freq should be preserved
idx3 = date_range("2000-01", periods=4, freq="ME", tz="Asia/Tokyo")
exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
arr, idx = idx3.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, idx3)
assert idx.freq == idx3.freq
arr, idx = factorize(idx3)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, idx3)
assert idx.freq == idx3.freq
def test_factorize_tz(self, tz_naive_fixture, index_or_series):
tz = tz_naive_fixture
# GH#13750
base = date_range("2016-11-05", freq="h", periods=100, tz=tz)
idx = base.repeat(5)
exp_arr = np.arange(100, dtype=np.intp).repeat(5)
obj = index_or_series(idx)
arr, res = obj.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
expected = base._with_freq(None)
tm.assert_index_equal(res, expected)
assert res.freq == expected.freq
def test_factorize_dst(self, index_or_series):
# GH#13750
idx = date_range("2016-11-06", freq="h", periods=12, tz="US/Eastern")
obj = index_or_series(idx)
arr, res = obj.factorize()
tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
tm.assert_index_equal(res, idx)
if index_or_series is Index:
assert res.freq == idx.freq
idx = date_range("2016-06-13", freq="h", periods=12, tz="US/Eastern")
obj = index_or_series(idx)
arr, res = obj.factorize()
tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
tm.assert_index_equal(res, idx)
if index_or_series is Index:
assert res.freq == idx.freq
@pytest.mark.parametrize("sort", [True, False])
def test_factorize_no_freq_non_nano(self, tz_naive_fixture, sort):
# GH#51978 case that does not go through the fastpath based on
# non-None freq
tz = tz_naive_fixture
idx = date_range("2016-11-06", freq="h", periods=5, tz=tz)[[0, 4, 1, 3, 2]]
exp_codes, exp_uniques = idx.factorize(sort=sort)
res_codes, res_uniques = idx.as_unit("s").factorize(sort=sort)
tm.assert_numpy_array_equal(res_codes, exp_codes)
tm.assert_index_equal(res_uniques, exp_uniques.as_unit("s"))
res_codes, res_uniques = idx.as_unit("s").to_series().factorize(sort=sort)
tm.assert_numpy_array_equal(res_codes, exp_codes)
tm.assert_index_equal(res_uniques, exp_uniques.as_unit("s"))

@@ -0,0 +1,62 @@
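# Tests for DatetimeIndex.fillna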
import pytest
import pandas as pd
import pandas._testing as tm
class TestDatetimeIndexFillNA:
@pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
def test_fillna_datetime64(self, tz):
# GH 11343
idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"])
exp = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"]
)
tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp)
# tz mismatch
exp = pd.Index(
[
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 10:00", tz=tz),
pd.Timestamp("2011-01-01 11:00"),
],
dtype=object,
)
tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp)
# object
exp = pd.Index(
[pd.Timestamp("2011-01-01 09:00"), "x", pd.Timestamp("2011-01-01 11:00")],
dtype=object,
)
tm.assert_index_equal(idx.fillna("x"), exp)
idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], tz=tz)
exp = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], tz=tz
)
tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp)
exp = pd.Index(
[
pd.Timestamp("2011-01-01 09:00", tz=tz),
pd.Timestamp("2011-01-01 10:00"),
pd.Timestamp("2011-01-01 11:00", tz=tz),
],
dtype=object,
)
tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp)
# object
exp = pd.Index(
[
pd.Timestamp("2011-01-01 09:00", tz=tz),
"x",
pd.Timestamp("2011-01-01 11:00", tz=tz),
],
dtype=object,
)
tm.assert_index_equal(idx.fillna("x"), exp)

@@ -0,0 +1,265 @@
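# Tests for DatetimeIndex.insert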
from datetime import datetime
import numpy as np
import pytest
import pytz
from pandas import (
NA,
DatetimeIndex,
Index,
NaT,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestInsert:
@pytest.mark.parametrize("null", [None, np.nan, np.datetime64("NaT"), NaT, NA])
@pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
def test_insert_nat(self, tz, null):
# GH#16537, GH#18295 (test missing)
idx = DatetimeIndex(["2017-01-01"], tz=tz)
expected = DatetimeIndex(["NaT", "2017-01-01"], tz=tz)
if tz is not None and isinstance(null, np.datetime64):
expected = Index([null, idx[0]], dtype=object)
res = idx.insert(0, null)
tm.assert_index_equal(res, expected)
@pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
def test_insert_invalid_na(self, tz):
idx = DatetimeIndex(["2017-01-01"], tz=tz)
item = np.timedelta64("NaT")
result = idx.insert(0, item)
expected = Index([item] + list(idx), dtype=object)
tm.assert_index_equal(result, expected)
def test_insert_empty_preserves_freq(self, tz_naive_fixture):
# GH#33573
tz = tz_naive_fixture
dti = DatetimeIndex([], tz=tz, freq="D")
item = Timestamp("2017-04-05").tz_localize(tz)
result = dti.insert(0, item)
assert result.freq == dti.freq
# But not when we insert an item that doesn't conform to freq
dti = DatetimeIndex([], tz=tz, freq="W-THU")
result = dti.insert(0, item)
assert result.freq is None
def test_insert(self, unit):
idx = DatetimeIndex(
["2000-01-04", "2000-01-01", "2000-01-02"], name="idx"
).as_unit(unit)
result = idx.insert(2, datetime(2000, 1, 5))
exp = DatetimeIndex(
["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx"
).as_unit(unit)
tm.assert_index_equal(result, exp)
# insertion of non-datetime should coerce to object index
result = idx.insert(1, "inserted")
expected = Index(
[
datetime(2000, 1, 4),
"inserted",
datetime(2000, 1, 1),
datetime(2000, 1, 2),
],
name="idx",
)
assert not isinstance(result, DatetimeIndex)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
def test_insert2(self, unit):
idx = date_range("1/1/2000", periods=3, freq="ME", name="idx", unit=unit)
# preserve freq
expected_0 = DatetimeIndex(
["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"],
name="idx",
freq="ME",
).as_unit(unit)
expected_3 = DatetimeIndex(
["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"],
name="idx",
freq="ME",
).as_unit(unit)
# reset freq to None
expected_1_nofreq = DatetimeIndex(
["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"],
name="idx",
freq=None,
).as_unit(unit)
expected_3_nofreq = DatetimeIndex(
["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
name="idx",
freq=None,
).as_unit(unit)
cases = [
(0, datetime(1999, 12, 31), expected_0),
(-3, datetime(1999, 12, 31), expected_0),
(3, datetime(2000, 4, 30), expected_3),
(1, datetime(2000, 1, 31), expected_1_nofreq),
(3, datetime(2000, 1, 2), expected_3_nofreq),
]
for n, d, expected in cases:
result = idx.insert(n, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
def test_insert3(self, unit):
idx = date_range("1/1/2000", periods=3, freq="ME", name="idx", unit=unit)
# reset freq to None
result = idx.insert(3, datetime(2000, 1, 2))
expected = DatetimeIndex(
["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
name="idx",
freq=None,
).as_unit(unit)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq is None
def test_insert4(self, unit):
for tz in ["US/Pacific", "Asia/Singapore"]:
idx = date_range(
"1/1/2000 09:00", periods=6, freq="h", tz=tz, name="idx", unit=unit
)
# preserve freq
expected = date_range(
"1/1/2000 09:00", periods=7, freq="h", tz=tz, name="idx", unit=unit
)
for d in [
Timestamp("2000-01-01 15:00", tz=tz),
pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)),
]:
result = idx.insert(6, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
assert result.tz == expected.tz
expected = DatetimeIndex(
[
"2000-01-01 09:00",
"2000-01-01 10:00",
"2000-01-01 11:00",
"2000-01-01 12:00",
"2000-01-01 13:00",
"2000-01-01 14:00",
"2000-01-01 10:00",
],
name="idx",
tz=tz,
freq=None,
).as_unit(unit)
# reset freq to None
for d in [
Timestamp("2000-01-01 10:00", tz=tz),
pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)),
]:
result = idx.insert(6, d)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.tz == expected.tz
assert result.freq is None
# TODO: also changes DataFrame.__setitem__ with expansion
def test_insert_mismatched_tzawareness(self):
# see GH#7299
idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
# mismatched tz-awareness
item = Timestamp("2000-01-04")
result = idx.insert(3, item)
expected = Index(
list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
)
tm.assert_index_equal(result, expected)
# mismatched tz-awareness
item = datetime(2000, 1, 4)
result = idx.insert(3, item)
expected = Index(
list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
)
tm.assert_index_equal(result, expected)
# TODO: also changes DataFrame.__setitem__ with expansion
def test_insert_mismatched_tz(self):
# see GH#7299
# pre-2.0 with mismatched tzs we would cast to object
idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
# mismatched tz -> cast to object (could reasonably cast to same tz or UTC)
item = Timestamp("2000-01-04", tz="US/Eastern")
result = idx.insert(3, item)
expected = Index(
list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]),
name="idx",
)
assert expected.dtype == idx.dtype
tm.assert_index_equal(result, expected)
item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern"))
result = idx.insert(3, item)
expected = Index(
list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]),
name="idx",
)
assert expected.dtype == idx.dtype
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"item", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)]
)
def test_insert_mismatched_types_raises(self, tz_aware_fixture, item):
# GH#33703 don't cast these to dt64
tz = tz_aware_fixture
dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz)
result = dti.insert(1, item)
if isinstance(item, np.ndarray):
assert item.item() == 0
expected = Index([dti[0], 0] + list(dti[1:]), dtype=object, name=9)
else:
expected = Index([dti[0], item] + list(dti[1:]), dtype=object, name=9)
tm.assert_index_equal(result, expected)
def test_insert_castable_str(self, tz_aware_fixture):
# GH#33703
tz = tz_aware_fixture
dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)
value = "2019-11-05"
result = dti.insert(0, value)
ts = Timestamp(value).tz_localize(tz)
expected = DatetimeIndex([ts] + list(dti), dtype=dti.dtype, name=9)
tm.assert_index_equal(result, expected)
def test_insert_non_castable_str(self, tz_aware_fixture):
# GH#33703
tz = tz_aware_fixture
dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)
value = "foo"
result = dti.insert(0, value)
expected = Index(["foo"] + list(dti), dtype=object, name=9)
tm.assert_index_equal(result, expected)

@@ -0,0 +1,28 @@
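# Tests for DatetimeIndex.isocalendar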
from pandas import (
DataFrame,
DatetimeIndex,
date_range,
)
import pandas._testing as tm
def test_isocalendar_returns_correct_values_close_to_new_year_with_tz():
# GH#6538: Check that DatetimeIndex and its Timestamp elements
# return the same weekofyear accessor close to new year w/ tz
dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
dates = DatetimeIndex(dates, tz="Europe/Brussels")
result = dates.isocalendar()
expected_data_frame = DataFrame(
[[2013, 52, 7], [2014, 1, 1], [2014, 1, 2]],
columns=["year", "week", "day"],
index=dates,
dtype="UInt32",
)
tm.assert_frame_equal(result, expected_data_frame)
def test_dti_timestamp_isocalendar_fields():
idx = date_range("2020-01-01", periods=10)
expected = tuple(idx.isocalendar().iloc[-1].to_list())
result = idx[-1].isocalendar()
assert result == expected

@@ -0,0 +1,47 @@
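# Tests for DatetimeIndex.map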
import pytest
from pandas import (
DatetimeIndex,
Index,
MultiIndex,
Period,
date_range,
)
import pandas._testing as tm
class TestMap:
def test_map(self):
rng = date_range("1/1/2000", periods=10)
f = lambda x: x.strftime("%Y%m%d")
result = rng.map(f)
exp = Index([f(x) for x in rng])
tm.assert_index_equal(result, exp)
def test_map_fallthrough(self, capsys):
# GH#22067, check we don't get warnings about silently ignored errors
dti = date_range("2017-01-01", "2018-01-01", freq="B")
dti.map(lambda x: Period(year=x.year, month=x.month, freq="M"))
captured = capsys.readouterr()
assert captured.err == ""
def test_map_bug_1677(self):
index = DatetimeIndex(["2012-04-25 09:30:00.393000"])
f = index.asof
result = index.map(f)
expected = Index([f(index[0])])
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("name", [None, "name"])
def test_index_map(self, name):
# see GH#20990
count = 6
index = date_range("2018-01-01", periods=count, freq="ME", name=name).map(
lambda x: (x.year, x.month)
)
exp_index = MultiIndex.from_product(((2018,), range(1, 7)), names=[name, name])
tm.assert_index_equal(index, exp_index)

@@ -0,0 +1,95 @@
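# Tests for DatetimeIndex.normalize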
from dateutil.tz import tzlocal
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
DatetimeIndex,
NaT,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestNormalize:
def test_normalize(self):
rng = date_range("1/1/2000 9:30", periods=10, freq="D")
result = rng.normalize()
expected = date_range("1/1/2000", periods=10, freq="D")
tm.assert_index_equal(result, expected)
arr_ns = np.array([1380585623454345752, 1380585612343234312]).astype(
"datetime64[ns]"
)
rng_ns = DatetimeIndex(arr_ns)
rng_ns_normalized = rng_ns.normalize()
arr_ns = np.array([1380585600000000000, 1380585600000000000]).astype(
"datetime64[ns]"
)
expected = DatetimeIndex(arr_ns)
tm.assert_index_equal(rng_ns_normalized, expected)
assert result.is_normalized
assert not rng.is_normalized
def test_normalize_nat(self):
dti = DatetimeIndex([NaT, Timestamp("2018-01-01 01:00:00")])
result = dti.normalize()
expected = DatetimeIndex([NaT, Timestamp("2018-01-01")])
tm.assert_index_equal(result, expected)
def test_normalize_tz(self):
rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="US/Eastern")
result = rng.normalize() # does not preserve freq
expected = date_range("1/1/2000", periods=10, freq="D", tz="US/Eastern")
tm.assert_index_equal(result, expected._with_freq(None))
assert result.is_normalized
assert not rng.is_normalized
rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="UTC")
result = rng.normalize()
expected = date_range("1/1/2000", periods=10, freq="D", tz="UTC")
tm.assert_index_equal(result, expected)
assert result.is_normalized
assert not rng.is_normalized
rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal())
result = rng.normalize() # does not preserve freq
expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal())
tm.assert_index_equal(result, expected._with_freq(None))
assert result.is_normalized
assert not rng.is_normalized
@td.skip_if_windows
@pytest.mark.parametrize(
"timezone",
[
"US/Pacific",
"US/Eastern",
"UTC",
"Asia/Kolkata",
"Asia/Shanghai",
"Australia/Canberra",
],
)
def test_normalize_tz_local(self, timezone):
# GH#13459
with tm.set_timezone(timezone):
rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal())
result = rng.normalize()
expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal())
expected = expected._with_freq(None)
tm.assert_index_equal(result, expected)
assert result.is_normalized
assert not rng.is_normalized

@@ -0,0 +1,83 @@
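# Tests for DatetimeIndex.repeat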
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestRepeat:
def test_repeat_range(self, tz_naive_fixture):
rng = date_range("1/1/2000", "1/1/2001")
result = rng.repeat(5)
assert result.freq is None
assert len(result) == 5 * len(rng)
def test_repeat_range2(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
index = date_range("2001-01-01", periods=2, freq="D", tz=tz, unit=unit)
exp = DatetimeIndex(
["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
).as_unit(unit)
for res in [index.repeat(2), np.repeat(index, 2)]:
tm.assert_index_equal(res, exp)
assert res.freq is None
def test_repeat_range3(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
index = date_range("2001-01-01", periods=2, freq="2D", tz=tz, unit=unit)
exp = DatetimeIndex(
["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
).as_unit(unit)
for res in [index.repeat(2), np.repeat(index, 2)]:
tm.assert_index_equal(res, exp)
assert res.freq is None
def test_repeat_range4(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz).as_unit(unit)
exp = DatetimeIndex(
[
"2001-01-01",
"2001-01-01",
"2001-01-01",
"NaT",
"NaT",
"NaT",
"2003-01-01",
"2003-01-01",
"2003-01-01",
],
tz=tz,
).as_unit(unit)
for res in [index.repeat(3), np.repeat(index, 3)]:
tm.assert_index_equal(res, exp)
assert res.freq is None
def test_repeat(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
reps = 2
msg = "the 'axis' parameter is not supported"
rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz, unit=unit)
expected_rng = DatetimeIndex(
[
Timestamp("2016-01-01 00:00:00", tz=tz),
Timestamp("2016-01-01 00:00:00", tz=tz),
Timestamp("2016-01-01 00:30:00", tz=tz),
Timestamp("2016-01-01 00:30:00", tz=tz),
]
).as_unit(unit)
res = rng.repeat(reps)
tm.assert_index_equal(res, expected_rng)
assert res.freq is None
tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
with pytest.raises(ValueError, match=msg):
np.repeat(rng, reps, axis=1)

@@ -0,0 +1,31 @@
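# Tests for DatetimeIndex.resolution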
from dateutil.tz import tzlocal
import pytest
from pandas.compat import IS64
from pandas import date_range
@pytest.mark.parametrize(
"freq,expected",
[
("YE", "day"),
("QE", "day"),
("ME", "day"),
("D", "day"),
("h", "hour"),
("min", "minute"),
("s", "second"),
("ms", "millisecond"),
("us", "microsecond"),
],
)
def test_dti_resolution(request, tz_naive_fixture, freq, expected):
tz = tz_naive_fixture
if freq == "YE" and not IS64 and isinstance(tz, tzlocal):
request.applymarker(
pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038")
)
idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
assert idx.resolution == expected

@@ -0,0 +1,221 @@
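# Tests for DatetimeIndex.round, .floor, and .ceil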
import pytest
from pandas._libs.tslibs import to_offset
from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG
from pandas import (
DatetimeIndex,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDatetimeIndexRound:
def test_round_daily(self):
dti = date_range("20130101 09:10:11", periods=5)
result = dti.round("D")
expected = date_range("20130101", periods=5)
tm.assert_index_equal(result, expected)
dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
result = dti.round("D")
expected = date_range("20130101", periods=5).tz_localize("US/Eastern")
tm.assert_index_equal(result, expected)
result = dti.round("s")
tm.assert_index_equal(result, dti)
@pytest.mark.parametrize(
"freq, error_msg",
[
("YE", "<YearEnd: month=12> is a non-fixed frequency"),
("ME", "<MonthEnd> is a non-fixed frequency"),
("foobar", "Invalid frequency: foobar"),
],
)
def test_round_invalid(self, freq, error_msg):
dti = date_range("20130101 09:10:11", periods=5)
dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
with pytest.raises(ValueError, match=error_msg):
dti.round(freq)
def test_round(self, tz_naive_fixture, unit):
tz = tz_naive_fixture
rng = date_range(start="2016-01-01", periods=5, freq="30Min", tz=tz, unit=unit)
elt = rng[1]
expected_rng = DatetimeIndex(
[
Timestamp("2016-01-01 00:00:00", tz=tz),
Timestamp("2016-01-01 00:00:00", tz=tz),
Timestamp("2016-01-01 01:00:00", tz=tz),
Timestamp("2016-01-01 02:00:00", tz=tz),
Timestamp("2016-01-01 02:00:00", tz=tz),
]
).as_unit(unit)
expected_elt = expected_rng[1]
result = rng.round(freq="h")
tm.assert_index_equal(result, expected_rng)
assert elt.round(freq="h") == expected_elt
msg = INVALID_FREQ_ERR_MSG
with pytest.raises(ValueError, match=msg):
rng.round(freq="foo")
with pytest.raises(ValueError, match=msg):
elt.round(freq="foo")
msg = "<MonthEnd> is a non-fixed frequency"
with pytest.raises(ValueError, match=msg):
rng.round(freq="ME")
with pytest.raises(ValueError, match=msg):
elt.round(freq="ME")
def test_round2(self, tz_naive_fixture):
tz = tz_naive_fixture
# GH#14440 & GH#15578
index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz).as_unit("ns")
result = index.round("ms")
expected = DatetimeIndex(["2016-10-17 12:00:00.002000"], tz=tz).as_unit("ns")
tm.assert_index_equal(result, expected)
for freq in ["us", "ns"]:
tm.assert_index_equal(index, index.round(freq))
def test_round3(self, tz_naive_fixture):
tz = tz_naive_fixture
index = DatetimeIndex(["2016-10-17 12:00:00.00149"], tz=tz).as_unit("ns")
result = index.round("ms")
expected = DatetimeIndex(["2016-10-17 12:00:00.001000"], tz=tz).as_unit("ns")
tm.assert_index_equal(result, expected)
def test_round4(self, tz_naive_fixture):
index = DatetimeIndex(["2016-10-17 12:00:00.001501031"], dtype="M8[ns]")
result = index.round("10ns")
expected = DatetimeIndex(["2016-10-17 12:00:00.001501030"], dtype="M8[ns]")
tm.assert_index_equal(result, expected)
ts = "2016-10-17 12:00:00.001501031"
dti = DatetimeIndex([ts], dtype="M8[ns]")
with tm.assert_produces_warning(False):
dti.round("1010ns")
def test_no_rounding_occurs(self, tz_naive_fixture):
# GH 21262
tz = tz_naive_fixture
rng = date_range(start="2016-01-01", periods=5, freq="2Min", tz=tz)
expected_rng = DatetimeIndex(
[
Timestamp("2016-01-01 00:00:00", tz=tz),
Timestamp("2016-01-01 00:02:00", tz=tz),
Timestamp("2016-01-01 00:04:00", tz=tz),
Timestamp("2016-01-01 00:06:00", tz=tz),
Timestamp("2016-01-01 00:08:00", tz=tz),
]
).as_unit("ns")
result = rng.round(freq="2min")
tm.assert_index_equal(result, expected_rng)
@pytest.mark.parametrize(
"test_input, rounder, freq, expected",
[
(["2117-01-01 00:00:45"], "floor", "15s", ["2117-01-01 00:00:45"]),
(["2117-01-01 00:00:45"], "ceil", "15s", ["2117-01-01 00:00:45"]),
(
["2117-01-01 00:00:45.000000012"],
"floor",
"10ns",
["2117-01-01 00:00:45.000000010"],
),
(
["1823-01-01 00:00:01.000000012"],
"ceil",
"10ns",
["1823-01-01 00:00:01.000000020"],
),
(["1823-01-01 00:00:01"], "floor", "1s", ["1823-01-01 00:00:01"]),
(["1823-01-01 00:00:01"], "ceil", "1s", ["1823-01-01 00:00:01"]),
(["2018-01-01 00:15:00"], "ceil", "15min", ["2018-01-01 00:15:00"]),
(["2018-01-01 00:15:00"], "floor", "15min", ["2018-01-01 00:15:00"]),
(["1823-01-01 03:00:00"], "ceil", "3h", ["1823-01-01 03:00:00"]),
(["1823-01-01 03:00:00"], "floor", "3h", ["1823-01-01 03:00:00"]),
(
("NaT", "1823-01-01 00:00:01"),
"floor",
"1s",
("NaT", "1823-01-01 00:00:01"),
),
(
("NaT", "1823-01-01 00:00:01"),
"ceil",
"1s",
("NaT", "1823-01-01 00:00:01"),
),
],
)
def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
dt = DatetimeIndex(list(test_input))
func = getattr(dt, rounder)
result = func(freq)
expected = DatetimeIndex(list(expected))
assert expected.equals(result)
@pytest.mark.parametrize(
"start, index_freq, periods",
[("2018-01-01", "12h", 25), ("2018-01-01 0:0:0.124999", "1ns", 1000)],
)
@pytest.mark.parametrize(
"round_freq",
[
"2ns",
"3ns",
"4ns",
"5ns",
"6ns",
"7ns",
"250ns",
"500ns",
"750ns",
"1us",
"19us",
"250us",
"500us",
"750us",
"1s",
"2s",
"3s",
"12h",
"1D",
],
)
def test_round_int64(self, start, index_freq, periods, round_freq):
dt = date_range(start=start, freq=index_freq, periods=periods)
unit = to_offset(round_freq).nanos
# test floor
result = dt.floor(round_freq)
diff = dt.asi8 - result.asi8
mod = result.asi8 % unit
assert (mod == 0).all(), f"floor not a {round_freq} multiple"
assert (0 <= diff).all() and (diff < unit).all(), "floor error"
# test ceil
result = dt.ceil(round_freq)
diff = result.asi8 - dt.asi8
mod = result.asi8 % unit
assert (mod == 0).all(), f"ceil not a {round_freq} multiple"
assert (0 <= diff).all() and (diff < unit).all(), "ceil error"
# test round
result = dt.round(round_freq)
diff = abs(result.asi8 - dt.asi8)
mod = result.asi8 % unit
assert (mod == 0).all(), f"round not a {round_freq} multiple"
assert (diff <= unit // 2).all(), "round error"
if unit % 2 == 0:
assert (
result.asi8[diff == unit // 2] % 2 == 0
).all(), "round half to even error"

@@ -0,0 +1,169 @@
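# Tests for DatetimeIndex.shift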
from datetime import datetime
import pytest
import pytz
from pandas.errors import NullFrequencyError
import pandas as pd
from pandas import (
DatetimeIndex,
Series,
date_range,
)
import pandas._testing as tm
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestDatetimeIndexShift:
# -------------------------------------------------------------
# DatetimeIndex.shift is used in integer addition
def test_dti_shift_tzaware(self, tz_naive_fixture, unit):
# GH#9903
tz = tz_naive_fixture
idx = DatetimeIndex([], name="xxx", tz=tz).as_unit(unit)
tm.assert_index_equal(idx.shift(0, freq="h"), idx)
tm.assert_index_equal(idx.shift(3, freq="h"), idx)
idx = DatetimeIndex(
["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"],
name="xxx",
tz=tz,
freq="h",
).as_unit(unit)
tm.assert_index_equal(idx.shift(0, freq="h"), idx)
exp = DatetimeIndex(
["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"],
name="xxx",
tz=tz,
freq="h",
).as_unit(unit)
tm.assert_index_equal(idx.shift(3, freq="h"), exp)
exp = DatetimeIndex(
["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"],
name="xxx",
tz=tz,
freq="h",
).as_unit(unit)
tm.assert_index_equal(idx.shift(-3, freq="h"), exp)
def test_dti_shift_freqs(self, unit):
# test shift for DatetimeIndex and non-DatetimeIndex
# GH#8083
drange = date_range("20130101", periods=5, unit=unit)
result = drange.shift(1)
expected = DatetimeIndex(
["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"],
dtype=f"M8[{unit}]",
freq="D",
)
tm.assert_index_equal(result, expected)
result = drange.shift(-1)
expected = DatetimeIndex(
["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"],
dtype=f"M8[{unit}]",
freq="D",
)
tm.assert_index_equal(result, expected)
result = drange.shift(3, freq="2D")
expected = DatetimeIndex(
["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"],
dtype=f"M8[{unit}]",
freq="D",
)
tm.assert_index_equal(result, expected)
def test_dti_shift_int(self, unit):
rng = date_range("1/1/2000", periods=20, unit=unit)
result = rng + 5 * rng.freq
expected = rng.shift(5)
tm.assert_index_equal(result, expected)
result = rng - 5 * rng.freq
expected = rng.shift(-5)
tm.assert_index_equal(result, expected)
def test_dti_shift_no_freq(self, unit):
# GH#19147
dti = DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None).as_unit(unit)
with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"):
dti.shift(2)
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_shift_localized(self, tzstr, unit):
dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI", unit=unit)
dr_tz = dr.tz_localize(tzstr)
result = dr_tz.shift(1, "10min")
assert result.tz == dr_tz.tz
def test_dti_shift_across_dst(self, unit):
# GH 8616
idx = date_range(
"2013-11-03", tz="America/Chicago", periods=7, freq="h", unit=unit
)
ser = Series(index=idx[:-1], dtype=object)
result = ser.shift(freq="h")
expected = Series(index=idx[1:], dtype=object)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"shift, result_time",
[
[0, "2014-11-14 00:00:00"],
[-1, "2014-11-13 23:00:00"],
[1, "2014-11-14 01:00:00"],
],
)
def test_dti_shift_near_midnight(self, shift, result_time, unit):
# GH 8616
dt = datetime(2014, 11, 14, 0)
dt_est = pytz.timezone("EST").localize(dt)
idx = DatetimeIndex([dt_est]).as_unit(unit)
ser = Series(data=[1], index=idx)
result = ser.shift(shift, freq="h")
exp_index = DatetimeIndex([result_time], tz="EST").as_unit(unit)
expected = Series(1, index=exp_index)
tm.assert_series_equal(result, expected)
def test_shift_periods(self, unit):
# GH#22458 : argument 'n' was deprecated in favor of 'periods'
idx = date_range(start=START, end=END, periods=3, unit=unit)
tm.assert_index_equal(idx.shift(periods=0), idx)
tm.assert_index_equal(idx.shift(0), idx)
@pytest.mark.parametrize("freq", ["B", "C"])
def test_shift_bday(self, freq, unit):
rng = date_range(START, END, freq=freq, unit=unit)
shifted = rng.shift(5)
assert shifted[0] == rng[5]
assert shifted.freq == rng.freq
shifted = rng.shift(-5)
assert shifted[5] == rng[0]
assert shifted.freq == rng.freq
shifted = rng.shift(0)
assert shifted[0] == rng[0]
assert shifted.freq == rng.freq
def test_shift_bmonth(self, unit):
rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit)
shifted = rng.shift(1, freq=pd.offsets.BDay())
assert shifted[0] == rng[0] + pd.offsets.BDay()
rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit)
with tm.assert_produces_warning(pd.errors.PerformanceWarning):
shifted = rng.shift(1, freq=pd.offsets.CDay())
assert shifted[0] == rng[0] + pd.offsets.CDay()
def test_shift_empty(self, unit):
# GH#14811
dti = date_range(start="2016-10-21", end="2016-10-21", freq="BME", unit=unit)
result = dti.shift(1)
tm.assert_index_equal(result, dti)

@@ -0,0 +1,47 @@
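# Tests for DatetimeIndex.snap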
import pytest
from pandas import (
DatetimeIndex,
date_range,
)
import pandas._testing as tm
@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"])
@pytest.mark.parametrize("name", [None, "my_dti"])
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
def test_dti_snap(name, tz, unit):
dti = DatetimeIndex(
[
"1/1/2002",
"1/2/2002",
"1/3/2002",
"1/4/2002",
"1/5/2002",
"1/6/2002",
"1/7/2002",
],
name=name,
tz=tz,
freq="D",
)
dti = dti.as_unit(unit)
result = dti.snap(freq="W-MON")
expected = date_range("12/31/2001", "1/7/2002", name=name, tz=tz, freq="w-mon")
expected = expected.repeat([3, 4])
expected = expected.as_unit(unit)
tm.assert_index_equal(result, expected)
assert result.tz == expected.tz
assert result.freq is None
assert expected.freq is None
result = dti.snap(freq="B")
expected = date_range("1/1/2002", "1/7/2002", name=name, tz=tz, freq="b")
expected = expected.repeat([1, 1, 1, 2, 2])
expected = expected.as_unit(unit)
tm.assert_index_equal(result, expected)
assert result.tz == expected.tz
assert result.freq is None
assert expected.freq is None

@@ -0,0 +1,31 @@
from pandas import (
DataFrame,
Index,
date_range,
)
import pandas._testing as tm
class TestToFrame:
def test_to_frame_datetime_tz(self):
# GH#25809
idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
result = idx.to_frame()
expected = DataFrame(idx, index=idx)
tm.assert_frame_equal(result, expected)
def test_to_frame_respects_none_name(self):
# GH#44212 if we explicitly pass name=None, then that should be respected,
# not changed to 0
# GH#45448: this is deprecated first and will only change in a future version
idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
result = idx.to_frame(name=None)
exp_idx = Index([None], dtype=object)
tm.assert_index_equal(exp_idx, result.columns)
result = idx.rename("foo").to_frame(name=None)
exp_idx = Index([None], dtype=object)
tm.assert_index_equal(exp_idx, result.columns)

@@ -0,0 +1,28 @@
import numpy as np
from pandas import (
Index,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDateTimeIndexToJulianDate:
def test_1700(self):
dr = date_range(start=Timestamp("1710-10-01"), periods=5, freq="D")
r1 = Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, Index) and r2.dtype == np.float64
tm.assert_index_equal(r1, r2)
def test_2000(self):
dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="D")
r1 = Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, Index) and r2.dtype == np.float64
tm.assert_index_equal(r1, r2)
def test_hour(self):
dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="h")
r1 = Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, Index) and r2.dtype == np.float64
tm.assert_index_equal(r1, r2)
def test_minute(self):
dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="min")
r1 = Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, Index) and r2.dtype == np.float64
tm.assert_index_equal(r1, r2)
def test_second(self):
dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="s")
r1 = Index([x.to_julian_date() for x in dr])
r2 = dr.to_julian_date()
assert isinstance(r2, Index) and r2.dtype == np.float64
tm.assert_index_equal(r1, r2)

@@ -0,0 +1,45 @@
import dateutil.tz
from dateutil.tz import tzlocal
import pytest
import pytz
from pandas._libs.tslibs.ccalendar import MONTHS
from pandas._libs.tslibs.offsets import MonthEnd
from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG
from pandas import (
DatetimeIndex,
Period,
PeriodIndex,
Timestamp,
date_range,
period_range,
)
import pandas._testing as tm
class TestToPeriod:
def test_dti_to_period(self):
dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
pi1 = dti.to_period()
pi2 = dti.to_period(freq="D")
pi3 = dti.to_period(freq="3D")
assert pi1[0] == Period("Jan 2005", freq="M")
assert pi2[0] == Period("1/31/2005", freq="D")
assert pi3[0] == Period("1/31/2005", freq="3D")
assert pi1[-1] == Period("Nov 2005", freq="M")
assert pi2[-1] == Period("11/30/2005", freq="D")
assert pi3[-1] == Period("11/30/2005", freq="3D")
tm.assert_index_equal(pi1, period_range("1/1/2005", "11/1/2005", freq="M"))
tm.assert_index_equal(
pi2, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("D")
)
tm.assert_index_equal(
pi3, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("3D")
)
@pytest.mark.parametrize("month", MONTHS)
def test_to_period_quarterly(self, month):
# make sure we can make the round trip
freq = f"Q-{month}"
rng = period_range("1989Q3", "1991Q3", freq=freq)
stamps = rng.to_timestamp()
result = stamps.to_period(freq)
tm.assert_index_equal(rng, result)
@pytest.mark.parametrize("off", ["BQE", "QS", "BQS"])
def test_to_period_quarterlyish(self, off):
rng = date_range("01-Jan-2012", periods=8, freq=off)
prng = rng.to_period()
assert prng.freq == "QE-DEC"
@pytest.mark.parametrize("off", ["BYE", "YS", "BYS"])
def test_to_period_annualish(self, off):
rng = date_range("01-Jan-2012", periods=8, freq=off)
prng = rng.to_period()
assert prng.freq == "YE-DEC"
def test_to_period_monthish(self):
offsets = ["MS", "BME"]
for off in offsets:
rng = date_range("01-Jan-2012", periods=8, freq=off)
prng = rng.to_period()
assert prng.freqstr == "M"
rng = date_range("01-Jan-2012", periods=8, freq="ME")
prng = rng.to_period()
assert prng.freqstr == "M"
with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
date_range("01-Jan-2012", periods=8, freq="EOM")
@pytest.mark.parametrize(
"freq_offset, freq_period",
[
("2ME", "2M"),
(MonthEnd(2), MonthEnd(2)),
],
)
def test_dti_to_period_2monthish(self, freq_offset, freq_period):
dti = date_range("2020-01-01", periods=3, freq=freq_offset)
pi = dti.to_period()
tm.assert_index_equal(pi, period_range("2020-01", "2020-05", freq=freq_period))
@pytest.mark.parametrize(
"freq, freq_depr",
[
("2ME", "2M"),
("2QE", "2Q"),
("2QE-SEP", "2Q-SEP"),
("1YE", "1Y"),
("2YE-MAR", "2Y-MAR"),
("1YE", "1A"),
("2YE-MAR", "2A-MAR"),
],
)
def test_to_period_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr):
# GH#9586
msg = f"'{freq_depr[1:]}' is deprecated and will be removed "
f"in a future version, please use '{freq[1:]}' instead."
rng = date_range("01-Jan-2012", periods=8, freq=freq)
prng = rng.to_period()
with tm.assert_produces_warning(FutureWarning, match=msg):
assert prng.freq == freq_depr
def test_to_period_infer(self):
# https://github.com/pandas-dev/pandas/issues/33358
rng = date_range(
start="2019-12-22 06:40:00+00:00",
end="2019-12-22 08:45:00+00:00",
freq="5min",
)
with tm.assert_produces_warning(UserWarning):
pi1 = rng.to_period("5min")
with tm.assert_produces_warning(UserWarning):
pi2 = rng.to_period()
tm.assert_index_equal(pi1, pi2)
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_period_dt64_round_trip(self):
dti = date_range("1/1/2000", "1/7/2002", freq="B")
pi = dti.to_period()
tm.assert_index_equal(pi.to_timestamp(), dti)
dti = date_range("1/1/2000", "1/7/2002", freq="B")
pi = dti.to_period(freq="h")
tm.assert_index_equal(pi.to_timestamp(), dti)
def test_to_period_millisecond(self):
index = DatetimeIndex(
[
Timestamp("2007-01-01 10:11:12.123456Z"),
Timestamp("2007-01-01 10:11:13.789123Z"),
]
)
with tm.assert_produces_warning(UserWarning):
# warning that timezone info will be lost
period = index.to_period(freq="ms")
assert 2 == len(period)
assert period[0] == Period("2007-01-01 10:11:12.123Z", "ms")
assert period[1] == Period("2007-01-01 10:11:13.789Z", "ms")
def test_to_period_microsecond(self):
index = DatetimeIndex(
[
Timestamp("2007-01-01 10:11:12.123456Z"),
Timestamp("2007-01-01 10:11:13.789123Z"),
]
)
with tm.assert_produces_warning(UserWarning):
# warning that timezone info will be lost
period = index.to_period(freq="us")
assert 2 == len(period)
assert period[0] == Period("2007-01-01 10:11:12.123456Z", "us")
assert period[1] == Period("2007-01-01 10:11:13.789123Z", "us")
@pytest.mark.parametrize(
"tz",
["US/Eastern", pytz.utc, tzlocal(), "dateutil/US/Eastern", dateutil.tz.tzutc()],
)
def test_to_period_tz(self, tz):
ts = date_range("1/1/2000", "2/1/2000", tz=tz)
with tm.assert_produces_warning(UserWarning):
# GH#21333 warning that timezone info will be lost
# filter warning about freq deprecation
result = ts.to_period()[0]
expected = ts[0].to_period(ts.freq)
assert result == expected
expected = date_range("1/1/2000", "2/1/2000").to_period()
with tm.assert_produces_warning(UserWarning):
# GH#21333 warning that timezone info will be lost
result = ts.to_period(ts.freq)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("tz", ["Etc/GMT-1", "Etc/GMT+1"])
def test_to_period_tz_utc_offset_consistency(self, tz):
# GH#22905
ts = date_range("1/1/2000", "2/1/2000", tz="Etc/GMT-1")
with tm.assert_produces_warning(UserWarning):
result = ts.to_period()[0]
expected = ts[0].to_period(ts.freq)
assert result == expected
def test_to_period_nofreq(self):
idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"])
msg = "You must pass a freq argument as current index has none."
with pytest.raises(ValueError, match=msg):
idx.to_period()
idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="infer")
assert idx.freqstr == "D"
expected = PeriodIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="D")
tm.assert_index_equal(idx.to_period(), expected)
# GH#7606
idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
assert idx.freqstr is None
tm.assert_index_equal(idx.to_period(), expected)
@pytest.mark.parametrize("freq", ["2BMS", "1SME-15"])
def test_to_period_offsets_not_supported(self, freq):
# GH#56243
msg = f"{freq[1:]} is not supported as period frequency"
ts = date_range("1/1/2012", periods=4, freq=freq)
with pytest.raises(ValueError, match=msg):
ts.to_period()

@@ -0,0 +1,51 @@
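# Tests for DatetimeIndex.to_pydatetime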
from datetime import (
datetime,
timezone,
)
import dateutil.parser
import dateutil.tz
from dateutil.tz import tzlocal
import numpy as np
from pandas import (
DatetimeIndex,
date_range,
to_datetime,
)
import pandas._testing as tm
from pandas.tests.indexes.datetimes.test_timezones import FixedOffset
fixed_off = FixedOffset(-420, "-07:00")
class TestToPyDatetime:
def test_dti_to_pydatetime(self):
dt = dateutil.parser.parse("2012-06-13T01:39:00Z")
dt = dt.replace(tzinfo=tzlocal())
arr = np.array([dt], dtype=object)
result = to_datetime(arr, utc=True)
assert result.tz is timezone.utc
rng = date_range("2012-11-03 03:00", "2012-11-05 03:00", tz=tzlocal())
arr = rng.to_pydatetime()
result = to_datetime(arr, utc=True)
assert result.tz is timezone.utc
def test_dti_to_pydatetime_fixedtz(self):
dates = np.array(
[
datetime(2000, 1, 1, tzinfo=fixed_off),
datetime(2000, 1, 2, tzinfo=fixed_off),
datetime(2000, 1, 3, tzinfo=fixed_off),
]
)
dti = DatetimeIndex(dates)
result = dti.to_pydatetime()
tm.assert_numpy_array_equal(dates, result)
result = dti._mpl_repr()
tm.assert_numpy_array_equal(dates, result)

@@ -0,0 +1,18 @@
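# Tests for DatetimeIndex.to_series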
import numpy as np
from pandas import (
DatetimeIndex,
Series,
)
import pandas._testing as tm
class TestToSeries:
def test_to_series(self):
naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B")
idx = naive.tz_localize("US/Pacific")
expected = Series(np.array(idx.tolist(), dtype="object"), name="B")
result = idx.to_series(index=[0, 1])
assert expected.dtype == idx.dtype
tm.assert_series_equal(result, expected)

@@ -0,0 +1,283 @@
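# Tests for DatetimeIndex.tz_convert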
from datetime import datetime
import dateutil.tz
from dateutil.tz import gettz
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs import timezones
from pandas import (
DatetimeIndex,
Index,
NaT,
Timestamp,
date_range,
offsets,
)
import pandas._testing as tm
class TestTZConvert:
def test_tz_convert_nat(self):
# GH#5546
dates = [NaT]
idx = DatetimeIndex(dates)
idx = idx.tz_localize("US/Pacific")
tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
idx = idx.tz_convert("US/Eastern")
tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Eastern"))
idx = idx.tz_convert("UTC")
tm.assert_index_equal(idx, DatetimeIndex(dates, tz="UTC"))
dates = ["2010-12-01 00:00", "2010-12-02 00:00", NaT]
idx = DatetimeIndex(dates)
idx = idx.tz_localize("US/Pacific")
tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
idx = idx.tz_convert("US/Eastern")
expected = ["2010-12-01 03:00", "2010-12-02 03:00", NaT]
tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
idx = idx + offsets.Hour(5)
expected = ["2010-12-01 08:00", "2010-12-02 08:00", NaT]
tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
idx = idx.tz_convert("US/Pacific")
expected = ["2010-12-01 05:00", "2010-12-02 05:00", NaT]
tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))
idx = idx + np.timedelta64(3, "h")
expected = ["2010-12-01 08:00", "2010-12-02 08:00", NaT]
tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))
idx = idx.tz_convert("US/Eastern")
expected = ["2010-12-01 11:00", "2010-12-02 11:00", NaT]
tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
def test_dti_tz_convert_compat_timestamp(self, prefix):
strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
idx = DatetimeIndex(strdates, tz=prefix + "US/Eastern")
conv = idx[0].tz_convert(prefix + "US/Pacific")
expected = idx.tz_convert(prefix + "US/Pacific")[0]
assert conv == expected
def test_dti_tz_convert_hour_overflow_dst(self):
# Regression test for GH#13306
# sorted case US/Eastern -> UTC
ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"]
tt = DatetimeIndex(ts).tz_localize("US/Eastern")
ut = tt.tz_convert("UTC")
expected = Index([13, 14, 13], dtype=np.int32)
tm.assert_index_equal(ut.hour, expected)
# sorted case UTC -> US/Eastern
ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"]
tt = DatetimeIndex(ts).tz_localize("UTC")
ut = tt.tz_convert("US/Eastern")
expected = Index([9, 9, 9], dtype=np.int32)
tm.assert_index_equal(ut.hour, expected)
# unsorted case US/Eastern -> UTC
ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"]
tt = DatetimeIndex(ts).tz_localize("US/Eastern")
ut = tt.tz_convert("UTC")
expected = Index([13, 14, 13], dtype=np.int32)
tm.assert_index_equal(ut.hour, expected)
# unsorted case UTC -> US/Eastern
ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"]
tt = DatetimeIndex(ts).tz_localize("UTC")
ut = tt.tz_convert("US/Eastern")
expected = Index([9, 9, 9], dtype=np.int32)
tm.assert_index_equal(ut.hour, expected)
@pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz):
# Regression test for GH#13306
# sorted case US/Eastern -> UTC
ts = [
Timestamp("2008-05-12 09:50:00", tz=tz),
Timestamp("2008-12-12 09:50:35", tz=tz),
Timestamp("2009-05-12 09:50:32", tz=tz),
]
tt = DatetimeIndex(ts)
ut = tt.tz_convert("UTC")
expected = Index([13, 14, 13], dtype=np.int32)
tm.assert_index_equal(ut.hour, expected)
# sorted case UTC -> US/Eastern
ts = [
Timestamp("2008-05-12 13:50:00", tz="UTC"),
Timestamp("2008-12-12 14:50:35", tz="UTC"),
Timestamp("2009-05-12 13:50:32", tz="UTC"),
]
tt = DatetimeIndex(ts)
ut = tt.tz_convert("US/Eastern")
expected = Index([9, 9, 9], dtype=np.int32)
tm.assert_index_equal(ut.hour, expected)
# unsorted case US/Eastern -> UTC
ts = [
Timestamp("2008-05-12 09:50:00", tz=tz),
Timestamp("2008-12-12 09:50:35", tz=tz),
Timestamp("2008-05-12 09:50:32", tz=tz),
]
tt = DatetimeIndex(ts)
ut = tt.tz_convert("UTC")
expected = Index([13, 14, 13], dtype=np.int32)
tm.assert_index_equal(ut.hour, expected)
# unsorted case UTC -> US/Eastern
ts = [
Timestamp("2008-05-12 13:50:00", tz="UTC"),
Timestamp("2008-12-12 14:50:35", tz="UTC"),
Timestamp("2008-05-12 13:50:32", tz="UTC"),
]
tt = DatetimeIndex(ts)
ut = tt.tz_convert("US/Eastern")
expected = Index([9, 9, 9], dtype=np.int32)
tm.assert_index_equal(ut.hour, expected)
@pytest.mark.parametrize("freq, n", [("h", 1), ("min", 60), ("s", 3600)])
def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n):
# Regression test for tslib.tz_convert(vals, tz1, tz2).
# See GH#4496 for details.
idx = date_range(datetime(2011, 3, 26, 23), datetime(2011, 3, 27, 1), freq=freq)
idx = idx.tz_localize("UTC")
idx = idx.tz_convert("Europe/Moscow")
expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1]))
tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))
def test_dti_tz_convert_dst(self):
for freq, n in [("h", 1), ("min", 60), ("s", 3600)]:
# Start DST
idx = date_range(
"2014-03-08 23:00", "2014-03-09 09:00", freq=freq, tz="UTC"
)
idx = idx.tz_convert("US/Eastern")
expected = np.repeat(
np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]),
np.array([n, n, n, n, n, n, n, n, n, n, 1]),
)
tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))
idx = date_range(
"2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern"
)
idx = idx.tz_convert("UTC")
expected = np.repeat(
np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
np.array([n, n, n, n, n, n, n, n, n, n, 1]),
)
tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))
# End DST
idx = date_range(
"2014-11-01 23:00", "2014-11-02 09:00", freq=freq, tz="UTC"
)
idx = idx.tz_convert("US/Eastern")
expected = np.repeat(
np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]),
np.array([n, n, n, n, n, n, n, n, n, n, 1]),
)
tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))
idx = date_range(
"2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern"
)
idx = idx.tz_convert("UTC")
expected = np.repeat(
np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]),
)
tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))
# daily
# Start DST
idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC")
idx = idx.tz_convert("US/Eastern")
tm.assert_index_equal(idx.hour, Index([19, 19], dtype=np.int32))
idx = date_range(
"2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern"
)
idx = idx.tz_convert("UTC")
tm.assert_index_equal(idx.hour, Index([5, 5], dtype=np.int32))
# End DST
idx = date_range("2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="UTC")
idx = idx.tz_convert("US/Eastern")
tm.assert_index_equal(idx.hour, Index([20, 20], dtype=np.int32))
idx = date_range(
"2014-11-01 00:00", "2014-11-02 000:00", freq="D", tz="US/Eastern"
)
idx = idx.tz_convert("UTC")
tm.assert_index_equal(idx.hour, Index([4, 4], dtype=np.int32))
def test_tz_convert_roundtrip(self, tz_aware_fixture):
tz = tz_aware_fixture
idx1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME", tz="UTC")
exp1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME")
idx2 = date_range(start="2014-01-01", end="2014-12-31", freq="D", tz="UTC")
exp2 = date_range(start="2014-01-01", end="2014-12-31", freq="D")
idx3 = date_range(start="2014-01-01", end="2014-03-01", freq="h", tz="UTC")
exp3 = date_range(start="2014-01-01", end="2014-03-01", freq="h")
idx4 = date_range(start="2014-08-01", end="2014-10-31", freq="min", tz="UTC")
exp4 = date_range(start="2014-08-01", end="2014-10-31", freq="min")
for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), (idx4, exp4)]:
converted = idx.tz_convert(tz)
reset = converted.tz_convert(None)
tm.assert_index_equal(reset, expected)
assert reset.tzinfo is None
expected = converted.tz_convert("UTC").tz_localize(None)
expected = expected._with_freq("infer")
tm.assert_index_equal(reset, expected)
def test_dti_tz_convert_tzlocal(self):
# GH#13583
        # tz_convert doesn't affect the internal int64 values
dti = date_range(start="2001-01-01", end="2001-03-01", tz="UTC")
dti2 = dti.tz_convert(dateutil.tz.tzlocal())
tm.assert_numpy_array_equal(dti2.asi8, dti.asi8)
dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal())
dti2 = dti.tz_convert(None)
tm.assert_numpy_array_equal(dti2.asi8, dti.asi8)
@pytest.mark.parametrize(
"tz",
[
"US/Eastern",
"dateutil/US/Eastern",
pytz.timezone("US/Eastern"),
gettz("US/Eastern"),
],
)
def test_dti_tz_convert_utc_to_local_no_modify(self, tz):
rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc")
rng_eastern = rng.tz_convert(tz)
# Values are unmodified
tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8)
assert timezones.tz_compare(rng_eastern.tz, timezones.maybe_get_tz(tz))
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_tz_convert_unsorted(self, tzstr):
dr = date_range("2012-03-09", freq="h", periods=100, tz="utc")
dr = dr.tz_convert(tzstr)
result = dr[::-1].hour
exp = dr.hour[::-1]
tm.assert_almost_equal(result, exp)

View File

@ -0,0 +1,402 @@
from datetime import (
datetime,
timedelta,
)
import dateutil.tz
from dateutil.tz import gettz
import numpy as np
import pytest
import pytz
from pandas import (
DatetimeIndex,
Timestamp,
bdate_range,
date_range,
offsets,
to_datetime,
)
import pandas._testing as tm
try:
from zoneinfo import ZoneInfo
except ImportError:
# Cannot assign to a type [misc]
ZoneInfo = None # type: ignore[misc, assignment]
easts = [pytz.timezone("US/Eastern"), gettz("US/Eastern")]
if ZoneInfo is not None:
try:
tz = ZoneInfo("US/Eastern")
except KeyError:
# no tzdata
pass
else:
easts.append(tz)
class TestTZLocalize:
def test_tz_localize_invalidates_freq(self):
# we only preserve freq in unambiguous cases
# if localized to US/Eastern, this crosses a DST transition
dti = date_range("2014-03-08 23:00", "2014-03-09 09:00", freq="h")
assert dti.freq == "h"
result = dti.tz_localize(None) # no-op
assert result.freq == "h"
result = dti.tz_localize("UTC") # unambiguous freq preservation
assert result.freq == "h"
result = dti.tz_localize("US/Eastern", nonexistent="shift_forward")
assert result.freq is None
assert result.inferred_freq is None # i.e. we are not _too_ strict here
# Case where we _can_ keep freq because we're length==1
dti2 = dti[:1]
result = dti2.tz_localize("US/Eastern")
assert result.freq == "h"
def test_tz_localize_utc_copies(self, utc_fixture):
# GH#46460
times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
index = DatetimeIndex(times)
res = index.tz_localize(utc_fixture)
assert not tm.shares_memory(res, index)
res2 = index._data.tz_localize(utc_fixture)
assert not tm.shares_memory(index._data, res2)
def test_dti_tz_localize_nonexistent_raise_coerce(self):
# GH#13057
times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
index = DatetimeIndex(times)
tz = "US/Eastern"
with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)):
index.tz_localize(tz=tz)
with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)):
index.tz_localize(tz=tz, nonexistent="raise")
result = index.tz_localize(tz=tz, nonexistent="NaT")
test_times = ["2015-03-08 01:00-05:00", "NaT", "2015-03-08 03:00-04:00"]
dti = to_datetime(test_times, utc=True)
expected = dti.tz_convert("US/Eastern")
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_infer(self, tz):
# November 6, 2011, fall back, repeat 2 AM hour
# With no repeated hours, we cannot infer the transition
dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour())
with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
dr.tz_localize(tz)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_infer2(self, tz, unit):
# With repeated hours, we can infer the transition
dr = date_range(
datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour(), tz=tz, unit=unit
)
times = [
"11/06/2011 00:00",
"11/06/2011 01:00",
"11/06/2011 01:00",
"11/06/2011 02:00",
"11/06/2011 03:00",
]
di = DatetimeIndex(times).as_unit(unit)
result = di.tz_localize(tz, ambiguous="infer")
expected = dr._with_freq(None)
tm.assert_index_equal(result, expected)
result2 = DatetimeIndex(times, tz=tz, ambiguous="infer").as_unit(unit)
tm.assert_index_equal(result2, expected)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_infer3(self, tz):
# When there is no dst transition, nothing special happens
dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=offsets.Hour())
localized = dr.tz_localize(tz)
localized_infer = dr.tz_localize(tz, ambiguous="infer")
tm.assert_index_equal(localized, localized_infer)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_times(self, tz):
# March 13, 2011, spring forward, skip from 2 AM to 3 AM
dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, freq=offsets.Hour())
with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:30:00"):
dr.tz_localize(tz)
# after dst transition, it works
dr = date_range(
datetime(2011, 3, 13, 3, 30), periods=3, freq=offsets.Hour(), tz=tz
)
# November 6, 2011, fall back, repeat 2 AM hour
dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=offsets.Hour())
with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
dr.tz_localize(tz)
# UTC is OK
dr = date_range(
datetime(2011, 3, 13), periods=48, freq=offsets.Minute(30), tz=pytz.utc
)
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_tz_localize_pass_dates_to_utc(self, tzstr):
strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
idx = DatetimeIndex(strdates)
conv = idx.tz_localize(tzstr)
fromdates = DatetimeIndex(strdates, tz=tzstr)
assert conv.tz == fromdates.tz
tm.assert_numpy_array_equal(conv.values, fromdates.values)
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
def test_dti_tz_localize(self, prefix):
tzstr = prefix + "US/Eastern"
dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="ms")
dti2 = dti.tz_localize(tzstr)
dti_utc = date_range(
start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="ms", tz="utc"
)
tm.assert_numpy_array_equal(dti2.values, dti_utc.values)
dti3 = dti2.tz_convert(prefix + "US/Pacific")
tm.assert_numpy_array_equal(dti3.values, dti_utc.values)
dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms")
with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
dti.tz_localize(tzstr)
dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms")
with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:00:00"):
dti.tz_localize(tzstr)
@pytest.mark.parametrize(
"tz",
[
"US/Eastern",
"dateutil/US/Eastern",
pytz.timezone("US/Eastern"),
gettz("US/Eastern"),
],
)
def test_dti_tz_localize_utc_conversion(self, tz):
# Localizing to time zone should:
# 1) check for DST ambiguities
# 2) convert to UTC
rng = date_range("3/10/2012", "3/11/2012", freq="30min")
converted = rng.tz_localize(tz)
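        # all of these wall times precede the 2012-03-11 02:00 spring-forward,
        # so US/Eastern is UTC-5 throughout and the stored UTC values equal
        # the naive times plus 5 hours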
expected_naive = rng + offsets.Hour(5)
tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8)
# DST ambiguity, this should fail
rng = date_range("3/11/2012", "3/12/2012", freq="30min")
# Is this really how it should fail??
with pytest.raises(pytz.NonExistentTimeError, match="2012-03-11 02:00:00"):
rng.tz_localize(tz)
def test_dti_tz_localize_roundtrip(self, tz_aware_fixture):
# note: this tz tests that a tz-naive index can be localized
# and de-localized successfully, when there are no DST transitions
# in the range.
idx = date_range(start="2014-06-01", end="2014-08-30", freq="15min")
tz = tz_aware_fixture
localized = idx.tz_localize(tz)
# can't localize a tz-aware object
with pytest.raises(
TypeError, match="Already tz-aware, use tz_convert to convert"
):
localized.tz_localize(tz)
reset = localized.tz_localize(None)
assert reset.tzinfo is None
expected = idx._with_freq(None)
tm.assert_index_equal(reset, expected)
def test_dti_tz_localize_naive(self):
rng = date_range("1/1/2011", periods=100, freq="h")
conv = rng.tz_localize("US/Pacific")
exp = date_range("1/1/2011", periods=100, freq="h", tz="US/Pacific")
tm.assert_index_equal(conv, exp._with_freq(None))
def test_dti_tz_localize_tzlocal(self):
# GH#13583
offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1))
offset = int(offset.total_seconds() * 1000000000)
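        # asi8 stores UTC epoch nanoseconds, so localizing a naive index shifts
        # the stored values by the local UTC offset (converted to ns above)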
dti = date_range(start="2001-01-01", end="2001-03-01")
dti2 = dti.tz_localize(dateutil.tz.tzlocal())
tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8)
dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal())
dti2 = dti.tz_localize(None)
tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_nat(self, tz):
times = [
"11/06/2011 00:00",
"11/06/2011 01:00",
"11/06/2011 01:00",
"11/06/2011 02:00",
"11/06/2011 03:00",
]
di = DatetimeIndex(times)
localized = di.tz_localize(tz, ambiguous="NaT")
times = [
"11/06/2011 00:00",
np.nan,
np.nan,
"11/06/2011 02:00",
"11/06/2011 03:00",
]
di_test = DatetimeIndex(times, tz="US/Eastern")
# left dtype is datetime64[ns, US/Eastern]
# right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')]
tm.assert_numpy_array_equal(di_test.values, localized.values)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_flags(self, tz, unit):
# November 6, 2011, fall back, repeat 2 AM hour
# Pass in flags to determine right dst transition
dr = date_range(
datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour(), tz=tz, unit=unit
)
times = [
"11/06/2011 00:00",
"11/06/2011 01:00",
"11/06/2011 01:00",
"11/06/2011 02:00",
"11/06/2011 03:00",
]
# Test tz_localize
di = DatetimeIndex(times).as_unit(unit)
is_dst = [1, 1, 0, 0, 0]
localized = di.tz_localize(tz, ambiguous=is_dst)
expected = dr._with_freq(None)
tm.assert_index_equal(expected, localized)
result = DatetimeIndex(times, tz=tz, ambiguous=is_dst).as_unit(unit)
tm.assert_index_equal(result, expected)
localized = di.tz_localize(tz, ambiguous=np.array(is_dst))
tm.assert_index_equal(dr, localized)
localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype("bool"))
tm.assert_index_equal(dr, localized)
# Test constructor
localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst).as_unit(unit)
tm.assert_index_equal(dr, localized)
# Test duplicate times where inferring the dst fails
times += times
di = DatetimeIndex(times).as_unit(unit)
# When the sizes are incompatible, make sure error is raised
msg = "Length of ambiguous bool-array must be the same size as vals"
with pytest.raises(Exception, match=msg):
di.tz_localize(tz, ambiguous=is_dst)
# When sizes are compatible and there are repeats ('infer' won't work)
is_dst = np.hstack((is_dst, is_dst))
localized = di.tz_localize(tz, ambiguous=is_dst)
dr = dr.append(dr)
tm.assert_index_equal(dr, localized)
@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_flags2(self, tz, unit):
# When there is no dst transition, nothing special happens
dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=offsets.Hour())
is_dst = np.array([1] * 10)
localized = dr.tz_localize(tz)
localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst)
tm.assert_index_equal(localized, localized_is_dst)
def test_dti_tz_localize_bdate_range(self):
dr = bdate_range("1/1/2009", "1/1/2010")
dr_utc = bdate_range("1/1/2009", "1/1/2010", tz=pytz.utc)
localized = dr.tz_localize(pytz.utc)
tm.assert_index_equal(dr_utc, localized)
@pytest.mark.parametrize(
"start_ts, tz, end_ts, shift",
[
["2015-03-29 02:20:00", "Europe/Warsaw", "2015-03-29 03:00:00", "forward"],
[
"2015-03-29 02:20:00",
"Europe/Warsaw",
"2015-03-29 01:59:59.999999999",
"backward",
],
[
"2015-03-29 02:20:00",
"Europe/Warsaw",
"2015-03-29 03:20:00",
timedelta(hours=1),
],
[
"2015-03-29 02:20:00",
"Europe/Warsaw",
"2015-03-29 01:20:00",
timedelta(hours=-1),
],
["2018-03-11 02:33:00", "US/Pacific", "2018-03-11 03:00:00", "forward"],
[
"2018-03-11 02:33:00",
"US/Pacific",
"2018-03-11 01:59:59.999999999",
"backward",
],
[
"2018-03-11 02:33:00",
"US/Pacific",
"2018-03-11 03:33:00",
timedelta(hours=1),
],
[
"2018-03-11 02:33:00",
"US/Pacific",
"2018-03-11 01:33:00",
timedelta(hours=-1),
],
],
)
@pytest.mark.parametrize("tz_type", ["", "dateutil/"])
def test_dti_tz_localize_nonexistent_shift(
self, start_ts, tz, end_ts, shift, tz_type, unit
):
# GH#8917
tz = tz_type + tz
if isinstance(shift, str):
shift = "shift_" + shift
dti = DatetimeIndex([Timestamp(start_ts)]).as_unit(unit)
result = dti.tz_localize(tz, nonexistent=shift)
expected = DatetimeIndex([Timestamp(end_ts)]).tz_localize(tz).as_unit(unit)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("offset", [-1, 1])
def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, warsaw):
# GH#8917
tz = warsaw
dti = DatetimeIndex([Timestamp("2015-03-29 02:20:00")])
msg = "The provided timedelta will relocalize on a nonexistent time"
with pytest.raises(ValueError, match=msg):
dti.tz_localize(tz, nonexistent=timedelta(seconds=offset))

View File

@ -0,0 +1,77 @@
from datetime import (
datetime,
timedelta,
)
from pandas import (
DatetimeIndex,
NaT,
Timestamp,
)
import pandas._testing as tm
def test_unique(tz_naive_fixture):
idx = DatetimeIndex(["2017"] * 2, tz=tz_naive_fixture)
expected = idx[:1]
result = idx.unique()
tm.assert_index_equal(result, expected)
# GH#21737
# Ensure the underlying data is consistent
assert result[0] == expected[0]
def test_index_unique(rand_series_with_duplicate_datetimeindex):
dups = rand_series_with_duplicate_datetimeindex
index = dups.index
uniques = index.unique()
expected = DatetimeIndex(
[
datetime(2000, 1, 2),
datetime(2000, 1, 3),
datetime(2000, 1, 4),
datetime(2000, 1, 5),
],
dtype=index.dtype,
)
assert uniques.dtype == index.dtype # sanity
tm.assert_index_equal(uniques, expected)
assert index.nunique() == 4
# GH#2563
assert isinstance(uniques, DatetimeIndex)
dups_local = index.tz_localize("US/Eastern")
dups_local.name = "foo"
result = dups_local.unique()
expected = DatetimeIndex(expected, name="foo")
expected = expected.tz_localize("US/Eastern")
assert result.tz is not None
assert result.name == "foo"
tm.assert_index_equal(result, expected)
def test_index_unique2():
# NaT, note this is excluded
arr = [1370745748 + t for t in range(20)] + [NaT._value]
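    # raw ints are interpreted as epoch nanoseconds; NaT._value is the int64
    # iNaT sentinel, which unique() keeps but nunique() drops unless dropna=False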
idx = DatetimeIndex(arr * 3)
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
assert idx.nunique() == 20
assert idx.nunique(dropna=False) == 21
def test_index_unique3():
arr = [
Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)
] + [NaT]
idx = DatetimeIndex(arr * 3)
tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
assert idx.nunique() == 20
assert idx.nunique(dropna=False) == 21
def test_is_unique_monotonic(rand_series_with_duplicate_datetimeindex):
index = rand_series_with_duplicate_datetimeindex.index
assert not index.is_unique

View File

@ -0,0 +1,56 @@
# Arithmetic tests specific to DatetimeIndex are generally about `freq`
# retention or inference. Other arithmetic tests belong in
# tests/arithmetic/test_datetime64.py
import pytest
from pandas import (
Timedelta,
TimedeltaIndex,
Timestamp,
date_range,
timedelta_range,
)
import pandas._testing as tm
class TestDatetimeIndexArithmetic:
def test_add_timedelta_preserves_freq(self):
# GH#37295 should hold for any DTI with freq=None or Tick freq
tz = "Canada/Eastern"
dti = date_range(
start=Timestamp("2019-03-26 00:00:00-0400", tz=tz),
end=Timestamp("2020-10-17 00:00:00-0400", tz=tz),
freq="D",
)
result = dti + Timedelta(days=1)
assert result.freq == dti.freq
def test_sub_datetime_preserves_freq(self, tz_naive_fixture):
# GH#48818
dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture)
res = dti - dti[0]
expected = timedelta_range("0 Days", "11 Days")
tm.assert_index_equal(res, expected)
assert res.freq == expected.freq
@pytest.mark.xfail(
reason="The inherited freq is incorrect bc dti.freq is incorrect "
"https://github.com/pandas-dev/pandas/pull/48818/files#r982793461"
)
def test_sub_datetime_preserves_freq_across_dst(self):
# GH#48818
ts = Timestamp("2016-03-11", tz="US/Pacific")
dti = date_range(ts, periods=4)
res = dti - dti[0]
expected = TimedeltaIndex(
[
Timedelta(days=0),
Timedelta(days=1),
Timedelta(days=2),
Timedelta(days=2, hours=23),
]
)
tm.assert_index_equal(res, expected)
assert res.freq == expected.freq

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,216 @@
import datetime as dt
from datetime import date
import re
import numpy as np
import pytest
from pandas.compat.numpy import np_long
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Timestamp,
date_range,
offsets,
)
import pandas._testing as tm
class TestDatetimeIndex:
def test_is_(self):
dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
assert dti.is_(dti)
assert dti.is_(dti.view())
assert not dti.is_(dti.copy())
def test_time_overflow_for_32bit_machines(self):
# GH8943. On some machines NumPy defaults to np.int32 (for example,
# 32-bit Linux machines). In the function _generate_regular_range
# found in tseries/index.py, `periods` gets multiplied by `strides`
# (which has value 1e9) and since the max value for np.int32 is ~2e9,
# and since those machines won't promote np.int32 to np.int64, we get
# overflow.
periods = np_long(1000)
idx1 = date_range(start="2000", periods=periods, freq="s")
assert len(idx1) == periods
idx2 = date_range(end="2000", periods=periods, freq="s")
assert len(idx2) == periods
def test_nat(self):
assert DatetimeIndex([np.nan])[0] is pd.NaT
def test_week_of_month_frequency(self):
# GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
d1 = date(2002, 9, 1)
d2 = date(2013, 10, 27)
d3 = date(2012, 9, 30)
idx1 = DatetimeIndex([d1, d2])
idx2 = DatetimeIndex([d3])
result_append = idx1.append(idx2)
expected = DatetimeIndex([d1, d2, d3])
tm.assert_index_equal(result_append, expected)
result_union = idx1.union(idx2)
expected = DatetimeIndex([d1, d3, d2])
tm.assert_index_equal(result_union, expected)
def test_append_nondatetimeindex(self):
rng = date_range("1/1/2000", periods=10)
idx = Index(["a", "b", "c", "d"])
result = rng.append(idx)
assert isinstance(result[0], Timestamp)
def test_misc_coverage(self):
rng = date_range("1/1/2000", periods=5)
result = rng.groupby(rng.day)
assert isinstance(next(iter(result.values()))[0], Timestamp)
# TODO: belongs in frame groupby tests?
def test_groupby_function_tuple_1677(self):
df = DataFrame(
np.random.default_rng(2).random(100),
index=date_range("1/1/2000", periods=100),
)
monthly_group = df.groupby(lambda x: (x.year, x.month))
result = monthly_group.mean()
assert isinstance(result.index[0], tuple)
def assert_index_parameters(self, index):
assert index.freq == "40960ns"
assert index.inferred_freq == "40960ns"
def test_ns_index(self):
nsamples = 400
ns = int(1e9 / 24414)
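        # a ~24.414 kHz sampling rate gives a 40960 ns period, which
        # assert_index_parameters checks as the inferred freq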
dtstart = np.datetime64("2012-09-20T00:00:00")
dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns")
freq = ns * offsets.Nano()
index = DatetimeIndex(dt, freq=freq, name="time")
self.assert_index_parameters(index)
new_index = date_range(start=index[0], end=index[-1], freq=index.freq)
self.assert_index_parameters(new_index)
def test_asarray_tz_naive(self):
# This shouldn't produce a warning.
idx = date_range("2000", periods=2)
# M8[ns] by default
result = np.asarray(idx)
expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
tm.assert_numpy_array_equal(result, expected)
# optionally, object
result = np.asarray(idx, dtype=object)
expected = np.array([Timestamp("2000-01-01"), Timestamp("2000-01-02")])
tm.assert_numpy_array_equal(result, expected)
def test_asarray_tz_aware(self):
tz = "US/Central"
idx = date_range("2000", periods=2, tz=tz)
expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
result = np.asarray(idx, dtype="datetime64[ns]")
tm.assert_numpy_array_equal(result, expected)
# Old behavior with no warning
result = np.asarray(idx, dtype="M8[ns]")
tm.assert_numpy_array_equal(result, expected)
# Future behavior with no warning
expected = np.array(
[Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)]
)
result = np.asarray(idx, dtype=object)
tm.assert_numpy_array_equal(result, expected)
def test_CBH_deprecated(self):
msg = "'CBH' is deprecated and will be removed in a future version."
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = date_range(
dt.datetime(2022, 12, 11), dt.datetime(2022, 12, 13), freq="CBH"
)
result = DatetimeIndex(
[
"2022-12-12 09:00:00",
"2022-12-12 10:00:00",
"2022-12-12 11:00:00",
"2022-12-12 12:00:00",
"2022-12-12 13:00:00",
"2022-12-12 14:00:00",
"2022-12-12 15:00:00",
"2022-12-12 16:00:00",
],
dtype="datetime64[ns]",
freq="cbh",
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"freq_depr, expected_values, expected_freq",
[
(
"AS-AUG",
["2021-08-01", "2022-08-01", "2023-08-01"],
"YS-AUG",
),
(
"1BAS-MAY",
["2021-05-03", "2022-05-02", "2023-05-01"],
"1BYS-MAY",
),
],
)
def test_AS_BAS_deprecated(self, freq_depr, expected_values, expected_freq):
# GH#55479
freq_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
msg = f"'{freq_msg}' is deprecated and will be removed in a future version."
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = date_range(
dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1), freq=freq_depr
)
result = DatetimeIndex(
expected_values,
dtype="datetime64[ns]",
freq=expected_freq,
)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"freq, expected_values, freq_depr",
[
("2BYE-MAR", ["2016-03-31"], "2BA-MAR"),
("2BYE-JUN", ["2016-06-30"], "2BY-JUN"),
("2BME", ["2016-02-29", "2016-04-29", "2016-06-30"], "2BM"),
("2BQE", ["2016-03-31"], "2BQ"),
("1BQE-MAR", ["2016-03-31", "2016-06-30"], "1BQ-MAR"),
],
)
def test_BM_BQ_BY_deprecated(self, freq, expected_values, freq_depr):
# GH#52064
msg = f"'{freq_depr[1:]}' is deprecated and will be removed "
f"in a future version, please use '{freq[1:]}' instead."
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = date_range(start="2016-02-21", end="2016-08-21", freq=freq_depr)
result = DatetimeIndex(
data=expected_values,
dtype="datetime64[ns]",
freq=freq,
)
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,356 @@
from datetime import datetime
import dateutil.tz
import numpy as np
import pytest
import pytz
import pandas as pd
from pandas import (
DatetimeIndex,
NaT,
Series,
)
import pandas._testing as tm
@pytest.fixture(params=["s", "ms", "us", "ns"])
def unit(request):
return request.param
def test_get_values_for_csv():
index = pd.date_range(freq="1D", periods=3, start="2017-01-01")
# First, with no arguments.
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
result = index._get_values_for_csv()
tm.assert_numpy_array_equal(result, expected)
# No NaN values, so na_rep has no effect
result = index._get_values_for_csv(na_rep="pandas")
tm.assert_numpy_array_equal(result, expected)
# Make sure date formatting works
expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
result = index._get_values_for_csv(date_format="%m-%Y-%d")
tm.assert_numpy_array_equal(result, expected)
# NULL object handling should work
index = DatetimeIndex(["2017-01-01", NaT, "2017-01-03"])
expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
result = index._get_values_for_csv(na_rep="NaT")
tm.assert_numpy_array_equal(result, expected)
expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
result = index._get_values_for_csv(na_rep="pandas")
tm.assert_numpy_array_equal(result, expected)
    result = index._get_values_for_csv(
        na_rep="NaT", date_format="%Y-%m-%d %H:%M:%S.%f"
    )
expected = np.array(
["2017-01-01 00:00:00.000000", "NaT", "2017-01-03 00:00:00.000000"],
dtype=object,
)
tm.assert_numpy_array_equal(result, expected)
# invalid format
result = index._get_values_for_csv(na_rep="NaT", date_format="foo")
expected = np.array(["foo", "NaT", "foo"], dtype=object)
tm.assert_numpy_array_equal(result, expected)
class TestDatetimeIndexRendering:
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_with_timezone_repr(self, tzstr):
rng = pd.date_range("4/13/2010", "5/6/2010")
rng_eastern = rng.tz_localize(tzstr)
rng_repr = repr(rng_eastern)
assert "2010-04-13 00:00:00" in rng_repr
def test_dti_repr_dates(self):
text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1)]))
assert "['2013-01-01'," in text
assert ", '2014-01-01']" in text
def test_dti_repr_mixed(self):
text = str(
pd.to_datetime(
[datetime(2013, 1, 1), datetime(2014, 1, 1, 12), datetime(2014, 1, 1)]
)
)
assert "'2013-01-01 00:00:00'," in text
assert "'2014-01-01 00:00:00']" in text
def test_dti_repr_short(self):
dr = pd.date_range(start="1/1/2012", periods=1)
repr(dr)
dr = pd.date_range(start="1/1/2012", periods=2)
repr(dr)
dr = pd.date_range(start="1/1/2012", periods=3)
repr(dr)
@pytest.mark.parametrize(
"dates, freq, expected_repr",
[
(
["2012-01-01 00:00:00"],
"60min",
(
"DatetimeIndex(['2012-01-01 00:00:00'], "
"dtype='datetime64[ns]', freq='60min')"
),
),
(
["2012-01-01 00:00:00", "2012-01-01 01:00:00"],
"60min",
"DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 01:00:00'], "
"dtype='datetime64[ns]', freq='60min')",
),
(
["2012-01-01"],
"24h",
"DatetimeIndex(['2012-01-01'], dtype='datetime64[ns]', freq='24h')",
),
],
)
def test_dti_repr_time_midnight(self, dates, freq, expected_repr, unit):
# GH53634
dti = DatetimeIndex(dates, freq).as_unit(unit)
actual_repr = repr(dti)
assert actual_repr == expected_repr.replace("[ns]", f"[{unit}]")
def test_dti_representation(self, unit):
idxs = []
idxs.append(DatetimeIndex([], freq="D"))
idxs.append(DatetimeIndex(["2011-01-01"], freq="D"))
idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"))
idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"))
idxs.append(
DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
freq="h",
tz="Asia/Tokyo",
)
)
idxs.append(
DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
)
)
idxs.append(
DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="UTC")
)
exp = []
exp.append("DatetimeIndex([], dtype='datetime64[ns]', freq='D')")
exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')")
exp.append(
"DatetimeIndex(['2011-01-01', '2011-01-02'], "
"dtype='datetime64[ns]', freq='D')"
)
exp.append(
"DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
"dtype='datetime64[ns]', freq='D')"
)
exp.append(
"DatetimeIndex(['2011-01-01 09:00:00+09:00', "
"'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
", dtype='datetime64[ns, Asia/Tokyo]', freq='h')"
)
exp.append(
"DatetimeIndex(['2011-01-01 09:00:00-05:00', "
"'2011-01-01 10:00:00-05:00', 'NaT'], "
"dtype='datetime64[ns, US/Eastern]', freq=None)"
)
exp.append(
"DatetimeIndex(['2011-01-01 09:00:00+00:00', "
"'2011-01-01 10:00:00+00:00', 'NaT'], "
"dtype='datetime64[ns, UTC]', freq=None)"
""
)
with pd.option_context("display.width", 300):
for index, expected in zip(idxs, exp):
index = index.as_unit(unit)
expected = expected.replace("[ns", f"[{unit}")
result = repr(index)
assert result == expected
result = str(index)
assert result == expected
# TODO: this is a Series.__repr__ test
def test_dti_representation_to_series(self, unit):
idx1 = DatetimeIndex([], freq="D")
idx2 = DatetimeIndex(["2011-01-01"], freq="D")
idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
idx5 = DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
freq="h",
tz="Asia/Tokyo",
)
idx6 = DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
)
idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"])
exp1 = """Series([], dtype: datetime64[ns])"""
exp2 = "0 2011-01-01\ndtype: datetime64[ns]"
exp3 = "0 2011-01-01\n1 2011-01-02\ndtype: datetime64[ns]"
exp4 = (
"0 2011-01-01\n"
"1 2011-01-02\n"
"2 2011-01-03\n"
"dtype: datetime64[ns]"
)
exp5 = (
"0 2011-01-01 09:00:00+09:00\n"
"1 2011-01-01 10:00:00+09:00\n"
"2 2011-01-01 11:00:00+09:00\n"
"dtype: datetime64[ns, Asia/Tokyo]"
)
exp6 = (
"0 2011-01-01 09:00:00-05:00\n"
"1 2011-01-01 10:00:00-05:00\n"
"2 NaT\n"
"dtype: datetime64[ns, US/Eastern]"
)
exp7 = (
"0 2011-01-01 09:00:00\n"
"1 2011-01-02 10:15:00\n"
"dtype: datetime64[ns]"
)
with pd.option_context("display.width", 300):
for idx, expected in zip(
[idx1, idx2, idx3, idx4, idx5, idx6, idx7],
[exp1, exp2, exp3, exp4, exp5, exp6, exp7],
):
ser = Series(idx.as_unit(unit))
result = repr(ser)
assert result == expected.replace("[ns", f"[{unit}")
def test_dti_summary(self):
# GH#9116
idx1 = DatetimeIndex([], freq="D")
idx2 = DatetimeIndex(["2011-01-01"], freq="D")
idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
idx5 = DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
freq="h",
tz="Asia/Tokyo",
)
idx6 = DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
)
exp1 = "DatetimeIndex: 0 entries\nFreq: D"
exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D"
exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D"
exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D"
exp5 = (
"DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 "
"to 2011-01-01 11:00:00+09:00\n"
"Freq: h"
)
exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT"""
for idx, expected in zip(
[idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6]
):
result = idx._summary()
assert result == expected
@pytest.mark.parametrize("tz", [None, pytz.utc, dateutil.tz.tzutc()])
@pytest.mark.parametrize("freq", ["B", "C"])
def test_dti_business_repr_etc_smoke(self, tz, freq):
# only really care that it works
dti = pd.bdate_range(
datetime(2009, 1, 1), datetime(2010, 1, 1), tz=tz, freq=freq
)
repr(dti)
dti._summary()
dti[2:2]._summary()
class TestFormat:
def test_format(self):
# GH#35439
idx = pd.date_range("20130101", periods=5)
expected = [f"{x:%Y-%m-%d}" for x in idx]
msg = r"DatetimeIndex\.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert idx.format() == expected
def test_format_with_name_time_info(self):
# bug I fixed 12/20/2011
dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something")
msg = "DatetimeIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
formatted = dates.format(name=True)
assert formatted[0] == "something"
def test_format_datetime_with_time(self):
dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)])
msg = "DatetimeIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = dti.format()
expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"]
assert len(result) == 2
assert result == expected
def test_format_datetime(self):
msg = "DatetimeIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format()
assert formatted[0] == "2003-01-01 12:00:00"
assert formatted[1] == "NaT"
def test_format_date(self):
msg = "DatetimeIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format()
assert formatted[0] == "2003-01-01"
assert formatted[1] == "NaT"
def test_format_date_tz(self):
dti = pd.to_datetime([datetime(2013, 1, 1)], utc=True)
msg = "DatetimeIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
formatted = dti.format()
assert formatted[0] == "2013-01-01 00:00:00+00:00"
dti = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True)
with tm.assert_produces_warning(FutureWarning, match=msg):
formatted = dti.format()
assert formatted[0] == "2013-01-01 00:00:00+00:00"
def test_format_date_explicit_date_format(self):
dti = pd.to_datetime([datetime(2003, 2, 1), NaT])
msg = "DatetimeIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
formatted = dti.format(date_format="%m-%d-%Y", na_rep="UT")
assert formatted[0] == "02-01-2003"
assert formatted[1] == "UT"

View File

@ -0,0 +1,61 @@
import pytest
from pandas import (
DatetimeIndex,
date_range,
)
from pandas.tseries.offsets import (
BDay,
DateOffset,
Day,
Hour,
)
class TestFreq:
def test_freq_setter_errors(self):
# GH#20678
idx = DatetimeIndex(["20180101", "20180103", "20180105"])
# setting with an incompatible freq
msg = (
"Inferred frequency 2D from passed values does not conform to "
"passed frequency 5D"
)
with pytest.raises(ValueError, match=msg):
idx._data.freq = "5D"
# setting with non-freq string
with pytest.raises(ValueError, match="Invalid frequency"):
idx._data.freq = "foo"
@pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
@pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48h", Hour(48)])
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_freq_setter(self, values, freq, tz):
# GH#20678
idx = DatetimeIndex(values, tz=tz)
# can set to an offset, converting from string if necessary
idx._data.freq = freq
assert idx.freq == freq
assert isinstance(idx.freq, DateOffset)
# can reset to None
idx._data.freq = None
assert idx.freq is None
def test_freq_view_safe(self):
# Setting the freq for one DatetimeIndex shouldn't alter the freq
# for another that views the same data
dti = date_range("2016-01-01", periods=5)
dta = dti._data
dti2 = DatetimeIndex(dta)._with_freq(None)
assert dti2.freq is None
# Original was not altered
assert dti.freq == "D"
assert dta.freq == "D"

View File

@ -0,0 +1,717 @@
from datetime import (
date,
datetime,
time,
timedelta,
)
import numpy as np
import pytest
from pandas._libs import index as libindex
from pandas.compat.numpy import np_long
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
Timestamp,
bdate_range,
date_range,
notna,
)
import pandas._testing as tm
from pandas.tseries.frequencies import to_offset
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestGetItem:
def test_getitem_slice_keeps_name(self):
# GH4226
st = Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles")
et = Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles")
dr = date_range(st, et, freq="h", name="timebucket")
assert dr[1:].name == dr.name
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
def test_getitem(self, tz):
idx = date_range("2011-01-01", "2011-01-31", freq="D", tz=tz, name="idx")
result = idx[0]
assert result == Timestamp("2011-01-01", tz=idx.tz)
result = idx[0:5]
expected = date_range(
"2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[0:10:2]
expected = date_range(
"2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[-20:-5:3]
expected = date_range(
"2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx[4::-1]
expected = DatetimeIndex(
["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
dtype=idx.dtype,
freq="-1D",
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
@pytest.mark.parametrize("freq", ["B", "C"])
def test_dti_business_getitem(self, freq):
rng = bdate_range(START, END, freq=freq)
smaller = rng[:5]
exp = DatetimeIndex(rng.view(np.ndarray)[:5], freq=freq)
tm.assert_index_equal(smaller, exp)
assert smaller.freq == exp.freq
assert smaller.freq == rng.freq
sliced = rng[::5]
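        # slicing with a step scales the freq: every 5th business day -> 5 * BDay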
assert sliced.freq == to_offset(freq) * 5
fancy_indexed = rng[[4, 3, 2, 1, 0]]
assert len(fancy_indexed) == 5
assert isinstance(fancy_indexed, DatetimeIndex)
assert fancy_indexed.freq is None
# 32-bit vs. 64-bit platforms
assert rng[4] == rng[np_long(4)]
@pytest.mark.parametrize("freq", ["B", "C"])
def test_dti_business_getitem_matplotlib_hackaround(self, freq):
rng = bdate_range(START, END, freq=freq)
with pytest.raises(ValueError, match="Multi-dimensional indexing"):
# GH#30588 multi-dimensional indexing deprecated
rng[:, None]
def test_getitem_int_list(self):
dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
dti2 = dti[[1, 3, 5]]
v1 = dti2[0]
v2 = dti2[1]
v3 = dti2[2]
assert v1 == Timestamp("2/28/2005")
assert v2 == Timestamp("4/30/2005")
assert v3 == Timestamp("6/30/2005")
# getitem with non-slice drops freq
assert dti2.freq is None
class TestWhere:
def test_where_doesnt_retain_freq(self):
dti = date_range("20130101", periods=3, freq="D", name="idx")
cond = [True, True, False]
expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx")
result = dti.where(cond, dti[::-1])
tm.assert_index_equal(result, expected)
def test_where_other(self):
# other is ndarray or Index
i = date_range("20130101", periods=3, tz="US/Eastern")
for arr in [np.nan, pd.NaT]:
result = i.where(notna(i), other=arr)
expected = i
tm.assert_index_equal(result, expected)
i2 = i.copy()
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
result = i.where(notna(i2), i2)
tm.assert_index_equal(result, i2)
i2 = i.copy()
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
result = i.where(notna(i2), i2._values)
tm.assert_index_equal(result, i2)
def test_where_invalid_dtypes(self):
dti = date_range("20130101", periods=3, tz="US/Eastern")
tail = dti[2:].tolist()
i2 = Index([pd.NaT, pd.NaT] + tail)
mask = notna(i2)
# passing tz-naive ndarray to tzaware DTI
result = dti.where(mask, i2.values)
expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object)
tm.assert_index_equal(result, expected)
# passing tz-aware DTI to tznaive DTI
naive = dti.tz_localize(None)
result = naive.where(mask, i2)
expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object)
tm.assert_index_equal(result, expected)
pi = i2.tz_localize(None).to_period("D")
result = dti.where(mask, pi)
expected = Index([pi[0], pi[1]] + tail, dtype=object)
tm.assert_index_equal(result, expected)
tda = i2.asi8.view("timedelta64[ns]")
result = dti.where(mask, tda)
expected = Index([tda[0], tda[1]] + tail, dtype=object)
assert isinstance(expected[0], np.timedelta64)
tm.assert_index_equal(result, expected)
result = dti.where(mask, i2.asi8)
expected = Index([pd.NaT._value, pd.NaT._value] + tail, dtype=object)
assert isinstance(expected[0], int)
tm.assert_index_equal(result, expected)
# non-matching scalar
td = pd.Timedelta(days=4)
result = dti.where(mask, td)
expected = Index([td, td] + tail, dtype=object)
assert expected[0] is td
tm.assert_index_equal(result, expected)
def test_where_mismatched_nat(self, tz_aware_fixture):
tz = tz_aware_fixture
dti = date_range("2013-01-01", periods=3, tz=tz)
cond = np.array([True, False, True])
tdnat = np.timedelta64("NaT", "ns")
expected = Index([dti[0], tdnat, dti[2]], dtype=object)
assert expected[1] is tdnat
result = dti.where(cond, tdnat)
tm.assert_index_equal(result, expected)
def test_where_tz(self):
i = date_range("20130101", periods=3, tz="US/Eastern")
result = i.where(notna(i))
expected = i
tm.assert_index_equal(result, expected)
i2 = i.copy()
i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
result = i.where(notna(i2))
expected = i2
tm.assert_index_equal(result, expected)
class TestTake:
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_take_dont_lose_meta(self, tzstr):
rng = date_range("1/1/2000", periods=20, tz=tzstr)
result = rng.take(range(5))
assert result.tz == rng.tz
assert result.freq == rng.freq
def test_take_nan_first_datetime(self):
index = DatetimeIndex([pd.NaT, Timestamp("20130101"), Timestamp("20130102")])
result = index.take([-1, 0, 1])
expected = DatetimeIndex([index[-1], index[0], index[1]])
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
def test_take(self, tz):
# GH#10295
idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx", tz=tz)
result = idx.take([0])
assert result == Timestamp("2011-01-01", tz=idx.tz)
result = idx.take([0, 1, 2])
expected = date_range(
"2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx.take([0, 2, 4])
expected = date_range(
"2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx.take([7, 4, 1])
expected = date_range(
"2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx"
)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
result = idx.take([3, 2, 5])
expected = DatetimeIndex(
["2011-01-04", "2011-01-03", "2011-01-06"],
dtype=idx.dtype,
freq=None,
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq is None
result = idx.take([-3, 2, 5])
expected = DatetimeIndex(
["2011-01-29", "2011-01-03", "2011-01-06"],
dtype=idx.dtype,
freq=None,
name="idx",
)
tm.assert_index_equal(result, expected)
assert result.freq is None
def test_take_invalid_kwargs(self):
idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
indices = [1, 6, 5, 9, 10, 13, 15, 3]
msg = r"take\(\) got an unexpected keyword argument 'foo'"
with pytest.raises(TypeError, match=msg):
idx.take(indices, foo=2)
msg = "the 'out' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, out=indices)
msg = "the 'mode' parameter is not supported"
with pytest.raises(ValueError, match=msg):
idx.take(indices, mode="clip")
# TODO: This method came from test_datetime; de-dup with version above
@pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
def test_take2(self, tz):
dates = [
datetime(2010, 1, 1, 14),
datetime(2010, 1, 1, 15),
datetime(2010, 1, 1, 17),
datetime(2010, 1, 1, 21),
]
idx = date_range(
start="2010-01-01 09:00",
end="2010-02-01 09:00",
freq="h",
tz=tz,
name="idx",
)
expected = DatetimeIndex(dates, freq=None, name="idx", dtype=idx.dtype)
taken1 = idx.take([5, 6, 8, 12])
taken2 = idx[[5, 6, 8, 12]]
for taken in [taken1, taken2]:
tm.assert_index_equal(taken, expected)
assert isinstance(taken, DatetimeIndex)
assert taken.freq is None
assert taken.tz == expected.tz
assert taken.name == expected.name
def test_take_fill_value(self):
# GH#12631
idx = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
result = idx.take(np.array([1, 0, -1]))
expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
expected = DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
tm.assert_index_equal(result, expected)
msg = (
"When allow_fill=True and fill_value is not None, "
"all indices must be >= -1"
)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
msg = "out of bounds"
with pytest.raises(IndexError, match=msg):
idx.take(np.array([1, -5]))
def test_take_fill_value_with_timezone(self):
idx = DatetimeIndex(
["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
)
result = idx.take(np.array([1, 0, -1]))
expected = DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
)
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
expected = DatetimeIndex(
["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
)
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = DatetimeIndex(
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
)
tm.assert_index_equal(result, expected)
msg = (
"When allow_fill=True and fill_value is not None, "
"all indices must be >= -1"
)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
msg = "out of bounds"
with pytest.raises(IndexError, match=msg):
idx.take(np.array([1, -5]))
class TestGetLoc:
def test_get_loc_key_unit_mismatch(self):
idx = date_range("2000-01-01", periods=3)
key = idx[1].as_unit("ms")
loc = idx.get_loc(key)
assert loc == 1
assert key in idx
def test_get_loc_key_unit_mismatch_not_castable(self):
dta = date_range("2000-01-01", periods=3)._data.astype("M8[s]")
dti = DatetimeIndex(dta)
key = dta[0].as_unit("ns") + pd.Timedelta(1)
with pytest.raises(
KeyError, match=r"Timestamp\('2000-01-01 00:00:00.000000001'\)"
):
dti.get_loc(key)
assert key not in dti
def test_get_loc_time_obj(self):
# time indexing
idx = date_range("2000-01-01", periods=24, freq="h")
result = idx.get_loc(time(12))
expected = np.array([12])
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
result = idx.get_loc(time(12, 30))
expected = np.array([])
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
@pytest.mark.parametrize("offset", [-10, 10])
def test_get_loc_time_obj2(self, monkeypatch, offset):
# GH#8667
size_cutoff = 50
n = size_cutoff + offset
key = time(15, 11, 30)
start = key.hour * 3600 + key.minute * 60 + key.second
step = 24 * 3600
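        # with a 1-second index starting at midnight, the target time-of-day
        # first occurs at position `start` and then every 86400 rows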
with monkeypatch.context():
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
idx = date_range("2014-11-26", periods=n, freq="s")
ts = pd.Series(np.random.default_rng(2).standard_normal(n), index=idx)
locs = np.arange(start, n, step, dtype=np.intp)
result = ts.index.get_loc(key)
tm.assert_numpy_array_equal(result, locs)
tm.assert_series_equal(ts[key], ts.iloc[locs])
left, right = ts.copy(), ts.copy()
left[key] *= -10
right.iloc[locs] *= -10
tm.assert_series_equal(left, right)
def test_get_loc_time_nat(self):
# GH#35114
# Case where key's total microseconds happens to match iNaT % 1e6 // 1000
tic = time(minute=12, second=43, microsecond=145224)
dti = DatetimeIndex([pd.NaT])
loc = dti.get_loc(tic)
expected = np.array([], dtype=np.intp)
tm.assert_numpy_array_equal(loc, expected)
def test_get_loc_nat(self):
# GH#20464
index = DatetimeIndex(["1/3/2000", "NaT"])
assert index.get_loc(pd.NaT) == 1
assert index.get_loc(None) == 1
assert index.get_loc(np.nan) == 1
assert index.get_loc(pd.NA) == 1
assert index.get_loc(np.datetime64("NaT")) == 1
with pytest.raises(KeyError, match="NaT"):
index.get_loc(np.timedelta64("NaT"))
@pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
def test_get_loc_timedelta_invalid_key(self, key):
# GH#20464
dti = date_range("1970-01-01", periods=10)
msg = "Cannot index DatetimeIndex with [Tt]imedelta"
with pytest.raises(TypeError, match=msg):
dti.get_loc(key)
def test_get_loc_reasonable_key_error(self):
# GH#1062
index = DatetimeIndex(["1/3/2000"])
with pytest.raises(KeyError, match="2000"):
index.get_loc("1/1/2000")
def test_get_loc_year_str(self):
rng = date_range("1/1/2000", "1/1/2010")
result = rng.get_loc("2009")
expected = slice(3288, 3653)
assert result == expected
class TestContains:
def test_dti_contains_with_duplicates(self):
d = datetime(2011, 12, 5, 20, 30)
ix = DatetimeIndex([d, d])
assert d in ix
@pytest.mark.parametrize(
"vals",
[
[0, 1, 0],
[0, 0, -1],
[0, -1, -1],
["2015", "2015", "2016"],
["2015", "2015", "2014"],
],
)
def test_contains_nonunique(self, vals):
# GH#9512
idx = DatetimeIndex(vals)
assert idx[0] in idx
class TestGetIndexer:
def test_get_indexer_date_objs(self):
rng = date_range("1/1/2000", periods=20)
result = rng.get_indexer(rng.map(lambda x: x.date()))
expected = rng.get_indexer(rng)
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer(self):
idx = date_range("2000-01-01", periods=3)
exp = np.array([0, 1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(idx.get_indexer(idx), exp)
target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
tm.assert_numpy_array_equal(
idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
)
tm.assert_numpy_array_equal(
idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
np.array([0, -1, 1], dtype=np.intp),
)
tol_raw = [
pd.Timedelta("1 hour"),
pd.Timedelta("1 hour"),
pd.Timedelta("1 hour").to_timedelta64(),
]
tm.assert_numpy_array_equal(
idx.get_indexer(
target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
),
np.array([0, -1, 1], dtype=np.intp),
)
tol_bad = [
pd.Timedelta("2 hour").to_timedelta64(),
pd.Timedelta("1 hour").to_timedelta64(),
"foo",
]
msg = "Could not convert 'foo' to NumPy timedelta"
with pytest.raises(ValueError, match=msg):
idx.get_indexer(target, "nearest", tolerance=tol_bad)
with pytest.raises(ValueError, match="abbreviation w/o a number"):
idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")
@pytest.mark.parametrize(
"target",
[
[date(2020, 1, 1), Timestamp("2020-01-02")],
[Timestamp("2020-01-01"), date(2020, 1, 2)],
],
)
def test_get_indexer_mixed_dtypes(self, target):
# https://github.com/pandas-dev/pandas/issues/33741
values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
result = values.get_indexer(target)
expected = np.array([0, 1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize(
"target, positions",
[
([date(9999, 1, 1), Timestamp("2020-01-01")], [-1, 0]),
([Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]),
([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]),
],
)
def test_get_indexer_out_of_bounds_date(self, target, positions):
values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
result = values.get_indexer(target)
expected = np.array(positions, dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_pad_requires_monotonicity(self):
rng = date_range("1/1/2000", "3/1/2000", freq="B")
# neither monotonic increasing nor decreasing
rng2 = rng[[1, 0, 2]]
msg = "index must be monotonic increasing or decreasing"
with pytest.raises(ValueError, match=msg):
rng2.get_indexer(rng, method="pad")
class TestMaybeCastSliceBound:
def test_maybe_cast_slice_bounds_empty(self):
# GH#14354
empty_idx = date_range(freq="1h", periods=0, end="2015")
right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right")
exp = Timestamp("2015-01-02 23:59:59.999999999")
assert right == exp
left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left")
exp = Timestamp("2015-01-02 00:00:00")
assert left == exp
def test_maybe_cast_slice_duplicate_monotonic(self):
# https://github.com/pandas-dev/pandas/issues/16515
idx = DatetimeIndex(["2017", "2017"])
result = idx._maybe_cast_slice_bound("2017-01-01", "left")
expected = Timestamp("2017-01-01")
assert result == expected
class TestGetSliceBounds:
@pytest.mark.parametrize("box", [date, datetime, Timestamp])
@pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
def test_get_slice_bounds_datetime_within(
self, box, side, expected, tz_aware_fixture
):
# GH 35690
tz = tz_aware_fixture
index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
key = box(year=2000, month=1, day=7)
if tz is not None:
with pytest.raises(TypeError, match="Cannot compare tz-naive"):
# GH#36148 we require tzawareness-compat as of 2.0
index.get_slice_bound(key, side=side)
else:
result = index.get_slice_bound(key, side=side)
assert result == expected
@pytest.mark.parametrize("box", [datetime, Timestamp])
@pytest.mark.parametrize("side", ["left", "right"])
@pytest.mark.parametrize("year, expected", [(1999, 0), (2020, 30)])
def test_get_slice_bounds_datetime_outside(
self, box, side, year, expected, tz_aware_fixture
):
# GH 35690
tz = tz_aware_fixture
index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
key = box(year=year, month=1, day=7)
if tz is not None:
with pytest.raises(TypeError, match="Cannot compare tz-naive"):
# GH#36148 we require tzawareness-compat as of 2.0
index.get_slice_bound(key, side=side)
else:
result = index.get_slice_bound(key, side=side)
assert result == expected
@pytest.mark.parametrize("box", [datetime, Timestamp])
def test_slice_datetime_locs(self, box, tz_aware_fixture):
# GH 34077
tz = tz_aware_fixture
index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz)
key = box(2010, 1, 1)
if tz is not None:
with pytest.raises(TypeError, match="Cannot compare tz-naive"):
# GH#36148 we require tzawareness-compat as of 2.0
index.slice_locs(key, box(2010, 1, 2))
else:
result = index.slice_locs(key, box(2010, 1, 2))
expected = (0, 1)
assert result == expected
class TestIndexerBetweenTime:
def test_indexer_between_time(self):
# GH#11818
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
with pytest.raises(ValueError, match=msg):
rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
@pytest.mark.parametrize("unit", ["us", "ms", "s"])
def test_indexer_between_time_non_nano(self, unit):
# For simple cases like this, the non-nano indexer_between_time
# should match the nano result
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
arr_nano = rng._data._ndarray
arr = arr_nano.astype(f"M8[{unit}]")
dta = type(rng._data)._simple_new(arr, dtype=arr.dtype)
dti = DatetimeIndex(dta)
assert dti.dtype == arr.dtype
tic = time(1, 25)
toc = time(2, 29)
result = dti.indexer_between_time(tic, toc)
expected = rng.indexer_between_time(tic, toc)
tm.assert_numpy_array_equal(result, expected)
# case with non-zero micros in arguments
tic = time(1, 25, 0, 45678)
toc = time(2, 29, 0, 1234)
result = dti.indexer_between_time(tic, toc)
expected = rng.indexer_between_time(tic, toc)
tm.assert_numpy_array_equal(result, expected)
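# Editor's sketch (not part of the original file): the nearest/tolerance
# behaviour asserted in TestGetIndexer above, restated as a tiny standalone
# snippet; the values mirror those tests.
import pandas as pd
idx = pd.date_range("2000-01-01", periods=3)
target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
assert idx.get_indexer(target, method="nearest").tolist() == [0, 1, 1]
assert idx.get_indexer(
    target, method="nearest", tolerance=pd.Timedelta("1 hour")
).tolist() == [0, -1, 1]  # the 12-hour-away target has no match within 1 hour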

View File

@ -0,0 +1,76 @@
import dateutil.tz
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
date_range,
to_datetime,
)
from pandas.core.arrays import datetimes
class TestDatetimeIndexIteration:
@pytest.mark.parametrize(
"tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)]
)
def test_iteration_preserves_nanoseconds(self, tz):
# GH#19603
index = DatetimeIndex(
["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz
)
for i, ts in enumerate(index):
assert ts == index[i] # pylint: disable=unnecessary-list-index-lookup
def test_iter_readonly(self):
# GH#28055 ints_to_pydatetime with readonly array
arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")])
arr.setflags(write=False)
dti = to_datetime(arr)
list(dti)
def test_iteration_preserves_tz(self):
# see GH#8890
index = date_range("2012-01-01", periods=3, freq="h", tz="US/Eastern")
for i, ts in enumerate(index):
result = ts
expected = index[i] # pylint: disable=unnecessary-list-index-lookup
assert result == expected
def test_iteration_preserves_tz2(self):
index = date_range(
"2012-01-01", periods=3, freq="h", tz=dateutil.tz.tzoffset(None, -28800)
)
for i, ts in enumerate(index):
result = ts
expected = index[i] # pylint: disable=unnecessary-list-index-lookup
assert result._repr_base == expected._repr_base
assert result == expected
def test_iteration_preserves_tz3(self):
# GH#9100
index = DatetimeIndex(
["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
)
for i, ts in enumerate(index):
result = ts
expected = index[i] # pylint: disable=unnecessary-list-index-lookup
assert result._repr_base == expected._repr_base
assert result == expected
@pytest.mark.parametrize("offset", [-5, -1, 0, 1])
def test_iteration_over_chunksize(self, offset, monkeypatch):
# GH#21012
chunksize = 5
index = date_range(
"2000-01-01 00:00:00", periods=chunksize - offset, freq="min"
)
num = 0
with monkeypatch.context() as m:
m.setattr(datetimes, "_ITER_CHUNKSIZE", chunksize)
for stamp in index:
assert index[num] == stamp
num += 1
assert num == len(index)
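# Editor's sketch (not part of the original file): what the iteration tests
# above check, in miniature -- iterating a DatetimeIndex yields Timestamps
# that keep both the nanosecond component and the timezone of the index.
import pandas as pd
idx = pd.DatetimeIndex(["2018-02-08 15:00:00.168456358"], tz="US/Central")
ts = next(iter(idx))
assert ts.nanosecond == 358          # sub-microsecond part survives iteration
assert str(ts.tz) == "US/Central"    # timezone survives as well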

View File

@ -0,0 +1,149 @@
from datetime import (
datetime,
timezone,
)
import numpy as np
import pytest
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Timestamp,
date_range,
period_range,
to_datetime,
)
import pandas._testing as tm
from pandas.tseries.offsets import (
BDay,
BMonthEnd,
)
class TestJoin:
def test_does_not_convert_mixed_integer(self):
df = DataFrame(np.ones((3, 2)), columns=date_range("2020-01-01", periods=2))
cols = df.columns.join(df.index, how="outer")
joined = cols.join(df.columns)
assert cols.dtype == np.dtype("O")
assert cols.dtype == joined.dtype
tm.assert_numpy_array_equal(cols.values, joined.values)
def test_join_self(self, join_type):
index = date_range("1/1/2000", periods=10)
joined = index.join(index, how=join_type)
assert index is joined
def test_join_with_period_index(self, join_type):
df = DataFrame(
np.ones((10, 2)),
index=date_range("2020-01-01", periods=10),
columns=period_range("2020-01-01", periods=2),
)
s = df.iloc[:5, 0]
expected = df.columns.astype("O").join(s.index, how=join_type)
result = df.columns.join(s.index, how=join_type)
tm.assert_index_equal(expected, result)
def test_join_object_index(self):
rng = date_range("1/1/2000", periods=10)
idx = Index(["a", "b", "c", "d"])
result = rng.join(idx, how="outer")
assert isinstance(result[0], Timestamp)
def test_join_utc_convert(self, join_type):
rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")
left = rng.tz_convert("US/Eastern")
right = rng.tz_convert("Europe/Berlin")
result = left.join(left[:-5], how=join_type)
assert isinstance(result, DatetimeIndex)
assert result.tz == left.tz
result = left.join(right[:-5], how=join_type)
assert isinstance(result, DatetimeIndex)
assert result.tz is timezone.utc
def test_datetimeindex_union_join_empty(self, sort):
dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
empty = Index([])
result = dti.union(empty, sort=sort)
expected = dti.astype("O")
tm.assert_index_equal(result, expected)
result = dti.join(empty)
assert isinstance(result, DatetimeIndex)
tm.assert_index_equal(result, dti)
def test_join_nonunique(self):
idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
rs = idx1.join(idx2, how="outer")
assert rs.is_monotonic_increasing
@pytest.mark.parametrize("freq", ["B", "C"])
def test_outer_join(self, freq):
# should just behave as union
start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
rng = date_range(start=start, end=end, freq=freq)
# overlapping
left = rng[:10]
right = rng[5:10]
the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
# non-overlapping, gap in middle
left = rng[:5]
right = rng[10:]
the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None
# non-overlapping, no gap
left = rng[:5]
right = rng[5:10]
the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
# overlapping, but different offset
other = date_range(start, end, freq=BMonthEnd())
the_join = rng.join(other, how="outer")
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None
def test_naive_aware_conflicts(self):
start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
naive = date_range(start, end, freq=BDay(), tz=None)
aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")
msg = "tz-naive.*tz-aware"
with pytest.raises(TypeError, match=msg):
naive.join(aware)
with pytest.raises(TypeError, match=msg):
aware.join(naive)
@pytest.mark.parametrize("tz", [None, "US/Pacific"])
def test_join_preserves_freq(self, tz):
# GH#32157
dti = date_range("2016-01-01", periods=10, tz=tz)
result = dti[:5].join(dti[5:], how="outer")
assert result.freq == dti.freq
tm.assert_index_equal(result, dti)
result = dti[:5].join(dti[6:], how="outer")
assert result.freq is None
expected = dti.delete(5)
tm.assert_index_equal(result, expected)
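# Editor's sketch (not part of the original file): the "outer join behaves
# like union" property exercised in TestJoin above, on a tiny example.
import pandas as pd
rng = pd.date_range("2020-01-01", periods=6, freq="D")
left, right = rng[:4], rng[2:]
joined = left.join(right, how="outer")
assert joined.equals(left.union(right))  # same elements as the set union
assert joined.equals(rng)                # here that is simply the full range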

View File

@ -0,0 +1,13 @@
import numpy as np
from pandas import date_range
import pandas._testing as tm
class TestSplit:
def test_split_non_utc(self):
# GH#14042
indices = date_range("2016-01-01 00:00:00+0200", freq="s", periods=10)
result = np.split(indices, indices_or_sections=[])[0]
expected = indices._with_freq(None)
tm.assert_index_equal(result, expected)
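# Editor's sketch (not part of the original file): the np.split round trip
# checked above -- the piece comes back as a DatetimeIndex, minus its freq.
import numpy as np
import pandas as pd
idx = pd.date_range("2016-01-01 00:00:00+0200", freq="s", periods=10)
pieces = np.split(idx, indices_or_sections=[])
assert len(pieces) == 1 and isinstance(pieces[0], pd.DatetimeIndex)
assert pieces[0].equals(idx) and pieces[0].freq is None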

View File

@ -0,0 +1,56 @@
from datetime import datetime
import pytest
from pandas import (
DatetimeIndex,
Index,
bdate_range,
date_range,
)
import pandas._testing as tm
class TestDatetimeIndexOps:
def test_infer_freq(self, freq_sample):
# GH 11018
idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
result = DatetimeIndex(idx.asi8, freq="infer")
tm.assert_index_equal(idx, result)
assert result.freq == freq_sample
@pytest.mark.parametrize("freq", ["B", "C"])
class TestBusinessDatetimeIndex:
@pytest.fixture
def rng(self, freq):
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
return bdate_range(START, END, freq=freq)
def test_comparison(self, rng):
d = rng[10]
comp = rng > d
assert comp[11]
assert not comp[9]
def test_copy(self, rng):
cp = rng.copy()
tm.assert_index_equal(cp, rng)
def test_identical(self, rng):
t1 = rng.copy()
t2 = rng.copy()
assert t1.identical(t2)
# name
t1 = t1.rename("foo")
assert t1.equals(t2)
assert not t1.identical(t2)
t2 = t2.rename("foo")
assert t1.identical(t2)
# freq
t2v = Index(t2.values)
assert t1.equals(t2v)
assert not t1.identical(t2v)
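# Editor's sketch (not part of the original file): the equals/identical
# distinction asserted above -- equals() compares values only, identical()
# also requires matching metadata such as the name.
import pandas as pd
a = pd.date_range("2009-01-01", periods=5, freq="B")
b = a.rename("foo")
assert a.equals(b)                    # same values
assert not a.identical(b)             # different name
assert b.identical(a.rename("foo"))   # same values and same name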

View File

@ -0,0 +1,466 @@
""" test partial slicing on Series/Frame """
from datetime import datetime
import numpy as np
import pytest
from pandas import (
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
Series,
Timedelta,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestSlicing:
def test_string_index_series_name_converted(self):
# GH#1644
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
index=date_range("1/1/2000", periods=10),
)
result = df.loc["1/3/2000"]
assert result.name == df.index[2]
result = df.T["1/3/2000"]
assert result.name == df.index[2]
def test_stringified_slice_with_tz(self):
# GH#2658
start = "2013-01-07"
idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
df = DataFrame(np.arange(10), index=idx)
df["2013-01-14 23:44:34.437768-05:00":] # no exception here
def test_return_type_doesnt_depend_on_monotonicity(self):
# GH#24892 we get Series back regardless of whether our DTI is monotonic
dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
ser = Series(range(3), index=dti)
# non-monotonic index
ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])
# key with resolution strictly lower than "min"
key = "2015-5-14 00"
# monotonic increasing index
result = ser.loc[key]
expected = ser.iloc[1:]
tm.assert_series_equal(result, expected)
# monotonic decreasing index
result = ser.iloc[::-1].loc[key]
expected = ser.iloc[::-1][:-1]
tm.assert_series_equal(result, expected)
# non-monotonic index
result2 = ser2.loc[key]
expected2 = ser2.iloc[::2]
tm.assert_series_equal(result2, expected2)
def test_return_type_doesnt_depend_on_monotonicity_higher_reso(self):
# GH#24892 we get Series back regardless of whether our DTI is monotonic
dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
ser = Series(range(3), index=dti)
# non-monotonic index
ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])
# key with resolution strictly *higher* than "min"
key = "2015-5-14 00:00:00"
# monotonic increasing index
result = ser.loc[key]
assert result == 1
# monotonic decreasing index
result = ser.iloc[::-1].loc[key]
assert result == 1
# non-monotonic index
result2 = ser2.loc[key]
assert result2 == 0
def test_monotone_DTI_indexing_bug(self):
# GH 19362
# Test accessing the first element when doing partial string indexing
# on a monotonically decreasing index.
df = DataFrame(list(range(5)))
date_list = [
"2018-01-02",
"2017-02-10",
"2016-03-10",
"2015-03-15",
"2014-03-16",
]
date_index = DatetimeIndex(date_list)
df["date"] = date_index
expected = DataFrame({0: list(range(5)), "date": date_index})
tm.assert_frame_equal(df, expected)
# We get a slice because df.index's resolution is hourly and we
# are slicing with a daily-resolution string. If both were daily,
# we would get a single item back
dti = date_range("20170101 01:00:00", periods=3)
df = DataFrame({"A": [1, 2, 3]}, index=dti[::-1])
expected = DataFrame({"A": 1}, index=dti[-1:][::-1])
result = df.loc["2017-01-03"]
tm.assert_frame_equal(result, expected)
result2 = df.iloc[::-1].loc["2017-01-03"]
expected2 = expected.iloc[::-1]
tm.assert_frame_equal(result2, expected2)
def test_slice_year(self):
dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500)
s = Series(np.arange(len(dti)), index=dti)
result = s["2005"]
expected = s[s.index.year == 2005]
tm.assert_series_equal(result, expected)
df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
result = df.loc["2005"]
expected = df[df.index.year == 2005]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"partial_dtime",
[
"2019",
"2019Q4",
"Dec 2019",
"2019-12-31",
"2019-12-31 23",
"2019-12-31 23:59",
],
)
def test_slice_end_of_period_resolution(self, partial_dtime):
# GH#31064
dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s")
ser = Series(range(10), index=dti)
result = ser[partial_dtime]
expected = ser.iloc[:5]
tm.assert_series_equal(result, expected)
def test_slice_quarter(self):
dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500)
s = Series(np.arange(len(dti)), index=dti)
assert len(s["2001Q1"]) == 90
df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
assert len(df.loc["1Q01"]) == 90
def test_slice_month(self):
dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
s = Series(np.arange(len(dti)), index=dti)
assert len(s["2005-11"]) == 30
df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
assert len(df.loc["2005-11"]) == 30
tm.assert_series_equal(s["2005-11"], s["11-2005"])
def test_partial_slice(self):
rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s["2005-05":"2006-02"]
expected = s["20050501":"20060228"]
tm.assert_series_equal(result, expected)
result = s["2005-05":]
expected = s["20050501":]
tm.assert_series_equal(result, expected)
result = s[:"2006-02"]
expected = s[:"20060228"]
tm.assert_series_equal(result, expected)
result = s["2005-1-1"]
assert result == s.iloc[0]
with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
s["2004-12-31"]
def test_partial_slice_daily(self):
rng = date_range(freq="h", start=datetime(2005, 1, 31), periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s["2005-1-31"]
tm.assert_series_equal(result, s.iloc[:24])
with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
s["2004-12-31 00"]
def test_partial_slice_hourly(self):
rng = date_range(freq="min", start=datetime(2005, 1, 1, 20, 0, 0), periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s["2005-1-1"]
tm.assert_series_equal(result, s.iloc[: 60 * 4])
result = s["2005-1-1 20"]
tm.assert_series_equal(result, s.iloc[:60])
assert s["2005-1-1 20:00"] == s.iloc[0]
with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
s["2004-12-31 00:15"]
def test_partial_slice_minutely(self):
rng = date_range(freq="s", start=datetime(2005, 1, 1, 23, 59, 0), periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s["2005-1-1 23:59"]
tm.assert_series_equal(result, s.iloc[:60])
result = s["2005-1-1"]
tm.assert_series_equal(result, s.iloc[:60])
assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0]
with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
s["2004-12-31 00:00:00"]
def test_partial_slice_second_precision(self):
rng = date_range(
start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
periods=20,
freq="us",
)
s = Series(np.arange(20), rng)
tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10])
tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10])
tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:])
tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:])
assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0]
with pytest.raises(KeyError, match="2005-1-1 00:00:00"):
s["2005-1-1 00:00:00"]
def test_partial_slicing_dataframe(self):
# GH14856
# Test various combinations of string slicing resolution vs.
# index resolution
# - If string resolution is less precise than index resolution,
# string is considered a slice
# - If string resolution is equal to or more precise than index
# resolution, string is considered an exact match
formats = [
"%Y",
"%Y-%m",
"%Y-%m-%d",
"%Y-%m-%d %H",
"%Y-%m-%d %H:%M",
"%Y-%m-%d %H:%M:%S",
]
resolutions = ["year", "month", "day", "hour", "minute", "second"]
for rnum, resolution in enumerate(resolutions[2:], 2):
# we check only 'day', 'hour', 'minute' and 'second'
unit = Timedelta("1 " + resolution)
middate = datetime(2012, 1, 1, 0, 0, 0)
index = DatetimeIndex([middate - unit, middate, middate + unit])
values = [1, 2, 3]
df = DataFrame({"a": values}, index, dtype=np.int64)
assert df.index.resolution == resolution
# Timestamp with the same resolution as index
# Should be exact match for Series (return scalar)
# and raise KeyError for Frame
for timestamp, expected in zip(index, values):
ts_string = timestamp.strftime(formats[rnum])
# make ts_string as precise as index
result = df["a"][ts_string]
assert isinstance(result, np.int64)
assert result == expected
msg = rf"^'{ts_string}'$"
with pytest.raises(KeyError, match=msg):
df[ts_string]
# Timestamp with resolution less precise than index
for fmt in formats[:rnum]:
for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]:
ts_string = index[element].strftime(fmt)
# Series should return slice
result = df["a"][ts_string]
expected = df["a"][theslice]
tm.assert_series_equal(result, expected)
# pre-2.0 df[ts_string] was overloaded to interpret this
# as slicing along index
with pytest.raises(KeyError, match=ts_string):
df[ts_string]
# Timestamp with resolution more precise than index
# Compatible with existing key
# Should return scalar for Series
# and raise KeyError for Frame
for fmt in formats[rnum + 1 :]:
ts_string = index[1].strftime(fmt)
result = df["a"][ts_string]
assert isinstance(result, np.int64)
assert result == 2
msg = rf"^'{ts_string}'$"
with pytest.raises(KeyError, match=msg):
df[ts_string]
# Not compatible with existing key
# Should raise KeyError
for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]:
ts = index[1] + Timedelta("1 " + res)
ts_string = ts.strftime(fmt)
msg = rf"^'{ts_string}'$"
with pytest.raises(KeyError, match=msg):
df["a"][ts_string]
with pytest.raises(KeyError, match=msg):
df[ts_string]
def test_partial_slicing_with_multiindex(self):
# GH 4758
# partial string indexing with a multi-index buggy
df = DataFrame(
{
"ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"],
"TICKER": ["ABC", "MNP", "XYZ", "XYZ"],
"val": [1, 2, 3, 4],
},
index=date_range("2013-06-19 09:30:00", periods=4, freq="5min"),
)
df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True)
expected = DataFrame(
[[1]], index=Index(["ABC"], name="TICKER"), columns=["val"]
)
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")]
tm.assert_frame_equal(result, expected)
expected = df_multi.loc[
(Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
]
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
tm.assert_series_equal(result, expected)
# partial string indexing on first level, scalar indexing on the other two
result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")]
expected = df_multi.iloc[:1].droplevel([1, 2])
tm.assert_frame_equal(result, expected)
def test_partial_slicing_with_multiindex_series(self):
# GH 4294
# partial slice on a series mi
ser = Series(
range(250),
index=MultiIndex.from_product(
[date_range("2000-1-1", periods=50), range(5)]
),
)
s2 = ser[:-1].copy()
expected = s2["2000-1-4"]
result = s2[Timestamp("2000-1-4")]
tm.assert_series_equal(result, expected)
result = ser[Timestamp("2000-1-4")]
expected = ser["2000-1-4"]
tm.assert_series_equal(result, expected)
df2 = DataFrame(ser)
expected = df2.xs("2000-1-4")
result = df2.loc[Timestamp("2000-1-4")]
tm.assert_frame_equal(result, expected)
def test_partial_slice_requires_monotonicity(self):
# Disallowed since 2.0 (GH 37819)
ser = Series(np.arange(10), date_range("2014-01-01", periods=10))
nonmonotonic = ser.iloc[[3, 5, 4]]
timestamp = Timestamp("2014-01-10")
with pytest.raises(
KeyError, match="Value based partial slicing on non-monotonic"
):
nonmonotonic["2014-01-10":]
with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
nonmonotonic[timestamp:]
with pytest.raises(
KeyError, match="Value based partial slicing on non-monotonic"
):
nonmonotonic.loc["2014-01-10":]
with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
nonmonotonic.loc[timestamp:]
def test_loc_datetime_length_one(self):
# GH16071
df = DataFrame(
columns=["1"],
index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
)
result = df.loc[datetime(2016, 10, 1) :]
tm.assert_frame_equal(result, df)
result = df.loc["2016-10-01T00:00:00":]
tm.assert_frame_equal(result, df)
@pytest.mark.parametrize(
"start",
[
"2018-12-02 21:50:00+00:00",
Timestamp("2018-12-02 21:50:00+00:00"),
Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
],
)
@pytest.mark.parametrize(
"end",
[
"2018-12-02 21:52:00+00:00",
Timestamp("2018-12-02 21:52:00+00:00"),
Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
],
)
def test_getitem_with_datestring_with_UTC_offset(self, start, end):
# GH 24076
idx = date_range(
start="2018-12-02 14:50:00-07:00",
end="2018-12-02 14:50:00-07:00",
freq="1min",
)
df = DataFrame(1, index=idx, columns=["A"])
result = df[start:end]
expected = df.iloc[0:3, :]
tm.assert_frame_equal(result, expected)
# GH 16785
start = str(start)
end = str(end)
with pytest.raises(ValueError, match="Both dates must"):
df[start : end[:-4] + "1:00"]
with pytest.raises(ValueError, match="The index must be timezone"):
df = df.tz_localize(None)
df[start:end]
def test_slice_reduce_to_series(self):
# GH 27516
df = DataFrame(
{"A": range(24)}, index=date_range("2000", periods=24, freq="ME")
)
expected = Series(
range(12), index=date_range("2000", periods=12, freq="ME"), name="A"
)
result = df.loc["2000", "A"]
tm.assert_series_equal(result, expected)
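# Editor's sketch (not part of the original file): the resolution rule spelled
# out in test_partial_slicing_dataframe above -- a string less precise than
# the index resolution slices, one at least as precise does an exact lookup.
import numpy as np
import pandas as pd
ser = pd.Series(
    np.arange(3), index=pd.date_range("2012-01-01", periods=3, freq="min")
)
assert ser["2012-01-01"].tolist() == [0, 1, 2]  # day string < "min" -> slice
assert ser["2012-01-01 00:01"] == 1             # minute string -> exact match
assert ser["2012-01-01 00:01:00"] == 1          # second string -> exact match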

View File

@ -0,0 +1,45 @@
import pytest
from pandas import (
NaT,
date_range,
to_datetime,
)
import pandas._testing as tm
class TestPickle:
def test_pickle(self):
# GH#4606
idx = to_datetime(["2013-01-01", NaT, "2014-01-06"])
idx_p = tm.round_trip_pickle(idx)
assert idx_p[0] == idx[0]
assert idx_p[1] is NaT
assert idx_p[2] == idx[2]
def test_pickle_dont_infer_freq(self):
# GH#11002
# don't infer freq
idx = date_range("1750-1-1", "2050-1-1", freq="7D")
idx_p = tm.round_trip_pickle(idx)
tm.assert_index_equal(idx, idx_p)
def test_pickle_after_set_freq(self):
dti = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
dti = dti._with_freq(None)
res = tm.round_trip_pickle(dti)
tm.assert_index_equal(res, dti)
def test_roundtrip_pickle_with_tz(self):
# GH#8367
# round-trip of timezone
index = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
unpickled = tm.round_trip_pickle(index)
tm.assert_index_equal(index, unpickled)
@pytest.mark.parametrize("freq", ["B", "C"])
def test_pickle_unpickle(self, freq):
rng = date_range("2009-01-01", "2010-01-01", freq=freq)
unpickled = tm.round_trip_pickle(rng)
assert unpickled.freq == freq
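# Editor's sketch (not part of the original file): a plain pickle round trip,
# standing in for tm.round_trip_pickle used in the tests above.
import pickle
import pandas as pd
idx = pd.date_range(
    "2013-01-01", periods=3, tz="US/Eastern", freq="7D", name="foo"
)
restored = pickle.loads(pickle.dumps(idx))
assert restored.equals(idx)
assert restored.tz == idx.tz and restored.freq == idx.freq
assert restored.name == "foo"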

View File

@ -0,0 +1,56 @@
from datetime import timedelta
import numpy as np
from pandas import (
DatetimeIndex,
date_range,
)
import pandas._testing as tm
class TestDatetimeIndexReindex:
def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
# GH#7774
index = date_range("2013-01-01", periods=3, tz="US/Eastern")
assert str(index.reindex([])[0].tz) == "US/Eastern"
assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern"
def test_reindex_with_same_tz_nearest(self):
# GH#32740
rng_a = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc")
rng_b = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc")
result1, result2 = rng_a.reindex(
rng_b, method="nearest", tolerance=timedelta(seconds=20)
)
expected_list1 = [
"2010-01-01 00:00:00",
"2010-01-01 01:05:27.272727272",
"2010-01-01 02:10:54.545454545",
"2010-01-01 03:16:21.818181818",
"2010-01-01 04:21:49.090909090",
"2010-01-01 05:27:16.363636363",
"2010-01-01 06:32:43.636363636",
"2010-01-01 07:38:10.909090909",
"2010-01-01 08:43:38.181818181",
"2010-01-01 09:49:05.454545454",
"2010-01-01 10:54:32.727272727",
"2010-01-01 12:00:00",
"2010-01-01 13:05:27.272727272",
"2010-01-01 14:10:54.545454545",
"2010-01-01 15:16:21.818181818",
"2010-01-01 16:21:49.090909090",
"2010-01-01 17:27:16.363636363",
"2010-01-01 18:32:43.636363636",
"2010-01-01 19:38:10.909090909",
"2010-01-01 20:43:38.181818181",
"2010-01-01 21:49:05.454545454",
"2010-01-01 22:54:32.727272727",
"2010-01-02 00:00:00",
]
expected1 = DatetimeIndex(
expected_list1, dtype="datetime64[ns, UTC]", freq=None
)
expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp"))
tm.assert_index_equal(result1, expected1)
tm.assert_numpy_array_equal(result2, expected2)
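# Editor's sketch (not part of the original file): Index.reindex returns a
# (new_index, indexer) pair, with -1 marking targets that found no source
# label within the tolerance -- the shape of the assertions above.
import pandas as pd
idx = pd.date_range("2010-01-01", periods=3, freq="h", tz="UTC")
target = idx + pd.Timedelta(seconds=10)
new_index, indexer = idx.reindex(
    target, method="nearest", tolerance=pd.Timedelta(seconds=20)
)
assert new_index.equals(target)
assert indexer.tolist() == [0, 1, 2]  # every shifted label is within 20s of a source label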

View File

@ -0,0 +1,329 @@
"""
Tests for DatetimeIndex methods behaving like their Timestamp counterparts
"""
import calendar
from datetime import (
date,
datetime,
time,
)
import locale
import unicodedata
import numpy as np
import pytest
from pandas._libs.tslibs import timezones
from pandas import (
DatetimeIndex,
Index,
NaT,
Timestamp,
date_range,
offsets,
)
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray
class TestDatetimeIndexOps:
def test_dti_no_millisecond_field(self):
msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
with pytest.raises(AttributeError, match=msg):
DatetimeIndex.millisecond
msg = "'DatetimeIndex' object has no attribute 'millisecond'"
with pytest.raises(AttributeError, match=msg):
DatetimeIndex([]).millisecond
def test_dti_time(self):
rng = date_range("1/1/2000", freq="12min", periods=10)
result = Index(rng).time
expected = [t.time() for t in rng]
assert (result == expected).all()
def test_dti_date(self):
rng = date_range("1/1/2000", freq="12h", periods=10)
result = Index(rng).date
expected = [t.date() for t in rng]
assert (result == expected).all()
@pytest.mark.parametrize(
"dtype",
[None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
)
def test_dti_date2(self, dtype):
# Regression test for GH#21230
expected = np.array([date(2018, 6, 4), NaT])
index = DatetimeIndex(["2018-06-04 10:00:00", NaT], dtype=dtype)
result = index.date
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize(
"dtype",
[None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
)
def test_dti_time2(self, dtype):
# Regression test for GH#21267
expected = np.array([time(10, 20, 30), NaT])
index = DatetimeIndex(["2018-06-04 10:20:30", NaT], dtype=dtype)
result = index.time
tm.assert_numpy_array_equal(result, expected)
def test_dti_timetz(self, tz_naive_fixture):
# GH#21358
tz = timezones.maybe_get_tz(tz_naive_fixture)
expected = np.array([time(10, 20, 30, tzinfo=tz), NaT])
index = DatetimeIndex(["2018-06-04 10:20:30", NaT], tz=tz)
result = index.timetz
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize(
"field",
[
"dayofweek",
"day_of_week",
"dayofyear",
"day_of_year",
"quarter",
"days_in_month",
"is_month_start",
"is_month_end",
"is_quarter_start",
"is_quarter_end",
"is_year_start",
"is_year_end",
],
)
def test_dti_timestamp_fields(self, field):
# extra fields from DatetimeIndex like quarter and week
idx = date_range("2020-01-01", periods=10)
expected = getattr(idx, field)[-1]
result = getattr(Timestamp(idx[-1]), field)
assert result == expected
def test_dti_nanosecond(self):
dti = DatetimeIndex(np.arange(10))
expected = Index(np.arange(10, dtype=np.int32))
tm.assert_index_equal(dti.nanosecond, expected)
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
def test_dti_hour_tzaware(self, prefix):
strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
rng = DatetimeIndex(strdates, tz=prefix + "US/Eastern")
assert (rng.hour == 0).all()
# a more unusual time zone, GH#1946
dr = date_range(
"2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan"
)
expected = Index(np.arange(10, dtype=np.int32))
tm.assert_index_equal(dr.hour, expected)
# GH#12806
# error: Unsupported operand types for + ("List[None]" and "List[str]")
@pytest.mark.parametrize(
"time_locale", [None] + tm.get_locales() # type: ignore[operator]
)
def test_day_name_month_name(self, time_locale):
# Test Monday -> Sunday and January -> December, in that sequence
if time_locale is None:
# If time_locale is None, day_name and month_name should
# return the English names
expected_days = [
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
expected_months = [
"January",
"February",
"March",
"April",
"May",
"June",
"July",
"August",
"September",
"October",
"November",
"December",
]
else:
with tm.set_locale(time_locale, locale.LC_TIME):
expected_days = calendar.day_name[:]
expected_months = calendar.month_name[1:]
# GH#11128
dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
english_days = [
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
name = name.capitalize()
assert dti.day_name(locale=time_locale)[day] == name
assert dti.day_name(locale=None)[day] == eng_name
ts = Timestamp(datetime(2016, 4, day))
assert ts.day_name(locale=time_locale) == name
dti = dti.append(DatetimeIndex([NaT]))
assert np.isnan(dti.day_name(locale=time_locale)[-1])
ts = Timestamp(NaT)
assert np.isnan(ts.day_name(locale=time_locale))
# GH#12805
dti = date_range(freq="ME", start="2012", end="2013")
result = dti.month_name(locale=time_locale)
expected = Index([month.capitalize() for month in expected_months])
# work around different normalization schemes GH#22342
result = result.str.normalize("NFD")
expected = expected.str.normalize("NFD")
tm.assert_index_equal(result, expected)
for item, expected in zip(dti, expected_months):
result = item.month_name(locale=time_locale)
expected = expected.capitalize()
result = unicodedata.normalize("NFD", result)
expected = unicodedata.normalize("NFD", expected)
assert result == expected
dti = dti.append(DatetimeIndex([NaT]))
assert np.isnan(dti.month_name(locale=time_locale)[-1])
def test_dti_week(self):
# GH#6538: Check that DatetimeIndex and its Timestamp elements
# return the same weekofyear accessor close to new year w/ tz
dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
dates = DatetimeIndex(dates, tz="Europe/Brussels")
expected = [52, 1, 1]
assert dates.isocalendar().week.tolist() == expected
assert [d.weekofyear for d in dates] == expected
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
def test_dti_fields(self, tz):
# GH#13303
dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365, tz=tz)
assert dti.year[0] == 1998
assert dti.month[0] == 1
assert dti.day[0] == 1
assert dti.hour[0] == 0
assert dti.minute[0] == 0
assert dti.second[0] == 0
assert dti.microsecond[0] == 0
assert dti.dayofweek[0] == 3
assert dti.dayofyear[0] == 1
assert dti.dayofyear[120] == 121
assert dti.isocalendar().week.iloc[0] == 1
assert dti.isocalendar().week.iloc[120] == 18
assert dti.quarter[0] == 1
assert dti.quarter[120] == 2
assert dti.days_in_month[0] == 31
assert dti.days_in_month[90] == 30
assert dti.is_month_start[0]
assert not dti.is_month_start[1]
assert dti.is_month_start[31]
assert dti.is_quarter_start[0]
assert dti.is_quarter_start[90]
assert dti.is_year_start[0]
assert not dti.is_year_start[364]
assert not dti.is_month_end[0]
assert dti.is_month_end[30]
assert not dti.is_month_end[31]
assert dti.is_month_end[364]
assert not dti.is_quarter_end[0]
assert not dti.is_quarter_end[30]
assert dti.is_quarter_end[89]
assert dti.is_quarter_end[364]
assert not dti.is_year_end[0]
assert dti.is_year_end[364]
assert len(dti.year) == 365
assert len(dti.month) == 365
assert len(dti.day) == 365
assert len(dti.hour) == 365
assert len(dti.minute) == 365
assert len(dti.second) == 365
assert len(dti.microsecond) == 365
assert len(dti.dayofweek) == 365
assert len(dti.dayofyear) == 365
assert len(dti.isocalendar()) == 365
assert len(dti.quarter) == 365
assert len(dti.is_month_start) == 365
assert len(dti.is_month_end) == 365
assert len(dti.is_quarter_start) == 365
assert len(dti.is_quarter_end) == 365
assert len(dti.is_year_start) == 365
assert len(dti.is_year_end) == 365
dti.name = "name"
# non boolean accessors -> return Index
for accessor in DatetimeArray._field_ops:
res = getattr(dti, accessor)
assert len(res) == 365
assert isinstance(res, Index)
assert res.name == "name"
# boolean accessors -> return array
for accessor in DatetimeArray._bool_ops:
res = getattr(dti, accessor)
assert len(res) == 365
assert isinstance(res, np.ndarray)
# test boolean indexing
res = dti[dti.is_quarter_start]
exp = dti[[0, 90, 181, 273]]
tm.assert_index_equal(res, exp)
res = dti[dti.is_leap_year]
exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name").as_unit("ns")
tm.assert_index_equal(res, exp)
def test_dti_is_year_quarter_start(self):
dti = date_range(freq="BQE-FEB", start=datetime(1998, 1, 1), periods=4)
assert sum(dti.is_quarter_start) == 0
assert sum(dti.is_quarter_end) == 4
assert sum(dti.is_year_start) == 0
assert sum(dti.is_year_end) == 1
def test_dti_is_month_start(self):
dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
assert dti.is_month_start[0] == 1
def test_dti_is_month_start_custom(self):
# Ensure is_month_start/is_month_end raise ValueError for CustomBusinessDay frequencies
bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
msg = "Custom business days is not supported by is_month_start"
with pytest.raises(ValueError, match=msg):
dti.is_month_start
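# Editor's sketch (not part of the original file): the accessor return types
# asserted in test_dti_fields above -- non-boolean fields come back as Index
# objects carrying the name, boolean fields as plain numpy arrays.
import numpy as np
import pandas as pd
dti = pd.date_range("1998-01-01", periods=3, name="name")
assert isinstance(dti.dayofyear, pd.Index) and dti.dayofyear.name == "name"
assert isinstance(dti.is_month_start, np.ndarray)
assert pd.Timestamp(dti[0]).dayofyear == dti.dayofyear[0] == 1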

View File

@ -0,0 +1,666 @@
from datetime import (
datetime,
timezone,
)
import numpy as np
import pytest
import pytz
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Series,
Timestamp,
bdate_range,
date_range,
)
import pandas._testing as tm
from pandas.tseries.offsets import (
BMonthEnd,
Minute,
MonthEnd,
)
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
class TestDatetimeIndexSetOps:
tz = [
None,
"UTC",
"Asia/Tokyo",
"US/Eastern",
"dateutil/Asia/Singapore",
"dateutil/US/Pacific",
]
# TODO: moved from test_datetimelike; dedup with version below
def test_union2(self, sort):
everything = date_range("2020-01-01", periods=10)
first = everything[:5]
second = everything[5:]
union = first.union(second, sort=sort)
tm.assert_index_equal(union, everything)
@pytest.mark.parametrize("box", [np.array, Series, list])
def test_union3(self, sort, box):
everything = date_range("2020-01-01", periods=10)
first = everything[:5]
second = everything[5:]
# GH 10149 support listlike inputs other than Index objects
expected = first.union(second, sort=sort)
case = box(second.values)
result = first.union(case, sort=sort)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("tz", tz)
def test_union(self, tz, sort):
rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
expected1 = date_range("1/1/2000", freq="D", periods=10, tz=tz)
expected1_notsorted = DatetimeIndex(list(other1) + list(rng1))
rng2 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
expected2 = date_range("1/1/2000", freq="D", periods=8, tz=tz)
expected2_notsorted = DatetimeIndex(list(other2) + list(rng2[:3]))
rng3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
other3 = DatetimeIndex([], tz=tz).as_unit("ns")
expected3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
expected3_notsorted = rng3
for rng, other, exp, exp_notsorted in [
(rng1, other1, expected1, expected1_notsorted),
(rng2, other2, expected2, expected2_notsorted),
(rng3, other3, expected3, expected3_notsorted),
]:
result_union = rng.union(other, sort=sort)
tm.assert_index_equal(result_union, exp)
result_union = other.union(rng, sort=sort)
if sort is None:
tm.assert_index_equal(result_union, exp)
else:
tm.assert_index_equal(result_union, exp_notsorted)
def test_union_coverage(self, sort):
idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
ordered = DatetimeIndex(idx.sort_values(), freq="infer")
result = ordered.union(idx, sort=sort)
tm.assert_index_equal(result, ordered)
result = ordered[:0].union(ordered, sort=sort)
tm.assert_index_equal(result, ordered)
assert result.freq == ordered.freq
def test_union_bug_1730(self, sort):
rng_a = date_range("1/1/2012", periods=4, freq="3h")
rng_b = date_range("1/1/2012", periods=4, freq="4h")
result = rng_a.union(rng_b, sort=sort)
exp = list(rng_a) + list(rng_b[1:])
if sort is None:
exp = DatetimeIndex(sorted(exp))
else:
exp = DatetimeIndex(exp)
tm.assert_index_equal(result, exp)
def test_union_bug_1745(self, sort):
left = DatetimeIndex(["2012-05-11 15:19:49.695000"])
right = DatetimeIndex(
[
"2012-05-29 13:04:21.322000",
"2012-05-11 15:27:24.873000",
"2012-05-11 15:31:05.350000",
]
)
result = left.union(right, sort=sort)
exp = DatetimeIndex(
[
"2012-05-11 15:19:49.695000",
"2012-05-29 13:04:21.322000",
"2012-05-11 15:27:24.873000",
"2012-05-11 15:31:05.350000",
]
)
if sort is None:
exp = exp.sort_values()
tm.assert_index_equal(result, exp)
def test_union_bug_4564(self, sort):
from pandas import DateOffset
left = date_range("2013-01-01", "2013-02-01")
right = left + DateOffset(minutes=15)
result = left.union(right, sort=sort)
exp = list(left) + list(right)
if sort is None:
exp = DatetimeIndex(sorted(exp))
else:
exp = DatetimeIndex(exp)
tm.assert_index_equal(result, exp)
def test_union_freq_both_none(self, sort):
# GH11086
expected = bdate_range("20150101", periods=10)
expected._data.freq = None
result = expected.union(expected, sort=sort)
tm.assert_index_equal(result, expected)
assert result.freq is None
def test_union_freq_infer(self):
# When taking the union of two DatetimeIndexes, we infer
# a freq even if the arguments don't have freq. This matches
# TimedeltaIndex behavior.
dti = date_range("2016-01-01", periods=5)
left = dti[[0, 1, 3, 4]]
right = dti[[2, 3, 1]]
assert left.freq is None
assert right.freq is None
result = left.union(right)
tm.assert_index_equal(result, dti)
assert result.freq == "D"
def test_union_dataframe_index(self):
rng1 = date_range("1/1/1999", "1/1/2012", freq="MS")
s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1)
rng2 = date_range("1/1/1980", "12/1/2001", freq="MS")
s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2)
df = DataFrame({"s1": s1, "s2": s2})
exp = date_range("1/1/1980", "1/1/2012", freq="MS")
tm.assert_index_equal(df.index, exp)
def test_union_with_DatetimeIndex(self, sort):
i1 = Index(np.arange(0, 20, 2, dtype=np.int64))
i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D")
# Works
i1.union(i2, sort=sort)
# Fails with "AttributeError: can't set attribute"
i2.union(i1, sort=sort)
def test_union_same_timezone_different_units(self):
# GH 55238
idx1 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("ms")
idx2 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
result = idx1.union(idx2)
expected = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
tm.assert_index_equal(result, expected)
# TODO: moved from test_datetimelike; de-duplicate with version below
def test_intersection2(self):
first = date_range("2020-01-01", periods=10)
second = first[5:]
intersect = first.intersection(second)
tm.assert_index_equal(intersect, second)
# GH 10149
cases = [klass(second.values) for klass in [np.array, Series, list]]
for case in cases:
result = first.intersection(case)
tm.assert_index_equal(result, second)
third = Index(["a", "b", "c"])
result = first.intersection(third)
expected = Index([], dtype=object)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
)
def test_intersection(self, tz, sort):
# GH 4690 (with tz)
base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")
# if target has the same name, it is preserved
rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx")
expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx")
# if target name is different, it will be reset
rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other")
expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None)
rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx")
expected4 = DatetimeIndex([], freq="D", name="idx", dtype="M8[ns]")
for rng, expected in [
(rng2, expected2),
(rng3, expected3),
(rng4, expected4),
]:
result = base.intersection(rng)
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
# non-monotonic
base = DatetimeIndex(
["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx"
).as_unit("ns")
rng2 = DatetimeIndex(
["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx"
).as_unit("ns")
expected2 = DatetimeIndex(
["2011-01-04", "2011-01-02"], tz=tz, name="idx"
).as_unit("ns")
rng3 = DatetimeIndex(
["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
tz=tz,
name="other",
).as_unit("ns")
expected3 = DatetimeIndex(
["2011-01-04", "2011-01-02"], tz=tz, name=None
).as_unit("ns")
# GH 7880
rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
expected4 = DatetimeIndex([], tz=tz, name="idx").as_unit("ns")
assert expected4.freq is None
for rng, expected in [
(rng2, expected2),
(rng3, expected3),
(rng4, expected4),
]:
result = base.intersection(rng, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
# parametrize over both anchored and non-anchored freqs, as they
# have different code paths
@pytest.mark.parametrize("freq", ["min", "B"])
def test_intersection_empty(self, tz_aware_fixture, freq):
# empty same freq GH2129
tz = tz_aware_fixture
rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz)
result = rng[0:0].intersection(rng)
assert len(result) == 0
assert result.freq == rng.freq
result = rng.intersection(rng[0:0])
assert len(result) == 0
assert result.freq == rng.freq
# no overlap GH#33604
check_freq = freq != "min" # We don't preserve freq on non-anchored offsets
result = rng[:3].intersection(rng[-3:])
tm.assert_index_equal(result, rng[:0])
if check_freq:
# We don't preserve freq on non-anchored offsets
assert result.freq == rng.freq
# swapped left and right
result = rng[-3:].intersection(rng[:3])
tm.assert_index_equal(result, rng[:0])
if check_freq:
# We don't preserve freq on non-anchored offsets
assert result.freq == rng.freq
def test_intersection_bug_1708(self):
from pandas import DateOffset
index_1 = date_range("1/1/2012", periods=4, freq="12h")
index_2 = index_1 + DateOffset(hours=1)
result = index_1.intersection(index_2)
assert len(result) == 0
@pytest.mark.parametrize("tz", tz)
def test_difference(self, tz, sort):
rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]
rng1 = DatetimeIndex(rng_dates, tz=tz)
other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
expected1 = DatetimeIndex(rng_dates, tz=tz)
rng2 = DatetimeIndex(rng_dates, tz=tz)
other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
expected2 = DatetimeIndex(rng_dates[:3], tz=tz)
rng3 = DatetimeIndex(rng_dates, tz=tz)
other3 = DatetimeIndex([], tz=tz)
expected3 = DatetimeIndex(rng_dates, tz=tz)
for rng, other, expected in [
(rng1, other1, expected1),
(rng2, other2, expected2),
(rng3, other3, expected3),
]:
result_diff = rng.difference(other, sort)
if sort is None and len(other):
# We don't sort (yet?) when empty GH#24959
expected = expected.sort_values()
tm.assert_index_equal(result_diff, expected)
def test_difference_freq(self, sort):
# GH14323: difference of DatetimeIndex should not preserve frequency
index = date_range("20160920", "20160925", freq="D")
other = date_range("20160921", "20160924", freq="D")
expected = DatetimeIndex(["20160920", "20160925"], dtype="M8[ns]", freq=None)
idx_diff = index.difference(other, sort)
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal("freq", idx_diff, expected)
# preserve frequency when the difference is a contiguous
# subset of the original range
other = date_range("20160922", "20160925", freq="D")
idx_diff = index.difference(other, sort)
expected = DatetimeIndex(["20160920", "20160921"], dtype="M8[ns]", freq="D")
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal("freq", idx_diff, expected)
def test_datetimeindex_diff(self, sort):
dti1 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=100)
dti2 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=98)
assert len(dti1.difference(dti2, sort)) == 2
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"])
def test_setops_preserve_freq(self, tz):
rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz)
result = rng[:50].union(rng[50:100])
assert result.name == rng.name
assert result.freq == rng.freq
assert result.tz == rng.tz
result = rng[:50].union(rng[30:100])
assert result.name == rng.name
assert result.freq == rng.freq
assert result.tz == rng.tz
result = rng[:50].union(rng[60:100])
assert result.name == rng.name
assert result.freq is None
assert result.tz == rng.tz
result = rng[:50].intersection(rng[25:75])
assert result.name == rng.name
assert result.freqstr == "D"
assert result.tz == rng.tz
nofreq = DatetimeIndex(list(rng[25:75]), name="other")
result = rng[:50].union(nofreq)
assert result.name is None
assert result.freq == rng.freq
assert result.tz == rng.tz
result = rng[:50].intersection(nofreq)
assert result.name is None
assert result.freq == rng.freq
assert result.tz == rng.tz
def test_intersection_non_tick_no_fastpath(self):
# GH#42104
dti = DatetimeIndex(
[
"2018-12-31",
"2019-03-31",
"2019-06-30",
"2019-09-30",
"2019-12-31",
"2020-03-31",
],
freq="QE-DEC",
)
result = dti[::2].intersection(dti[1::2])
expected = dti[:0]
tm.assert_index_equal(result, expected)
def test_dti_intersection(self):
rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")
left = rng[10:90][::-1]
right = rng[20:80][::-1]
assert left.tz == rng.tz
result = left.intersection(right)
assert result.tz == left.tz
# Note: not difference, as there is no symmetry requirement there
@pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"])
def test_dti_setop_aware(self, setop):
# non-overlapping
# GH#39328 as of 2.0 we cast these to UTC instead of object
rng = date_range("2012-11-15 00:00:00", periods=6, freq="h", tz="US/Central")
rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="h", tz="US/Eastern")
result = getattr(rng, setop)(rng2)
left = rng.tz_convert("UTC")
right = rng2.tz_convert("UTC")
expected = getattr(left, setop)(right)
tm.assert_index_equal(result, expected)
assert result.tz == left.tz
if len(result):
assert result[0].tz is timezone.utc
assert result[-1].tz is timezone.utc
def test_dti_union_mixed(self):
# GH#21671
rng = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT])
rng2 = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo")
result = rng.union(rng2)
expected = Index(
[
Timestamp("2011-01-01"),
pd.NaT,
Timestamp("2012-01-01", tz="Asia/Tokyo"),
Timestamp("2012-01-02", tz="Asia/Tokyo"),
],
dtype=object,
)
tm.assert_index_equal(result, expected)
class TestBusinessDatetimeIndex:
def test_union(self, sort):
rng = bdate_range(START, END)
# overlapping
left = rng[:10]
right = rng[5:10]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, DatetimeIndex)
# non-overlapping, gap in middle
left = rng[:5]
right = rng[10:]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, Index)
# non-overlapping, no gap
left = rng[:5]
right = rng[5:10]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, DatetimeIndex)
# order does not matter
if sort is None:
tm.assert_index_equal(right.union(left, sort=sort), the_union)
else:
expected = DatetimeIndex(list(right) + list(left))
tm.assert_index_equal(right.union(left, sort=sort), expected)
# overlapping, but different offset
rng = date_range(START, END, freq=BMonthEnd())
the_union = rng.union(rng, sort=sort)
assert isinstance(the_union, DatetimeIndex)
def test_union_not_cacheable(self, sort):
rng = date_range("1/1/2000", periods=50, freq=Minute())
rng1 = rng[10:]
rng2 = rng[:25]
the_union = rng1.union(rng2, sort=sort)
if sort is None:
tm.assert_index_equal(the_union, rng)
else:
expected = DatetimeIndex(list(rng[10:]) + list(rng[:10]))
tm.assert_index_equal(the_union, expected)
rng1 = rng[10:]
rng2 = rng[15:35]
the_union = rng1.union(rng2, sort=sort)
expected = rng[10:]
tm.assert_index_equal(the_union, expected)
def test_intersection(self):
rng = date_range("1/1/2000", periods=50, freq=Minute())
rng1 = rng[10:]
rng2 = rng[:25]
the_int = rng1.intersection(rng2)
expected = rng[10:25]
tm.assert_index_equal(the_int, expected)
assert isinstance(the_int, DatetimeIndex)
assert the_int.freq == rng.freq
the_int = rng1.intersection(rng2)
tm.assert_index_equal(the_int, expected)
# non-overlapping
the_int = rng[:10].intersection(rng[10:])
expected = DatetimeIndex([]).as_unit("ns")
tm.assert_index_equal(the_int, expected)
def test_intersection_bug(self):
# GH #771
a = bdate_range("11/30/2011", "12/31/2011")
b = bdate_range("12/10/2011", "12/20/2011")
result = a.intersection(b)
tm.assert_index_equal(result, b)
assert result.freq == b.freq
def test_intersection_list(self):
# GH#35876
# values is not an Index -> no name -> retain "a"
values = [Timestamp("2020-01-01"), Timestamp("2020-02-01")]
idx = DatetimeIndex(values, name="a")
res = idx.intersection(values)
tm.assert_index_equal(res, idx)
def test_month_range_union_tz_pytz(self, sort):
tz = pytz.timezone("US/Eastern")
early_start = datetime(2011, 1, 1)
early_end = datetime(2011, 3, 1)
late_start = datetime(2011, 3, 1)
late_end = datetime(2011, 5, 1)
early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())
early_dr.union(late_dr, sort=sort)
@td.skip_if_windows
def test_month_range_union_tz_dateutil(self, sort):
from pandas._libs.tslibs.timezones import dateutil_gettz
tz = dateutil_gettz("US/Eastern")
early_start = datetime(2011, 1, 1)
early_end = datetime(2011, 3, 1)
late_start = datetime(2011, 3, 1)
late_end = datetime(2011, 5, 1)
early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())
early_dr.union(late_dr, sort=sort)
@pytest.mark.parametrize("sort", [False, None])
def test_intersection_duplicates(self, sort):
# GH#38196
idx1 = Index(
[
Timestamp("2019-12-13"),
Timestamp("2019-12-12"),
Timestamp("2019-12-12"),
]
)
result = idx1.intersection(idx1, sort=sort)
expected = Index([Timestamp("2019-12-13"), Timestamp("2019-12-12")])
tm.assert_index_equal(result, expected)
class TestCustomDatetimeIndex:
def test_union(self, sort):
# overlapping
rng = bdate_range(START, END, freq="C")
left = rng[:10]
right = rng[5:10]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, DatetimeIndex)
# non-overlapping, gap in middle
left = rng[:5]
right = rng[10:]
the_union = left.union(right, sort)
assert isinstance(the_union, Index)
# non-overlapping, no gap
left = rng[:5]
right = rng[5:10]
the_union = left.union(right, sort=sort)
assert isinstance(the_union, DatetimeIndex)
# order does not matter
if sort is None:
tm.assert_index_equal(right.union(left, sort=sort), the_union)
# overlapping, but different offset
rng = date_range(START, END, freq=BMonthEnd())
the_union = rng.union(rng, sort=sort)
assert isinstance(the_union, DatetimeIndex)
def test_intersection_bug(self):
# GH #771
a = bdate_range("11/30/2011", "12/31/2011", freq="C")
b = bdate_range("12/10/2011", "12/20/2011", freq="C")
result = a.intersection(b)
tm.assert_index_equal(result, b)
assert result.freq == b.freq
@pytest.mark.parametrize(
"tz", [None, "UTC", "Europe/Berlin", pytz.FixedOffset(-60)]
)
def test_intersection_dst_transition(self, tz):
# GH 46702: Europe/Berlin has DST transition
idx1 = date_range("2020-03-27", periods=5, freq="D", tz=tz)
idx2 = date_range("2020-03-30", periods=5, freq="D", tz=tz)
result = idx1.intersection(idx2)
expected = date_range("2020-03-30", periods=2, freq="D", tz=tz)
tm.assert_index_equal(result, expected)
# GH#45863 same problem for union
index1 = date_range("2021-10-28", periods=3, freq="D", tz="Europe/London")
index2 = date_range("2021-10-30", periods=4, freq="D", tz="Europe/London")
result = index1.union(index2)
expected = date_range("2021-10-28", periods=6, freq="D", tz="Europe/London")
tm.assert_index_equal(result, expected)
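# Editor's sketch (not part of the original file): the timezone behaviour
# described in test_dti_setop_aware above -- set operations between indexes
# in different timezones produce a UTC result rather than object dtype
# (pandas >= 2.0 semantics, as stated by that test's comment).
import pandas as pd
a = pd.date_range("2012-11-15 00:00", periods=3, freq="h", tz="US/Central")
b = pd.date_range("2012-11-15 12:00", periods=3, freq="h", tz="US/Eastern")
result = a.union(b)
assert str(result.dtype) == "datetime64[ns, UTC]"
assert result.equals(a.tz_convert("UTC").union(b.tz_convert("UTC")))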

View File

@ -0,0 +1,251 @@
"""
Tests for DatetimeIndex timezone-related methods
"""
from datetime import (
datetime,
timedelta,
timezone,
tzinfo,
)
from dateutil.tz import gettz
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs import (
conversion,
timezones,
)
import pandas as pd
from pandas import (
DatetimeIndex,
Timestamp,
bdate_range,
date_range,
isna,
to_datetime,
)
import pandas._testing as tm
class FixedOffset(tzinfo):
"""Fixed offset in minutes east from UTC."""
def __init__(self, offset, name) -> None:
self.__offset = timedelta(minutes=offset)
self.__name = name
def utcoffset(self, dt):
return self.__offset
def tzname(self, dt):
return self.__name
def dst(self, dt):
return timedelta(0)
fixed_off_no_name = FixedOffset(-330, None)
class TestDatetimeIndexTimezones:
# -------------------------------------------------------------
# Unsorted
def test_dti_drop_dont_lose_tz(self):
# GH#2621
ind = date_range("2012-12-01", periods=10, tz="utc")
ind = ind.drop(ind[-1])
        assert ind.tz is not None

    def test_dti_tz_conversion_freq(self, tz_naive_fixture):
# GH25241
t3 = DatetimeIndex(["2019-01-01 10:00"], freq="h")
assert t3.tz_localize(tz=tz_naive_fixture).freq == t3.freq
t4 = DatetimeIndex(["2019-01-02 12:00"], tz="UTC", freq="min")
        assert t4.tz_convert(tz="UTC").freq == t4.freq

    def test_drop_dst_boundary(self):
# see gh-18031
tz = "Europe/Brussels"
freq = "15min"
start = Timestamp("201710290100", tz=tz)
end = Timestamp("201710290300", tz=tz)
index = date_range(start=start, end=end, freq=freq)
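        # 2017-10-29 02:00-02:45 wall times occur twice in Europe/Brussels
        # (fall-back transition), so the expected index lists them twice and
        # uses the `ambiguous` flags to separate DST from standard time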
expected = DatetimeIndex(
[
"201710290115",
"201710290130",
"201710290145",
"201710290200",
"201710290215",
"201710290230",
"201710290245",
"201710290200",
"201710290215",
"201710290230",
"201710290245",
"201710290300",
],
dtype="M8[ns, Europe/Brussels]",
freq=freq,
ambiguous=[
True,
True,
True,
True,
True,
True,
True,
False,
False,
False,
False,
False,
],
)
result = index.drop(index[0])
        tm.assert_index_equal(result, expected)

    def test_date_range_localize(self, unit):
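        # building the range tz-aware up front and localizing an equivalent
        # naive range afterwards should give the same index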
rng = date_range(
"3/11/2012 03:00", periods=15, freq="h", tz="US/Eastern", unit=unit
)
rng2 = DatetimeIndex(
["3/11/2012 03:00", "3/11/2012 04:00"], dtype=f"M8[{unit}, US/Eastern]"
)
rng3 = date_range("3/11/2012 03:00", periods=15, freq="h", unit=unit)
rng3 = rng3.tz_localize("US/Eastern")
tm.assert_index_equal(rng._with_freq(None), rng3)
# DST transition time
val = rng[0]
exp = Timestamp("3/11/2012 03:00", tz="US/Eastern")
assert val.hour == 3
assert exp.hour == 3
assert val == exp # same UTC value
        tm.assert_index_equal(rng[:2], rng2)

    def test_date_range_localize2(self, unit):
# Right before the DST transition
rng = date_range(
"3/11/2012 00:00", periods=2, freq="h", tz="US/Eastern", unit=unit
)
rng2 = DatetimeIndex(
["3/11/2012 00:00", "3/11/2012 01:00"],
dtype=f"M8[{unit}, US/Eastern]",
freq="h",
)
tm.assert_index_equal(rng, rng2)
exp = Timestamp("3/11/2012 00:00", tz="US/Eastern")
assert exp.hour == 0
assert rng[0] == exp
exp = Timestamp("3/11/2012 01:00", tz="US/Eastern")
assert exp.hour == 1
assert rng[1] == exp
rng = date_range(
"3/11/2012 00:00", periods=10, freq="h", tz="US/Eastern", unit=unit
)
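        # 02:00 does not exist on 2012-03-11 in US/Eastern (spring-forward),
        # so the third element jumps from 01:00 straight to 03:00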
        assert rng[2].hour == 3

    def test_timestamp_equality_different_timezones(self):
utc_range = date_range("1/1/2000", periods=20, tz="UTC")
eastern_range = utc_range.tz_convert("US/Eastern")
berlin_range = utc_range.tz_convert("Europe/Berlin")
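        # tz_convert preserves the underlying UTC instants, so element-wise
        # and array-wise comparisons across the three zones all hold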
for a, b, c in zip(utc_range, eastern_range, berlin_range):
assert a == b
assert b == c
assert a == c
assert (utc_range == eastern_range).all()
assert (utc_range == berlin_range).all()
        assert (berlin_range == eastern_range).all()

    def test_dti_equals_with_tz(self):
left = date_range("1/1/2011", periods=100, freq="h", tz="utc")
right = date_range("1/1/2011", periods=100, freq="h", tz="US/Eastern")
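        # identical wall-clock labels in different zones are different instants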
        assert not left.equals(right)

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_tz_nat(self, tzstr):
idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT])
assert isna(idx[1])
        assert idx[0].tzinfo is not None

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_utc_box_timestamp_and_localize(self, tzstr):
tz = timezones.maybe_get_tz(tzstr)
rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc")
rng_eastern = rng.tz_convert(tzstr)
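        # the boxed scalar from the converted index should match converting
        # the original UTC Timestamp directly via astimezone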
expected = rng[-1].astimezone(tz)
stamp = rng_eastern[-1]
assert stamp == expected
assert stamp.tzinfo == expected.tzinfo
# right tzinfo
rng = date_range("3/13/2012", "3/14/2012", freq="h", tz="utc")
rng_eastern = rng.tz_convert(tzstr)
# test not valid for dateutil timezones.
# assert 'EDT' in repr(rng_eastern[0].tzinfo)
assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr(
rng_eastern[0].tzinfo
        )

    @pytest.mark.parametrize("tz", [pytz.timezone("US/Central"), gettz("US/Central")])
def test_with_tz(self, tz):
# just want it to work
start = datetime(2011, 3, 12, tzinfo=pytz.utc)
dr = bdate_range(start, periods=50, freq=pd.offsets.Hour())
assert dr.tz is pytz.utc
# DateRange with naive datetimes
dr = bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc)
dr = bdate_range("1/1/2005", "1/1/2009", tz=tz)
# normalized
central = dr.tz_convert(tz)
assert central.tz is tz
naive = central[0].to_pydatetime().replace(tzinfo=None)
comp = conversion.localize_pydatetime(naive, tz).tzinfo
assert central[0].tz is comp
# compare vs a localized tz
naive = dr[0].to_pydatetime().replace(tzinfo=None)
comp = conversion.localize_pydatetime(naive, tz).tzinfo
assert central[0].tz is comp
# datetimes with tzinfo set
dr = bdate_range(
datetime(2005, 1, 1, tzinfo=pytz.utc), datetime(2009, 1, 1, tzinfo=pytz.utc)
)
msg = "Start and end cannot both be tz-aware with different timezones"
with pytest.raises(Exception, match=msg):
            bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), "1/1/2009", tz=tz)

    @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
def test_dti_convert_tz_aware_datetime_datetime(self, tz):
# GH#1581
dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]
dates_aware = [conversion.localize_pydatetime(x, tz) for x in dates]
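        # constructing a DatetimeIndex from tz-aware datetimes should retain the tz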
result = DatetimeIndex(dates_aware).as_unit("ns")
assert timezones.tz_compare(result.tz, tz)
converted = to_datetime(dates_aware, utc=True).as_unit("ns")
ex_vals = np.array([Timestamp(x).as_unit("ns")._value for x in dates_aware])
tm.assert_numpy_array_equal(converted.asi8, ex_vals)
assert converted.tz is timezone.utc