Updated script that can be controlled by a Node.js web app
This commit is contained in:
@ -0,0 +1,499 @@
|
||||
"""
|
||||
Also test support for datetime64[ns] in Series / DataFrame
|
||||
"""
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
import re
|
||||
|
||||
from dateutil.tz import (
|
||||
gettz,
|
||||
tzutc,
|
||||
)
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_fancy_getitem():
    """Look up one element of a WOM-1FRI-indexed Series through several key types."""
    first_fridays = date_range(
        start=datetime(2005, 1, 1), end=datetime(2010, 1, 1), freq="WOM-1FRI"
    )
    ser = Series(np.arange(len(first_fridays)), index=first_fridays)

    # An integer key is expected to emit the positional-access FutureWarning.
    warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
        assert ser[48] == 48

    # String, datetime and Timestamp spellings of the same day all hit element 48.
    target = datetime(2009, 1, 2)
    assert ser["1/2/2009"] == 48
    assert ser["2009-1-2"] == 48
    assert ser[target] == 48
    assert ser[Timestamp(target)] == 48

    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        ser["2009-1-3"]

    # String-bounded and datetime-bounded slices must select the same rows.
    by_strings = ser["3/6/2009":"2009-06-05"]
    by_datetimes = ser[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    tm.assert_series_equal(by_strings, by_datetimes)
|
||||
|
||||
|
||||
def test_fancy_setitem():
|
||||
dti = date_range(
|
||||
freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
|
||||
)
|
||||
|
||||
s = Series(np.arange(len(dti)), index=dti)
|
||||
|
||||
msg = "Series.__setitem__ treating keys as positions is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s[48] = -1
|
||||
assert s.iloc[48] == -1
|
||||
s["1/2/2009"] = -2
|
||||
assert s.iloc[48] == -2
|
||||
s["1/2/2009":"2009-06-05"] = -3
|
||||
assert (s[48:54] == -3).all()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
def test_getitem_setitem_datetime_tz(tz_source):
    """Set and restore one element of a tz-aware Series via keys in other timezones."""
    if tz_source == "pytz":
        tzget = pytz.timezone
    else:

        def tzget(name):
            # handle special case for utc in dateutil
            return tzutc() if name == "UTC" else gettz(name)

    N = 50
    # testing with timezone, GH #2785
    eastern_hours = date_range(
        "1/1/1990", periods=N, freq="h", tz=tzget("US/Eastern")
    )
    ts = Series(np.random.default_rng(2).standard_normal(N), index=eastern_hours)

    # also test Timestamp tz handling, GH #2789
    # Each key below is used to overwrite an element and then restore it to
    # ts.iloc[4]; the copy must end up equal to ts again.
    for key in ("1990-01-01 09:00:00+00:00", "1990-01-01 03:00:00-06:00"):
        roundtrip = ts.copy()
        roundtrip[key] = 0
        roundtrip[key] = ts.iloc[4]
        tm.assert_series_equal(roundtrip, ts)

    # repeat with datetimes
    roundtrip = ts.copy()
    roundtrip[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0
    roundtrip[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts.iloc[4]
    tm.assert_series_equal(roundtrip, ts)

    roundtrip = ts.copy()
    central = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
    central_py = central.to_pydatetime()
    roundtrip[central_py] = 0
    roundtrip[central_py] = ts.iloc[4]
    tm.assert_series_equal(roundtrip, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_datetimeindex():
    """Get/set on a tz-aware hourly Series with string, datetime and index keys."""
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz="US/Eastern")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)

    start_str = "1990-01-01 04:00:00"
    stop_str = "1990-01-01 07:00:00"
    naive = datetime(1990, 1, 1, 4)
    naive_stop = datetime(1990, 1, 1, 7)

    # --- scalar string key -------------------------------------------------
    assert ts[start_str] == ts.iloc[4]

    roundtrip = ts.copy()
    roundtrip[start_str] = 0
    roundtrip[start_str] = ts.iloc[4]
    tm.assert_series_equal(roundtrip, ts)

    # --- string slice ------------------------------------------------------
    tm.assert_series_equal(ts[start_str:stop_str], ts[4:8])

    roundtrip = ts.copy()
    roundtrip[start_str:stop_str] = 0
    roundtrip[start_str:stop_str] = ts[4:8]
    tm.assert_series_equal(roundtrip, ts)

    # --- boolean masks built from string bounds ----------------------------
    # GH#18435 strings get a pass from tzawareness compat
    in_window = (ts.index >= start_str) & (ts.index <= stop_str)
    tm.assert_series_equal(ts[in_window], ts[4:8])

    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    in_window = (ts.index >= lb) & (ts.index <= rb)
    tm.assert_series_equal(ts[in_window], ts[4:8])

    # --- tz-naive datetime-like keys ---------------------------------------
    # But we do not give datetimes a pass on tzawareness compat
    for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
        with pytest.raises(KeyError, match=re.escape(repr(key))):
            # GH#36148 as of 2.0 we require tzawareness-compat
            ts[key]

    enlarged = ts.copy()
    # GH#36148 as of 2.0 we do not ignore tzawareness mismatch in indexing,
    # so setting it as a new key casts to object rather than matching
    # rng[4]
    enlarged[naive] = ts.iloc[4]
    assert enlarged.index.dtype == object
    tm.assert_index_equal(enlarged.index[:-1], rng.astype(object))
    assert enlarged.index[-1] == naive

    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        ts[naive:naive_stop]

    untouched = ts.copy()
    for fill in (0, 99):
        with pytest.raises(TypeError, match=msg):
            # GH#36148 require tzawareness compat as of 2.0
            untouched[naive:naive_stop] = fill
    # the __setitems__ here failed, so result should still match ts
    tm.assert_series_equal(untouched, ts)

    msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= naive) & (ts.index <= naive_stop)]

    # --- tz-aware Timestamp bounds -----------------------------------------
    lb = Timestamp(naive).tz_localize(rng.tzinfo)
    rb = Timestamp(naive_stop).tz_localize(rng.tzinfo)
    in_window = (ts.index >= lb) & (ts.index <= rb)
    tm.assert_series_equal(ts[in_window], ts[4:8])

    # --- keys taken from the index itself ----------------------------------
    assert ts[ts.index[4]] == ts.iloc[4]
    tm.assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    roundtrip = ts.copy()
    roundtrip[ts.index[4:8]] = 0
    roundtrip.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(roundtrip, ts)

    # also test partial date slicing
    tm.assert_series_equal(ts["1990-01-02"], ts[24:48])

    roundtrip = ts.copy()
    roundtrip["1990-01-02"] = 0
    roundtrip["1990-01-02"] = ts[24:48]
    tm.assert_series_equal(roundtrip, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_periodindex():
    """Get/set on an hourly PeriodIndex Series with string and index keys."""
    N = 50
    hours = period_range("1/1/1990", periods=N, freq="h")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=hours)

    start_str = "1990-01-01 04"
    stop_str = "1990-01-01 07"

    # scalar string key
    assert ts[start_str] == ts.iloc[4]

    roundtrip = ts.copy()
    roundtrip[start_str] = 0
    roundtrip[start_str] = ts.iloc[4]
    tm.assert_series_equal(roundtrip, ts)

    # string slice
    tm.assert_series_equal(ts[start_str:stop_str], ts[4:8])

    roundtrip = ts.copy()
    roundtrip[start_str:stop_str] = 0
    roundtrip[start_str:stop_str] = ts[4:8]
    tm.assert_series_equal(roundtrip, ts)

    # boolean mask built by comparing the index against the string bounds
    in_window = (ts.index >= start_str) & (ts.index <= stop_str)
    tm.assert_series_equal(ts[in_window], ts[4:8])

    # GH 2782
    assert ts[ts.index[4]] == ts.iloc[4]
    tm.assert_series_equal(ts[ts.index[4:8]], ts[4:8])

    roundtrip = ts.copy()
    roundtrip[ts.index[4:8]] = 0
    roundtrip.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(roundtrip, ts)
|
||||
|
||||
|
||||
def test_datetime_indexing():
    """A missing Timestamp raises KeyError on lookup and can then be assigned."""
    repeated = date_range("1/1/2000", "1/7/2000").repeat(3)
    missing = Timestamp("1/8/2000")
    missing_pattern = re.escape(repr(missing))

    # Run once on the index as built and once reversed ("not monotonic").
    for reverse in (False, True):
        ser = Series(len(repeated), index=repeated)
        if reverse:
            ser = ser[::-1]

        with pytest.raises(KeyError, match=missing_pattern):
            ser[missing]
        ser[missing] = 0
        assert ser[missing] == 0
|
||||
|
||||
|
||||
# test duplicates in time series
|
||||
|
||||
|
||||
def test_indexing_with_duplicate_datetimeindex(
    rand_series_with_duplicate_datetimeindex,
):
    """Get/set by each unique date of a Series whose index contains duplicates."""
    ts = rand_series_with_duplicate_datetimeindex

    for date in ts.index.unique():
        hits = ts.index == date
        selected = ts[date]
        by_mask = ts[hits]

        # More than one match gives a Series; a single match gives a scalar.
        if hits.sum() > 1:
            tm.assert_series_equal(selected, by_mask)
        else:
            tm.assert_almost_equal(selected, by_mask.iloc[0])

        # Assigning through the date must zero exactly the matching rows.
        zeroed = ts.copy()
        zeroed[date] = 0
        tm.assert_series_equal(
            zeroed, Series(np.where(hits, 0, ts), index=ts.index)
        )

    absent = datetime(2000, 1, 6)
    with pytest.raises(KeyError, match=re.escape(repr(absent))):
        ts[absent]

    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0
|
||||
|
||||
|
||||
def test_loc_getitem_over_size_cutoff(monkeypatch):
    """``.loc`` lookup on a datetime-indexed frame longer than the patched cutoff."""
    # #1821

    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    # create large list of non periodic datetime
    one_sec = timedelta(seconds=1)
    half_sec = timedelta(microseconds=500000)
    cursor = datetime(2011, 12, 5, 20, 30)
    n = 1100
    dates = []
    for _ in range(n):
        dates.extend(
            [
                cursor,
                cursor + one_sec,
                cursor + one_sec + half_sec,
                cursor + one_sec + one_sec + half_sec,
            ]
        )
        cursor += 3 * one_sec

    # duplicate some values in the list
    dup_positions = np.random.default_rng(2).integers(0, len(dates) - 1, 20)
    for pos in dup_positions:
        dates[pos + 1] = dates[pos]

    df = DataFrame(
        np.random.default_rng(2).standard_normal((len(dates), 4)),
        index=dates,
        columns=list("ABCD"),
    )

    timestamp = df.index[n * 3]
    assert timestamp in df.index

    # it works!
    df.loc[timestamp]
    assert len(df.loc[[timestamp]]) > 0
|
||||
|
||||
|
||||
def test_indexing_over_size_cutoff_period_index(monkeypatch):
    """Label lookup on a PeriodIndex whose engine reports it is over the cutoff."""
    # GH 27136

    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    n = 1100
    minutes = period_range("1/1/2000", freq="min", periods=n)
    assert minutes._engine.over_size_threshold

    ser = Series(
        np.random.default_rng(2).standard_normal(len(minutes)), index=minutes
    )

    last_period = minutes[n - 1]
    assert last_period in ser.index

    # it works!
    ser[last_period]
    assert len(ser.loc[[last_period]]) > 0
|
||||
|
||||
|
||||
def test_indexing_unordered():
    """Scalar and partial-string lookups on a shuffled copy of a daily Series."""
    # GH 2437
    days = date_range(start="2011-01-01", end="2011-01-15")
    ts = Series(np.random.default_rng(2).random(len(days)), index=days)
    # Same data with the middle and tail pieces swapped, so the index is unordered.
    shuffled = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    for stamp in ts.index:
        assert ts[stamp] == shuffled[stamp]

    # GH 3448 (ranges)
    def compare(slobj):
        from_shuffled = shuffled[slobj].copy().sort_index()
        from_ordered = ts[slobj]
        from_ordered.index = from_ordered.index._with_freq(None)
        tm.assert_series_equal(from_shuffled, from_ordered)

    range_keys = [
        slice("2011-01-01", "2011-01-15"),
        slice("2010-12-30", "2011-01-15"),
        slice("2011-01-01", "2011-01-16"),
        # partial ranges
        slice("2011-01-01", "2011-01-6"),
        slice("2011-01-06", "2011-01-8"),
        slice("2011-01-06", "2011-01-12"),
    ]
    for key in range_keys:
        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            compare(key)

    # single values
    year_from_shuffled = shuffled["2011"].sort_index()
    year_from_ordered = ts["2011"]
    year_from_ordered.index = year_from_ordered.index._with_freq(None)
    tm.assert_series_equal(year_from_shuffled, year_from_ordered)
|
||||
|
||||
|
||||
def test_indexing_unordered2():
|
||||
# diff freq
|
||||
rng = date_range(datetime(2005, 1, 1), periods=20, freq="ME")
|
||||
ts = Series(np.arange(len(rng)), index=rng)
|
||||
ts = ts.take(np.random.default_rng(2).permutation(20))
|
||||
|
||||
result = ts["2005"]
|
||||
for t in result.index:
|
||||
assert t.year == 2005
|
||||
|
||||
|
||||
def test_indexing():
    """Year-string get/set on a month-end Series and on a one-column DataFrame."""
    month_ends = date_range("2001-1-1", periods=20, freq="ME")
    ts = Series(np.random.default_rng(2).random(len(month_ends)), index=month_ends)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    tm.assert_series_equal(ts["2001"], ts.iloc[:12])

    df = DataFrame({"A": ts.copy()})

    # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves
    # like any other key, so raises
    with pytest.raises(KeyError, match="2001"):
        df["2001"]

    # setting
    ts = Series(np.random.default_rng(2).random(len(month_ends)), index=month_ends)
    expected_ser = ts.copy()
    expected_ser.iloc[:12] = 1
    ts["2001"] = 1
    tm.assert_series_equal(ts, expected_ser)

    expected_df = df.copy()
    expected_df.iloc[:12, 0] = 1
    df.loc["2001", "A"] = 1
    tm.assert_frame_equal(df, expected_df)
|
||||
|
||||
|
||||
def test_getitem_str_month_with_datetimeindex():
    """A month string selects every row of a Series confined to that month's last day."""
    # GH3546 (not including times on the last day)
    for last_stamp, freq in (("2013-05-31 23:00", "h"), ("2013-05-31 23:59", "s")):
        stamps = date_range(start="2013-05-31 00:00", end=last_stamp, freq=freq)
        ts = Series(range(len(stamps)), index=stamps)
        whole_month = ts["2013-05"]
        tm.assert_series_equal(whole_month, ts)
|
||||
|
||||
|
||||
def test_getitem_str_year_with_datetimeindex():
    """A year string selects both rows, including the microsecond-resolution one."""
    first = Timestamp("2013-05-31 00:00")
    last = Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))
    stamps = [first, last]
    ts = Series(range(len(stamps)), index=stamps)

    whole_year = ts["2013"]
    tm.assert_series_equal(whole_year, ts)
|
||||
|
||||
|
||||
def test_getitem_str_second_with_datetimeindex():
    """``df[key]`` with a row timestamp (string or Timestamp) raises KeyError."""
    # GH14826, indexing with a seconds resolution string / datetime object
    seconds = date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s")
    df = DataFrame(
        np.random.default_rng(2).random((5, 5)),
        columns=["open", "high", "low", "close", "volume"],
        index=seconds,
    )

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df["2012-01-02 18:01:02"]

    stamp_pattern = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)"
    with pytest.raises(KeyError, match=stamp_pattern):
        df[df.index[2]]
|
||||
|
||||
|
||||
def test_compare_datetime_with_all_none():
    """``>`` between a datetime64 Series and an all-None Series is False everywhere."""
    # GH#54870
    dates = Series(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    nones = Series([None, None])

    comparison = dates > nones

    tm.assert_series_equal(comparison, Series([False, False]))
|
@ -0,0 +1,70 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesDelItem:
|
||||
def test_delitem(self):
|
||||
# GH#5542
|
||||
# should delete the item inplace
|
||||
s = Series(range(5))
|
||||
del s[0]
|
||||
|
||||
expected = Series(range(1, 5), index=range(1, 5))
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
del s[1]
|
||||
expected = Series(range(2, 5), index=range(2, 5))
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# only 1 left, del, add, del
|
||||
s = Series(1)
|
||||
del s[0]
|
||||
tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
|
||||
s[0] = 1
|
||||
tm.assert_series_equal(s, Series(1))
|
||||
del s[0]
|
||||
tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
|
||||
|
||||
def test_delitem_object_index(self, using_infer_string):
|
||||
# Index(dtype=object)
|
||||
dtype = "string[pyarrow_numpy]" if using_infer_string else object
|
||||
s = Series(1, index=Index(["a"], dtype=dtype))
|
||||
del s["a"]
|
||||
tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
|
||||
s["a"] = 1
|
||||
tm.assert_series_equal(s, Series(1, index=Index(["a"], dtype=dtype)))
|
||||
del s["a"]
|
||||
tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
|
||||
|
||||
def test_delitem_missing_key(self):
|
||||
# empty
|
||||
s = Series(dtype=object)
|
||||
|
||||
with pytest.raises(KeyError, match=r"^0$"):
|
||||
del s[0]
|
||||
|
||||
def test_delitem_extension_dtype(self):
|
||||
# GH#40386
|
||||
# DatetimeTZDtype
|
||||
dti = date_range("2016-01-01", periods=3, tz="US/Pacific")
|
||||
ser = Series(dti)
|
||||
|
||||
expected = ser[[0, 2]]
|
||||
del ser[1]
|
||||
assert ser.dtype == dti.dtype
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
# PeriodDtype
|
||||
pi = dti.tz_localize(None).to_period("D")
|
||||
ser = Series(pi)
|
||||
|
||||
expected = ser[:2]
|
||||
del ser[2]
|
||||
assert ser.dtype == pi.dtype
|
||||
tm.assert_series_equal(ser, expected)
|
@ -0,0 +1,238 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_get():
    """``Series.get`` returns the stored value for a present label, else the default."""
    # GH 6383
    raw_values = [
        43, 48, 60, 48, 50, 51, 50, 45, 57, 48,
        56, 45, 51, 39, 55, 43, 54, 52, 51, 54,
    ]  # fmt: skip
    float_labels = [
        25.0, 36.0, 49.0, 64.0, 81.0, 100.0, 121.0, 144.0, 169.0, 196.0,
        1225.0, 1296.0, 1369.0, 1444.0, 1521.0, 1600.0, 1681.0, 1764.0,
        1849.0, 1936.0,
    ]  # fmt: skip

    # Without an explicit index there is no label 25, so the default comes back.
    default_indexed = Series(np.array(raw_values))
    assert default_indexed.get(25, 0) == 0

    # With the float index, 25 is the first label and maps to the first value.
    float_indexed = Series(
        np.array(raw_values),
        index=Index(float_labels, dtype=np.float64),
    )
    assert float_indexed.get(25, 0) == 43

    # GH 7407
    # with a boolean accessor
    df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})

    int_counts = df.i.value_counts()
    assert int_counts.get(99, default="Missing") == "Missing"

    bool_counts = df.b.value_counts()
    assert bool_counts.get(False, default="Missing") == 3
    assert bool_counts.get(True, default="Missing") == "Missing"
|
||||
|
||||
|
||||
def test_get_nan(float_numpy_dtype):
|
||||
# GH 8569
|
||||
s = Index(range(10), dtype=float_numpy_dtype).to_series()
|
||||
assert s.get(np.nan) is None
|
||||
assert s.get(np.nan, default="Missing") == "Missing"
|
||||
|
||||
|
||||
def test_get_nan_multiple(float_numpy_dtype):
|
||||
# GH 8569
|
||||
# ensure that fixing "test_get_nan" above hasn't broken get
|
||||
# with multiple elements
|
||||
s = Index(range(10), dtype=float_numpy_dtype).to_series()
|
||||
|
||||
idx = [2, 30]
|
||||
assert s.get(idx) is None
|
||||
|
||||
idx = [2, np.nan]
|
||||
assert s.get(idx) is None
|
||||
|
||||
# GH 17295 - all missing keys
|
||||
idx = [20, 30]
|
||||
assert s.get(idx) is None
|
||||
|
||||
idx = [np.nan, np.nan]
|
||||
assert s.get(idx) is None
|
||||
|
||||
|
||||
def test_get_with_default():
|
||||
# GH#7725
|
||||
d0 = ["a", "b", "c", "d"]
|
||||
d1 = np.arange(4, dtype="int64")
|
||||
|
||||
for data, index in ((d0, d1), (d1, d0)):
|
||||
s = Series(data, index=index)
|
||||
for i, d in zip(index, data):
|
||||
assert s.get(i) == d
|
||||
assert s.get(i, d) == d
|
||||
assert s.get(i, "z") == d
|
||||
|
||||
assert s.get("e", "z") == "z"
|
||||
assert s.get("e", "e") == "e"
|
||||
|
||||
msg = "Series.__getitem__ treating keys as positions is deprecated"
|
||||
warn = None
|
||||
if index is d0:
|
||||
warn = FutureWarning
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
assert s.get(10, "z") == "z"
|
||||
assert s.get(10, 10) == 10
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr",
    [
        np.random.default_rng(2).standard_normal(10),
        DatetimeIndex(date_range("2020-01-01", periods=10), name="a").tz_localize(
            tz="US/Eastern"
        ),
    ],
)
def test_get_with_ea(arr):
    """``Series.get`` with scalar, list and slice keys on int- and str-labelled data."""
    # GH#21260
    even_labelled = Series(arr, index=list(range(0, 2 * len(arr), 2)))
    assert even_labelled.get(4) == even_labelled.iloc[2]

    tm.assert_series_equal(even_labelled.get([4, 6]), even_labelled.iloc[[2, 3]])
    tm.assert_series_equal(even_labelled.get(slice(2)), even_labelled.iloc[[0, 1]])

    assert even_labelled.get(-1) is None
    assert even_labelled.get(even_labelled.index.max() + 1) is None

    letter_labelled = Series(arr[:6], index=list("abcdef"))
    assert letter_labelled.get("c") == letter_labelled.iloc[2]

    tm.assert_series_equal(
        letter_labelled.get(slice("b", "d")), letter_labelled.iloc[[1, 2, 3]]
    )

    assert letter_labelled.get("Z") is None

    # Integer keys on the string-labelled Series are expected to emit the
    # positional-access FutureWarning.
    warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
        assert letter_labelled.get(4) == letter_labelled.iloc[4]
    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
        assert letter_labelled.get(-1) == letter_labelled.iloc[-1]
    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
        assert letter_labelled.get(len(letter_labelled)) is None

    # GH#21257
    every_other = Series(arr)[::2]
    assert every_other.get(1) is None
|
||||
|
||||
|
||||
def test_getitem_get(string_series, object_series):
|
||||
msg = "Series.__getitem__ treating keys as positions is deprecated"
|
||||
|
||||
for obj in [string_series, object_series]:
|
||||
idx = obj.index[5]
|
||||
|
||||
assert obj[idx] == obj.get(idx)
|
||||
assert obj[idx] == obj.iloc[5]
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
assert string_series.get(-1) == string_series.get(string_series.index[-1])
|
||||
assert string_series.iloc[5] == string_series.get(string_series.index[5])
|
||||
|
||||
|
||||
def test_get_none():
    """``get(None)`` returns None for an empty and for a string-labelled Series."""
    # GH#5652
    no_index = Series(dtype=object)
    abc_index = Series(dtype=object, index=list("abc"))

    for ser in (no_index, abc_index):
        assert ser.get(None) is None
|
@ -0,0 +1,735 @@
|
||||
"""
|
||||
Series.__getitem__ test classes are organized by the type of key passed.
|
||||
"""
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
time,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import (
|
||||
conversion,
|
||||
timezones,
|
||||
)
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexing import IndexingError
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
class TestSeriesGetitemScalars:
    """``Series.__getitem__`` with scalar keys."""

    def test_getitem_object_index_float_string(self):
        """Both the string and the float label of a mixed index are found."""
        # GH#17286
        mixed_index = Index(["a", "b", "c", 1.0])
        ser = Series([1] * 4, index=mixed_index)
        for label in ("a", 1.0):
            assert ser[label] == 1

    def test_getitem_float_keys_tuple_values(self):
        """A float key returns the tuple value, or a Series when the key repeats."""
        # see GH#13509
        tuples = [(1, 1), (2, 2), (3, 3)]

        # unique Index
        unique = Series(tuples, index=[0.0, 0.1, 0.2], name="foo")
        assert unique[0.0] == (1, 1)

        # non-unique Index
        duplicated = Series(tuples, index=[0.0, 0.0, 0.2], name="foo")
        both_hits = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo")
        tm.assert_series_equal(duplicated[0.0], both_hits)

    def test_getitem_unrecognized_scalar(self):
        """A ``np.dtype`` object works as a label."""
        # GH#32684 a scalar key that is not recognized by lib.is_scalar

        # a series that might be produced via `frame.dtypes`
        ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")])

        second_label = ser.index[1]
        assert ser[second_label] == 2

    def test_getitem_negative_out_of_bounds(self):
        """A negative integer beyond the length raises IndexError."""
        ser = Series(["a"] * 10, index=["a"] * 10)

        err_msg = (
            "index -11 is out of bounds for axis 0 with size 10|index out of bounds"
        )
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=err_msg), tm.assert_produces_warning(
            FutureWarning, match=warn_msg
        ):
            ser[-11]

    def test_getitem_out_of_bounds_indexerror(self, datetime_series):
        """An integer key equal to the length raises IndexError."""
        # don't segfault, GH#495
        err_msg = r"index \d+ is out of bounds for axis 0 with size \d+"
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=err_msg), tm.assert_produces_warning(
            FutureWarning, match=warn_msg
        ):
            datetime_series[len(datetime_series)]

    def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self):
        """An int key on an empty Series raises KeyError."""
        # GH#917
        # With a RangeIndex, an int key gives a KeyError
        empty = Series([], dtype=object)
        with pytest.raises(KeyError, match="-1"):
            empty[-1]

    def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype):
        """Absent int and str keys raise KeyError on duplicated integer indexes."""
        dtype = any_int_numpy_dtype
        sorted_index = Index([0, 0, 1, 1, 2, 2], dtype=dtype)
        # not monotonic
        unsorted_index = [2, 2, 0, 0, 1, 1]

        for index in (sorted_index, unsorted_index):
            ser = Series(np.random.default_rng(2).standard_normal(6), index=index)

            with pytest.raises(KeyError, match=r"^5$"):
                ser[5]

            with pytest.raises(KeyError, match=r"^'c'$"):
                ser["c"]

    def test_getitem_int64(self, datetime_series):
        """A ``np.int64`` key is treated like a positional integer, with a warning."""
        position = np.int64(5)
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            value = datetime_series[position]
        assert value == datetime_series.iloc[5]

    def test_getitem_full_range(self):
        """Selecting every label by list returns an equal Series."""
        # github.com/pandas-dev/pandas/commit/4f433773141d2eb384325714a2776bcc5b2e20f7
        labels = list(range(5))
        ser = Series(range(5), index=labels)
        tm.assert_series_equal(ser[list(range(5))], ser)

    # ------------------------------------------------------------------
    # Series with DatetimeIndex

    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
    def test_getitem_pydatetime_tz(self, tzstr):
        """A tz-aware Timestamp and a localized pydatetime select the same value."""
        tz = timezones.maybe_get_tz(tzstr)

        hours = date_range(
            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="h", tz=tzstr
        )
        ts = Series(index=hours, data=hours.hour)

        as_timestamp = Timestamp("2012-12-24 17:00", tz=tzstr)
        as_pydatetime = conversion.localize_pydatetime(
            datetime(2012, 12, 24, 17, 0), tz
        )
        assert ts[as_timestamp] == ts[as_pydatetime]

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_string_index_alias_tz_aware(self, tz):
        """A date string selects the matching element of a tz-aware daily Series."""
        days = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.default_rng(2).standard_normal(len(days)), index=days)

        third_day = ser["1/3/2000"]
        tm.assert_almost_equal(third_day, ser.iloc[2])

    def test_getitem_time_object(self):
        """A ``datetime.time`` key selects the rows at that time of day."""
        stamps = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = Series(
            np.random.default_rng(2).standard_normal(len(stamps)), index=stamps
        )

        at_half_past_nine = (stamps.hour == 9) & (stamps.minute == 30)
        by_time = ts[time(9, 30)]
        by_mask = ts[at_half_past_nine]
        by_time.index = by_time.index._with_freq(None)
        tm.assert_series_equal(by_time, by_mask)

    # ------------------------------------------------------------------
    # Series with CategoricalIndex

    def test_getitem_scalar_categorical_index(self):
        """An element of the Categorical used as index works as a key."""
        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

        ser = Series([1, 2], index=cats)

        assert ser[cats[0]] == ser.iloc[0]

    def test_getitem_numeric_categorical_listlike_matches_scalar(self):
        """Integer keys, scalar or in a list, are labels on a CategoricalIndex."""
        # GH#15470
        ser = Series(["a", "b", "c"], index=pd.CategoricalIndex([2, 1, 0]))

        # 0 is treated as a label
        assert ser[0] == "c"

        # the listlike analogue should also be treated as labels
        tm.assert_series_equal(ser[[0]], ser.iloc[-1:])
        tm.assert_series_equal(ser[[0, 1, 2]], ser.iloc[::-1])

    def test_getitem_integer_categorical_not_positional(self):
        """``get`` and ``[]`` both resolve 3 as a label of the category index."""
        # GH#14865
        category_index = Index([1, 2, 3], dtype="category")
        ser = Series(["a", "b", "c"], index=category_index)
        assert ser.get(3) == "c"
        assert ser[3] == "c"

    def test_getitem_str_with_timedeltaindex(self):
        """A timedelta string is a key when present and a KeyError when absent."""
        deltas = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(deltas)), index=deltas)

        present = "6 days, 23:11:12"
        assert deltas.get_loc(present) == 133
        assert ser[present] == ser.iloc[133]

        err_msg = r"^Timedelta\('50 days 00:00:00'\)$"
        with pytest.raises(KeyError, match=err_msg):
            deltas.get_loc("50 days")
        with pytest.raises(KeyError, match=err_msg):
            ser["50 days"]

    def test_getitem_bool_index_positional(self):
        """An int key on a bool-labelled Series is positional, with a warning."""
        # GH#48653
        ser = Series({True: 1, False: 0})
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            first = ser[0]
        assert first == 1
|
||||
|
||||
|
||||
class TestSeriesGetitemSlices:
    """Slice keys passed to ``Series.__getitem__`` for several index types."""

    def test_getitem_partial_str_slice_with_datetimeindex(self):
        # GH#34860
        full_range = date_range("1/1/2008", "1/1/2009")
        ser = full_range.to_series()

        only_2008 = date_range(start="2008-01-01", end="2008-12-31")
        expected = Series(only_2008, index=only_2008)

        tm.assert_series_equal(ser["2008"], expected)

    def test_getitem_slice_strings_with_datetimeindex(self):
        # Note the index contains a repeated date.
        idx = DatetimeIndex(
            ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
        )
        values = np.random.default_rng(2).standard_normal(len(idx))
        ts = Series(values, index=idx)

        tm.assert_series_equal(ts["1/2/2000":], ts[1:])
        tm.assert_series_equal(ts["1/2/2000":"1/3/2000"], ts[1:4])

    def test_getitem_partial_str_slice_with_timedeltaindex(self):
        tdi = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(tdi)), index=tdi)

        # (label-based slice, equivalent positional slice)
        cases = [
            (slice("5 day", "6 day"), slice(86, 134)),
            (slice("5 day", None), slice(86, None)),
            (slice(None, "6 day"), slice(None, 134)),
        ]
        for label_slice, positional_slice in cases:
            tm.assert_series_equal(ser[label_slice], ser.iloc[positional_slice])

    def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self):
        # higher reso
        tdi = timedelta_range("1 day 10:11:12", freq="us", periods=2000)
        ser = Series(np.arange(len(tdi)), index=tdi)

        tm.assert_series_equal(ser["1 day 10:11:12":], ser.iloc[0:])
        tm.assert_series_equal(ser["1 day 10:11:12.001":], ser.iloc[1000:])

        value = ser["1 days, 10:11:12.001001"]
        assert value == ser.iloc[1001]

    def test_getitem_slice_2d(self, datetime_series):
        # GH#30588 multi-dimensional indexing deprecated
        key = (slice(None), np.newaxis)
        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
            datetime_series[key]

    def test_getitem_median_slice_bug(self):
        bdays = date_range("20090415", "20090519", freq="2B")
        ser = Series(np.random.default_rng(2).standard_normal(13), index=bdays)

        slc = slice(6, 7, None)
        msg = "Indexing with a single-item list"
        with pytest.raises(ValueError, match=msg):
            # GH#31299
            ser[[slc]]

        # but we're OK with a single-element tuple
        tm.assert_series_equal(ser[(slc,)], ser[slc])

    @pytest.mark.parametrize(
        "slc, positions",
        [
            [slice(date(2018, 1, 1), None), [0, 1, 2]],
            [slice(date(2019, 1, 2), None), [2]],
            [slice(date(2020, 1, 1), None), []],
            [slice(None, date(2020, 1, 1)), [0, 1, 2]],
            [slice(None, date(2019, 1, 1)), [0]],
        ],
    )
    def test_getitem_slice_date(self, slc, positions):
        # https://github.com/pandas-dev/pandas/issues/31501
        stamps = DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"])
        ser = Series([0, 1, 2], stamps)

        tm.assert_series_equal(ser[slc], ser.take(positions))

    def test_getitem_slice_float_raises(self, datetime_series):
        template = (
            "cannot do slice indexing on DatetimeIndex with these indexers "
            r"\[{key}\] of type float"
        )
        with pytest.raises(TypeError, match=template.format(key=r"4\.0")):
            datetime_series[4.0:10.0]

        with pytest.raises(TypeError, match=template.format(key=r"4\.5")):
            datetime_series[4.5:10.0]

    def test_getitem_slice_bug(self):
        ser = Series(range(10), index=list(range(10)))

        # Negative bounds beyond the length are expected to clip, not raise.
        tm.assert_series_equal(ser[-12:], ser)
        tm.assert_series_equal(ser[-7:], ser[3:])
        tm.assert_series_equal(ser[:-12], ser[:0])

    def test_getitem_slice_integers(self):
        labels = [2, 4, 6, 8, 10, 12, 14, 16]
        ser = Series(np.random.default_rng(2).standard_normal(8), index=labels)

        head = ser[:4]
        expected = Series(ser.values[:4], index=[2, 4, 6, 8])
        tm.assert_series_equal(head, expected)
|
||||
|
||||
|
||||
class TestSeriesGetitemListLike:
    """List-like keys (list / ndarray / Index / Series) in ``Series.__getitem__``."""

    @pytest.mark.parametrize("box", [list, np.array, Index, Series])
    def test_getitem_no_matches(self, box):
        # GH#33462 we expect the same behavior for list/ndarray/Index/Series
        ser = Series(["A", "B"])

        key = Series(["C"], dtype=object)
        key = box(key)

        # The dtype alternation is grouped on purpose: an ungrouped "a|b"
        # would split the *entire* pattern into two much looser alternatives.
        msg = (
            r"None of \[Index\(\['C'\], dtype='(?:object|string)'\)\] "
            r"are in the \[index\]"
        )
        with pytest.raises(KeyError, match=msg):
            ser[key]

    def test_getitem_intlist_intindex_periodvalues(self):
        """Integer-list selection keeps the Period dtype of the values."""
        ser = Series(period_range("2000-01-01", periods=10, freq="D"))

        result = ser[[2, 4]]
        exp = Series(
            [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")],
            index=[2, 4],
            dtype="Period[D]",
        )
        tm.assert_series_equal(result, exp)
        assert result.dtype == "Period[D]"

    @pytest.mark.parametrize("box", [list, np.array, Index])
    def test_getitem_intlist_intervalindex_non_int(self, box):
        # GH#33404 fall back to positional since ints are unambiguous
        dti = date_range("2000-01-03", periods=3)._with_freq(None)
        ii = pd.IntervalIndex.from_breaks(dti)
        ser = Series(range(len(ii)), index=ii)

        expected = ser.iloc[:1]
        key = box([0])
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = ser[key]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("box", [list, np.array, Index])
    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
    def test_getitem_intlist_multiindex_numeric_level(self, dtype, box):
        # GH#33404 do _not_ fall back to positional since ints are ambiguous
        idx = Index(range(4)).astype(dtype)
        dti = date_range("2000-01-03", periods=3)
        mi = pd.MultiIndex.from_product([idx, dti])
        ser = Series(range(len(mi))[::-1], index=mi)

        key = box([5])
        with pytest.raises(KeyError, match="5"):
            ser[key]

    def test_getitem_uint_array_key(self, any_unsigned_int_numpy_dtype):
        # GH #37218
        ser = Series([1, 2, 3])
        key = np.array([4], dtype=any_unsigned_int_numpy_dtype)

        # 4 is not a label of the default index, for [] and .loc alike.
        with pytest.raises(KeyError, match="4"):
            ser[key]
        with pytest.raises(KeyError, match="4"):
            ser.loc[key]
|
||||
with pytest.raises(KeyError, match="4"):
|
||||
ser.loc[key]
|
||||
|
||||
|
||||
class TestGetitemBooleanMask:
    """Boolean-mask keys used to get (and, in one test, set) Series values."""

    def test_getitem_boolean(self, string_series):
        ser = string_series
        above_median = ser > ser.median()

        # passing list is OK
        from_list = ser[list(above_median)]
        tm.assert_series_equal(from_list, ser[above_median])
        tm.assert_index_equal(from_list.index, ser.index[above_median])

    def test_getitem_boolean_empty(self):
        empty = Series([], dtype=np.int64)
        empty.index.name = "index_name"
        empty = empty[empty.isna()]
        assert empty.index.name == "index_name"
        assert empty.dtype == np.int64

        # GH#5877
        # indexing with empty series
        ser = Series(["A", "B"], dtype=object)
        expected = Series(dtype=object, index=Index([], dtype="int64"))
        result = ser[Series([], dtype=object)]
        tm.assert_series_equal(result, expected)

        # invalid because of the boolean indexer
        # that's empty or not-aligned
        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        for bad_mask in (Series([], dtype=bool), Series([True], dtype=bool)):
            with pytest.raises(IndexingError, match=msg):
                ser[bad_mask]

    def test_getitem_boolean_object(self, string_series):
        # using column from DataFrame
        ser = string_series
        mask = ser > ser.median()
        omask = mask.astype(object)

        # getitem
        tm.assert_series_equal(ser[omask], ser[mask])

        # setitem
        via_object = ser.copy()
        via_bool = ser.copy()
        via_object[omask] = 5
        via_bool[mask] = 5
        tm.assert_series_equal(via_object, via_bool)

        # nans raise exception
        omask[5:10] = np.nan
        msg = "Cannot mask with non-boolean array containing NA / NaN values"
        with pytest.raises(ValueError, match=msg):
            ser[omask]
        with pytest.raises(ValueError, match=msg):
            ser[omask] = 5

    def test_getitem_boolean_dt64_copies(self):
        # GH#36210
        dti = date_range("2016-01-01", periods=4, tz="US/Pacific")
        key = np.array([True, True, False, False])

        tz_ser = Series(dti._data)
        tz_result = tz_ser[key]
        assert tz_result._values._ndarray.base is None

        # compare with numeric case for reference
        int_ser = Series(range(4))
        int_result = int_ser[key]
        assert int_result._values.base is None

    def test_getitem_boolean_corner(self, datetime_series):
        ts = datetime_series
        mask_shifted = ts.shift(1, freq=BDay()) > ts.median()

        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        # Plain [] and .loc are both expected to reject the shifted mask.
        for indexer in (ts, ts.loc):
            with pytest.raises(IndexingError, match=msg):
                indexer[mask_shifted]

    def test_getitem_boolean_different_order(self, string_series):
        # The mask comes from a re-sorted copy of the same Series.
        ordered = string_series.sort_values()

        selected = string_series[ordered > 0]
        tm.assert_series_equal(selected, string_series[string_series > 0])

    def test_getitem_boolean_contiguous_preserve_freq(self):
        rng = date_range("1/1/2000", "3/1/2000", freq="B")

        mask = np.zeros(len(rng), dtype=bool)
        mask[10:20] = True

        # A contiguous run of True keeps the frequency of the source index.
        expected = rng[10:20]
        assert expected.freq == rng.freq
        tm.assert_index_equal(rng[mask], expected)

        # One extra, non-adjacent True breaks the regular spacing.
        mask[22] = True
        assert rng[mask].freq is None
|
||||
|
||||
|
||||
class TestGetitemCallable:
    """Callable keys: the callable's return value is used as the actual key."""

    def test_getitem_callable(self):
        # GH#12533
        ser = Series(4, index=list("ABCD"))

        # callable returning a scalar label
        scalar = ser[lambda x: "A"]
        assert scalar == ser.loc["A"]

        # callable returning a list of labels
        by_labels = ser[lambda x: ["A", "B"]]
        tm.assert_series_equal(by_labels, ser.loc[["A", "B"]])

        # callable returning a boolean mask
        by_mask = ser[lambda x: [True, False, True, True]]
        tm.assert_series_equal(by_mask, ser.iloc[[0, 2, 3]])
|
||||
|
||||
|
||||
def test_getitem_generator(string_series):
    """A generator or iterator of booleans selects like the boolean Series."""
    from_genexpr = string_series[(x > 0 for x in string_series)]
    from_iterator = string_series[iter(string_series > 0)]

    expected = string_series[string_series > 0]
    tm.assert_series_equal(from_genexpr, expected)
    tm.assert_series_equal(from_iterator, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series",
    [
        Series([0, 1]),
        Series(date_range("2012-01-01", periods=2)),
        Series(date_range("2012-01-01", periods=2, tz="CET")),
    ],
)
def test_getitem_ndim_deprecated(series):
    """``series[:, None]`` raises ValueError for each parametrized Series."""
    two_dim_key = (slice(None), None)
    with pytest.raises(ValueError, match="Multi-dimensional indexing"):
        series[two_dim_key]
|
||||
|
||||
|
||||
def test_getitem_multilevel_scalar_slice_not_implemented(
    multiindex_year_month_day_dataframe_random_data,
):
    """A (scalar, slice) tuple key on a MultiIndex Series raises TypeError."""
    # not implementing this for now
    frame = multiindex_year_month_day_dataframe_random_data
    column = frame["A"]

    expected_message = r"\(2000, slice\(3, 4, None\)\)"
    with pytest.raises(TypeError, match=expected_message):
        column[2000, 3:4]
|
||||
|
||||
|
||||
def test_getitem_dataframe_raises():
    """Indexing a Series with a boolean DataFrame raises TypeError."""
    labels = list(range(10))
    ser = Series(10, index=labels)
    frame = DataFrame(labels, index=labels)

    msg = (
        "Indexing a Series with DataFrame is not supported, "
        "use the appropriate DataFrame column"
    )
    frame_mask = frame > 5
    with pytest.raises(TypeError, match=msg):
        ser[frame_mask]
|
||||
|
||||
|
||||
def test_getitem_assignment_series_alignment():
    """Assigning a Series through ``ser[array] = other`` is done by position."""
    # https://github.com/pandas-dev/pandas/issues/37427
    # with getitem, when assigning with a Series, it is not first aligned
    target = Series(range(10))
    positions = np.array([2, 4, 9])
    replacement = Series([10, 11, 12])

    target[positions] = replacement

    tm.assert_series_equal(target, Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12]))
|
||||
|
||||
|
||||
def test_getitem_duplicate_index_mistyped_key_raises_keyerror():
    """``None`` as key on a duplicated float index raises KeyError at every layer."""
    # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError
    ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])

    # Series lookup, Index.get_loc and the index engine are checked in turn.
    lookups = (
        lambda: ser[None],
        lambda: ser.index.get_loc(None),
        lambda: ser.index._engine.get_loc(None),
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match="None"):
            lookup()
|
||||
|
||||
|
||||
def test_getitem_1tuple_slice_without_multiindex():
    """A 1-tuple wrapping a slice behaves like the bare slice."""
    ser = Series(range(5))
    bare_slice = slice(3)

    wrapped_result = ser[(bare_slice,)]
    tm.assert_series_equal(wrapped_result, ser[bare_slice])
|
||||
|
||||
|
||||
def test_getitem_preserve_name(datetime_series):
    """Mask, integer-list and slice selections all keep the Series name."""
    original_name = datetime_series.name

    masked = datetime_series[datetime_series > 0]
    assert masked.name == original_name

    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        by_int_list = datetime_series[[0, 2, 4]]
    assert by_int_list.name == original_name

    sliced = datetime_series[5:10]
    assert sliced.name == original_name
|
||||
|
||||
|
||||
def test_getitem_with_integer_labels():
    """Integer list/array keys with absent labels raise KeyError on an int index."""
    # integer indexes, be careful
    ser = Series(
        np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2))
    )

    # 5 and 7 are not labels of the even-numbered index.
    for key in ([0, 2, 5, 7, 8], np.array([0, 2, 5, 7, 8])):
        with pytest.raises(KeyError, match="not in index"):
            ser[key]
|
||||
|
||||
|
||||
def test_getitem_missing(datetime_series):
    """A timestamp one business day before the first label raises KeyError."""
    # missing
    before_start = datetime_series.index[0] - BDay()
    expected_message = r"Timestamp\('1999-12-31 00:00:00'\)"
    with pytest.raises(KeyError, match=expected_message):
        datetime_series[before_start]
|
||||
|
||||
|
||||
def test_getitem_fancy(string_series, object_series):
    """Integer-list keys on the fixtures warn and select by position."""
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        str_picked = string_series[[1, 2, 3]]
        obj_picked = object_series[[1, 2, 3]]

    # Element 1 of each selection must be element 2 of its source.
    assert string_series.index[2] == str_picked.index[1]
    assert object_series.index[2] == obj_picked.index[1]
    assert string_series.iloc[2] == str_picked.iloc[1]
    assert object_series.iloc[2] == obj_picked.iloc[1]
|
||||
|
||||
|
||||
def test_getitem_box_float64(datetime_series):
    """A scalar taken from the float fixture is returned as ``np.float64``."""
    warning_text = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=warning_text):
        element = datetime_series[5]
    assert isinstance(element, np.float64)
|
||||
|
||||
|
||||
def test_getitem_unordered_dup():
    """A label that occurs once yields a scalar even if other labels repeat."""
    ser = Series(range(5), index=["c", "a", "a", "b", "b"])
    value = ser["c"]
    assert is_scalar(value)
    assert value == 0
|
||||
|
||||
|
||||
def test_getitem_dups():
    """A repeated label returns every matching row as a Series."""
    ser = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64)
    both_c_rows = Series([3, 4], index=["C", "C"], dtype=np.int64)
    tm.assert_series_equal(ser["C"], both_c_rows)
|
||||
|
||||
|
||||
def test_getitem_categorical_str():
    """A string key on a categorical index returns all rows in that category."""
    # GH#31765
    cat_labels = Categorical(["a", "b", "c", "a", "b"])
    ser = Series(range(5), index=cat_labels)

    # "a" sits at positions 0 and 3.
    tm.assert_series_equal(ser["a"], ser.iloc[[0, 3]])
|
||||
|
||||
|
||||
def test_slice_can_reorder_not_uniquely_indexed():
    """Smoke test: reversing a Series with duplicate labels must not raise."""
    dup_labels = ["a", "a", "b", "b", "c"]
    ser = Series(1, index=dup_labels)
    ser[::-1]  # it works!
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index_vals", ["aabcd", "aadcb"])
def test_duplicated_index_getitem_positional_indexer(index_vals):
    """Integer key on a duplicated string index warns and acts positionally."""
    # GH 11747
    ser = Series(range(5), index=list(index_vals))

    warning_text = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=warning_text):
        value = ser[3]
    assert value == 3
|
||||
|
||||
|
||||
class TestGetitemDeprecatedIndexers:
    """``set`` and ``dict`` keys are rejected by ``Series[...]`` with TypeError."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825 enforced in 2.0
        target = Series([1, 2, 3])
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            target[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_disallowed(self, key):
        # GH#42825 enforced in 2.0
        target = Series([1, 2, 3])
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            target[key] = 1
|
@ -0,0 +1,518 @@
|
||||
""" test get/set & misc """
|
||||
from datetime import timedelta
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import IndexingError
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
Index,
|
||||
IndexSlice,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
concat,
|
||||
date_range,
|
||||
isna,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_basic_indexing():
    """Out-of-bounds ints and unknown labels raise on a duplicated string index.

    The integer checks are repeated after sorting the index.
    """
    ser = Series(
        np.random.default_rng(2).standard_normal(5), index=["a", "b", "a", "a", "b"]
    )

    warn_msg = "Series.__[sg]etitem__ treating keys as positions is deprecated"
    oob_msg = "index 5 is out of bounds for axis 0 with size 5"

    # unsorted index: get, then set
    with pytest.raises(IndexError, match=oob_msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            ser[5]
    with pytest.raises(IndexError, match=oob_msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            ser[5] = 0

    with pytest.raises(KeyError, match=r"^'c'$"):
        ser["c"]

    ser = ser.sort_index()

    # sorted index: get, then set (the setter accepts a wider set of messages)
    with pytest.raises(IndexError, match=oob_msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            ser[5]
    oob_msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
    with pytest.raises(IndexError, match=oob_msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            ser[5] = 0
|
||||
|
||||
|
||||
def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
    """An int key on a numeric index with duplicates selects by label."""
    # GH51053
    numeric_index = Index([1, 0, 1], dtype=any_numeric_dtype)
    ser = Series(range(3), index=numeric_index)

    # Label 1 occurs at positions 0 and 2.
    expected = Series([0, 2], index=Index([1, 1], dtype=any_numeric_dtype))
    tm.assert_series_equal(ser[1], expected, check_exact=True)
|
||||
|
||||
|
||||
def test_setitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
    """Setting with an int key on a numeric index writes to every matching label."""
    # GH51053
    numeric_index = Index([1, 0, 1], dtype=any_numeric_dtype)
    ser = Series(range(3), index=numeric_index)

    ser[1] = 10

    # Both rows labelled 1 change; the row labelled 0 keeps its value.
    tm.assert_series_equal(
        ser, Series([10, 1, 10], index=numeric_index), check_exact=True
    )
|
||||
|
||||
|
||||
def test_basic_getitem_with_labels(datetime_series):
    """Label Index keys match ``reindex``; label slices match ``.loc``."""
    picked = datetime_series.index[[5, 10, 15]]

    tm.assert_series_equal(datetime_series[picked], datetime_series.reindex(picked))

    start, stop = picked[0], picked[2]
    tm.assert_series_equal(
        datetime_series[start:stop], datetime_series.loc[start:stop]
    )
|
||||
|
||||
|
||||
def test_basic_getitem_dt64tz_values():
    """Scalar access on tz-aware values returns the tz-aware Timestamp."""
    # GH12089
    # with tz for values
    ser = Series(
        date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
    )
    expected = Timestamp("2011-01-01", tz="US/Eastern")

    # .loc, .iloc and plain [] must agree on the first element.
    assert ser.loc["a"] == expected
    assert ser.iloc[0] == expected
    assert ser["a"] == expected
|
||||
|
||||
|
||||
def test_getitem_setitem_ellipsis(using_copy_on_write, warn_copy_on_write):
    """``ser[...]`` returns the whole Series and ``ser[...] = x`` sets all of it."""
    ser = Series(np.random.default_rng(2).standard_normal(10))

    everything = ser[...]
    tm.assert_series_equal(everything, ser)

    with tm.assert_cow_warning(warn_copy_on_write):
        ser[...] = 5
    # The earlier result is only checked when copy-on-write is off.
    if not using_copy_on_write:
        assert (everything == 5).all()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "result_1, duplicate_item, expected_1",
    [
        [
            Series({1: 12, 2: [1, 2, 2, 3]}),
            Series({1: 313}),
            Series({1: 12}, dtype=object),
        ],
        [
            Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            Series({1: [1, 2, 3]}),
            Series({1: [1, 2, 3]}),
        ],
    ],
)
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
    """Lookups on an object Series once a duplicate label has been appended."""
    # GH 17610
    combined = result_1._append(duplicate_item)
    expected_for_dup = expected_1._append(duplicate_item)

    # Label 1 is duplicated and returns a Series; label 2 is still unique.
    tm.assert_series_equal(combined[1], expected_for_dup)
    assert combined[2] == result_1[2]
|
||||
|
||||
|
||||
def test_getitem_setitem_integers():
    """Positional access via ``.iloc`` agrees with label access on a str index."""
    # caused bug without test
    ser = Series([1, 2, 3], ["a", "b", "c"])

    assert ser.iloc[0] == ser["a"]

    # A positional write must be visible through the label.
    ser.iloc[0] = 5
    tm.assert_almost_equal(ser["a"], 5)
|
||||
|
||||
|
||||
def test_series_box_timestamp():
|
||||
rng = date_range("20090415", "20090519", freq="B")
|
||||
ser = Series(rng)
|
||||
assert isinstance(ser[0], Timestamp)
|
||||
assert isinstance(ser.at[1], Timestamp)
|
||||
assert isinstance(ser.iat[2], Timestamp)
|
||||
assert isinstance(ser.loc[3], Timestamp)
|
||||
assert isinstance(ser.iloc[4], Timestamp)
|
||||
|
||||
ser = Series(rng, index=rng)
|
||||
msg = "Series.__getitem__ treating keys as positions is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
assert isinstance(ser[0], Timestamp)
|
||||
assert isinstance(ser.at[rng[1]], Timestamp)
|
||||
assert isinstance(ser.iat[2], Timestamp)
|
||||
assert isinstance(ser.loc[rng[3]], Timestamp)
|
||||
assert isinstance(ser.iloc[4], Timestamp)
|
||||
|
||||
|
||||
def test_series_box_timedelta():
    """Every scalar accessor boxes timedelta64 values as ``Timedelta``."""
    rng = timedelta_range("1 day 1 s", periods=5, freq="h")
    ser = Series(rng)

    boxed = (ser[0], ser.at[1], ser.iat[2], ser.loc[3], ser.iloc[4])
    for value in boxed:
        assert isinstance(value, Timedelta)
|
||||
|
||||
|
||||
def test_getitem_ambiguous_keyerror(indexer_sl):
    """An int that is not a label of an integer index raises KeyError."""
    even_labels = list(range(0, 20, 2))
    ser = Series(range(10), index=even_labels)

    # 1 is a valid position but not one of the even labels.
    with pytest.raises(KeyError, match=r"^1$"):
        indexer_sl(ser)[1]
|
||||
|
||||
|
||||
def test_getitem_dups_with_missing(indexer_sl):
    """A list key with one absent label raises KeyError on a duplicated index."""
    # breaks reindex, so need to use .loc internally
    # GH 4246
    ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
    key = ["foo", "bar", "bah", "bam"]

    with pytest.raises(KeyError, match=re.escape("['bam'] not in index")):
        indexer_sl(ser)[key]
|
||||
|
||||
|
||||
def test_setitem_ambiguous_keyerror(indexer_sl):
    """Setting an absent integer label appends a new row."""
    base = Series(range(10), index=list(range(0, 20, 2)))

    # equivalent of an append
    enlarged = base.copy()
    indexer_sl(enlarged)[1] = 5

    appended = concat([base, Series([5], index=[1])])
    tm.assert_series_equal(enlarged, appended)
|
||||
|
||||
|
||||
def test_setitem(datetime_series):
    """NaN written by label/position is readable back and mask-assignable."""
    ts = datetime_series

    # write NaN through a label, a positional list and a single position
    sixth_label = ts.index[5]
    ts[sixth_label] = np.nan
    ts.iloc[[1, 2, 17]] = np.nan
    ts.iloc[6] = np.nan
    assert np.isnan(ts.iloc[6])
    assert np.isnan(ts.iloc[2])

    # overwrite every NaN using a boolean mask
    ts[np.isnan(ts)] = 5
    assert not np.isnan(ts.iloc[2])
|
||||
|
||||
|
||||
def test_setslice(datetime_series):
    """A positional slice has as many values as labels and a unique index."""
    window = datetime_series[5:20]
    assert len(window) == len(window.index)
    assert window.index.is_unique is True
|
||||
|
||||
|
||||
def test_basic_getitem_setitem_corner(datetime_series):
    """Tuple keys and odd nested-list keys on a non-MultiIndex Series."""
    ts = datetime_series

    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    msg = "key of type tuple not found and not a MultiIndex"
    with pytest.raises(KeyError, match=msg):
        ts[:, 2]
    with pytest.raises(KeyError, match=msg):
        ts[:, 2] = 2

    # weird lists. [slice(0, 5)] raises but not two slices
    first_five = slice(None, 5)
    msg = "Indexing with a single-item list"
    with pytest.raises(ValueError, match=msg):
        # GH#31299
        ts[[first_five]]

    # but we're OK with a single-element tuple
    tm.assert_series_equal(ts[(first_five,)], ts[:5])

    # OK
    msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=msg):
        ts[[5, [None, None]]]
    with pytest.raises(TypeError, match=msg):
        ts[[5, [None, None]]] = 2
|
||||
|
||||
|
||||
def test_slice(string_series, object_series, using_copy_on_write, warn_copy_on_write):
    """Positional slices pick the expected rows; writes obey the CoW mode."""
    original = string_series.copy()
    num_slice = string_series[10:20]
    num_slice_end = string_series[-10:]
    obj_slice = object_series[10:20]

    # position 9 lies just before the slice and must be excluded
    assert string_series.index[9] not in num_slice.index
    assert object_series.index[9] not in obj_slice.index

    assert len(num_slice) == len(num_slice.index)
    first_label = num_slice.index[0]
    assert string_series[first_label] == num_slice[first_label]

    assert num_slice.index[1] == string_series.index[11]
    tm.assert_numpy_array_equal(
        np.array(num_slice_end), np.array(string_series)[-10:]
    )

    # Test return view.
    window = string_series[10:20]
    with tm.assert_cow_warning(warn_copy_on_write):
        window[:] = 0

    if not using_copy_on_write:
        assert (string_series[10:20] == 0).all()
    else:
        # Doesn't modify parent (CoW)
        tm.assert_series_equal(string_series, original)
|
||||
|
||||
|
||||
def test_timedelta_assignment():
    """``datetime.timedelta`` written via ``.loc`` is stored as ``Timedelta``."""
    # GH 8209
    one_day = Timedelta("1 days")

    # enlarge an empty object Series with a new label
    ser = Series([], dtype=object)
    ser.loc["B"] = timedelta(1)
    tm.assert_series_equal(ser, Series(one_day, index=["B"]))

    # reindexing inserts a missing value in front
    ser = ser.reindex(ser.index.insert(0, "A"))
    tm.assert_series_equal(ser, Series([np.nan, one_day], index=["A", "B"]))

    # fill the missing slot
    ser.loc["A"] = timedelta(1)
    tm.assert_series_equal(ser, Series(one_day, index=["A", "B"]))
|
||||
|
||||
|
||||
def test_underlying_data_conversion(using_copy_on_write):
    """Chained ``df["val"].update(s)`` on a MultiIndex-ed frame (GH 4080).

    Parameters
    ----------
    using_copy_on_write : bool
        Selects the branch. When true, the update is wrapped in
        ``tm.raises_chained_assignment_error()`` and ``df`` is expected to be
        unchanged. Otherwise a ``FutureWarning`` matching "inplace method" is
        expected and the row keyed ``(2, 2, 2)`` is expected to become 1.
    """
    # GH 4080
    df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
    return_value = df.set_index(["a", "b", "c"], inplace=True)
    assert return_value is None
    s = Series([1], index=[(2, 2, 2)])
    df["val"] = 0
    # Snapshot taken before the chained update below.
    df_original = df.copy()

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["val"].update(s)
        expected = df_original
    else:
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            df["val"].update(s)
        expected = DataFrame(
            {"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]}
        )
        return_value = expected.set_index(["a", "b", "c"], inplace=True)
        assert return_value is None
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_preserve_refs(datetime_series):
    """Writing into an ``.iloc[list]`` selection leaves the source untouched."""
    picked = datetime_series.iloc[[5, 10, 15]]
    # Element 1 of the selection corresponds to position 10 of the source.
    picked.iloc[1] = np.nan
    assert not np.isnan(datetime_series.iloc[10])
|
||||
|
||||
|
||||
def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl):
    """Selecting the "foo" key on a MultiIndex Series keeps the name."""
    mi = lexsorted_two_level_string_multiindex
    values = np.random.default_rng(2).standard_normal(len(mi))
    ser = Series(values, index=mi, name="sth")

    selected = indexer_sl(ser)["foo"]
    assert selected.name == ser.name
|
||||
|
||||
|
||||
# miscellaneous methods
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        date_range("2014-01-01", periods=20, freq="MS"),
        period_range("2014-01", periods=20, freq="M"),
        timedelta_range("0", periods=20, freq="h"),
    ],
)
def test_slice_with_negative_step(index):
    """Negative-step label slices are compared with positional slices.

    Each label is tried both as the index element and as its ``str`` form.
    """
    ser = Series(np.arange(20), index)
    SLC = IndexSlice
    check = tm.assert_indexing_slices_equivalent

    lower_keys = [str(index[9]), index[9]]
    upper_keys = [str(index[13]), index[13]]

    for key in lower_keys:
        check(ser, SLC[key::-1], SLC[9::-1])
        check(ser, SLC[:key:-1], SLC[:8:-1])

        for key2 in upper_keys:
            check(ser, SLC[key2:key:-1], SLC[13:8:-1])
            check(ser, SLC[key:key2:-1], SLC[0:0:-1])
|
||||
|
||||
|
||||
def test_tuple_index():
    """1-tuples stored as index labels can be read and written as keys."""
    # GH 35534 - Selecting values when a Series has an Index of tuples
    key_a, key_b = ("a",), ("b",)
    ser = Series([1, 2], index=[key_a, key_b])

    assert ser[key_a] == 1
    assert ser[key_b] == 2

    ser[key_b] = 3
    assert ser[key_b] == 3
|
||||
|
||||
|
||||
def test_frozenset_index():
    """Frozensets stored as index labels can be read and written as keys."""
    # GH35747 - Selecting values when a Series has an Index of frozenset
    first_key = frozenset("a")
    second_key = frozenset("b")
    ser = Series([1, 2], index=[first_key, second_key])

    assert ser[first_key] == 1
    assert ser[second_key] == 2

    ser[second_key] = 3
    assert ser[second_key] == 3
|
||||
|
||||
|
||||
def test_loc_setitem_all_false_indexer():
    """Assigning a Series through an all-False ``.loc`` mask changes nothing."""
    # GH#45778
    ser = Series([1, 2], index=["a", "b"])
    untouched = ser.copy()
    replacement = Series([6, 7], index=["a", "b"])

    # no value exceeds 100, so the mask selects no rows
    nothing_selected = ser > 100
    ser.loc[nothing_selected] = replacement

    tm.assert_series_equal(ser, untouched)
|
||||
|
||||
|
||||
def test_loc_boolean_indexer_non_matching_index():
    """A longer nullable-boolean mask whose usable entry is NA selects nothing."""
    # GH#46551
    ser = Series([1])
    nullable_mask = Series([NA, False], dtype="boolean")

    selected = ser.loc[nullable_mask]

    tm.assert_series_equal(selected, Series([], dtype="int64"))
|
||||
|
||||
|
||||
def test_loc_boolean_indexer_miss_matching_index():
    """A boolean mask whose labels are absent from the Series raises IndexingError."""
    # GH#46551
    ser = Series([1])
    # The mask is labelled 1 and 2 while the Series only has label 0.
    shifted_mask = Series([NA, False], dtype="boolean", index=[1, 2])

    with pytest.raises(IndexingError, match="Unalignable"):
        ser.loc[shifted_mask]
|
||||
|
||||
|
||||
def test_loc_setitem_nested_data_enlargement():
    """Enlarging a Series via ``.loc`` with a DataFrame stores it as one element."""
    # GH#48614
    frame = DataFrame({"a": [1]})
    ser = Series({"label": frame})

    ser.loc["new_label"] = frame

    tm.assert_series_equal(ser, Series({"label": frame, "new_label": frame}))
|
||||
|
||||
|
||||
def test_loc_ea_numeric_index_oob_slice_end():
    """.loc slice on an Int64 index whose stop label is absent (GH#50161)."""
    ser = Series(1, index=Index([0, 1, 2], dtype="Int64"))

    # 3 is not a label of the index; only label 2 falls inside the slice.
    sliced = ser.loc[2:3]

    tm.assert_series_equal(sliced, Series(1, index=Index([2], dtype="Int64")))
|
||||
|
||||
|
||||
def test_getitem_bool_int_key():
    """.loc[0] on a Series keyed by True/False raises KeyError (GH#48653)."""
    bool_keyed = Series({True: 1, False: 0})

    with pytest.raises(KeyError, match="0"):
        bool_keyed.loc[0]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [{}, {"b": "x"}])
@pytest.mark.parametrize("indexer", [[], [False, False], slice(0, -1), np.array([])])
def test_setitem_empty_indexer(indexer, val):
    """Setting through an indexer that selects no rows leaves the frame as-is (GH#45981)."""
    columns = {"a": [1, 2]}
    columns.update(val)
    df = DataFrame(columns)
    snapshot = df.copy()

    df.loc[indexer] = 1.5

    tm.assert_frame_equal(df, snapshot)
|
||||
|
||||
|
||||
class TestDeprecatedIndexers:
    """``Series.loc`` rejects set and dict keys (GH#42825, enforced in 2.0)."""

    # Fragment of the TypeError message expected by every case below.
    _MSG = "as an indexer is not supported"

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        flat = Series([1, 2])
        with pytest.raises(TypeError, match=self._MSG):
            flat.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_getitem_dict_and_set_deprecated_multiindex(self, key):
        levels = MultiIndex.from_tuples([(1, 2), (3, 4)])
        nested = Series([1, 2], index=levels)
        with pytest.raises(TypeError, match=self._MSG):
            nested.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_disallowed(self, key):
        flat = Series([1, 2])
        with pytest.raises(TypeError, match=self._MSG):
            flat.loc[key] = 1

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_setitem_dict_and_set_disallowed_multiindex(self, key):
        levels = MultiIndex.from_tuples([(1, 2), (3, 4)])
        nested = Series([1, 2], index=levels)
        with pytest.raises(TypeError, match=self._MSG):
            nested.loc[key] = 1
|
||||
|
||||
|
||||
class TestSetitemValidation:
    # Adapted from pandas/tests/arrays/masked/test_indexing.py, except that
    # these cases look for warnings instead of errors.
    def _check_setitem_invalid(self, ser, invalid, indexer, warn):
        """Set ``invalid`` via each setitem path, expecting ``warn`` every time."""
        pattern = re.escape("Setting an item of incompatible dtype is deprecated")
        pristine = ser.copy()

        # Plain __setitem__ acts on the Series that was passed in.
        with tm.assert_produces_warning(warn, match=pattern):
            ser[indexer] = invalid

        # Each later path starts again from an unmodified copy.
        by_position = pristine.copy()
        with tm.assert_produces_warning(warn, match=pattern):
            by_position.iloc[indexer] = invalid

        by_label = pristine.copy()
        with tm.assert_produces_warning(warn, match=pattern):
            by_label.loc[indexer] = invalid

        whole = pristine.copy()
        with tm.assert_produces_warning(warn, match=pattern):
            whole[:] = invalid

    # Scalars used as "invalid" values by all three dtype tests below.
    _invalid_scalars = [
        1 + 2j,
        "True",
        "1",
        "1.0",
        NaT,
        np.datetime64("NaT"),
        np.timedelta64("NaT"),
    ]
    # Indexers that each select the first element or the whole Series.
    _indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]

    @pytest.mark.parametrize(
        "invalid", [*_invalid_scalars, 1, 1.0, np.int64(1), np.float64(1)]
    )
    @pytest.mark.parametrize("indexer", _indexers)
    def test_setitem_validation_scalar_bool(self, invalid, indexer):
        bools = Series([True, False, False], dtype="bool")
        self._check_setitem_invalid(bools, invalid, indexer, FutureWarning)

    @pytest.mark.parametrize("invalid", [*_invalid_scalars, True, 1.5, np.float64(1.5)])
    @pytest.mark.parametrize("indexer", _indexers)
    def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
        ints = Series([1, 2, 3], dtype=any_int_numpy_dtype)
        # No warning is expected only for a missing value that is neither
        # NaT nor a numpy NaT.
        silent = isna(invalid) and invalid is not NaT and not np.isnat(invalid)
        warn = None if silent else FutureWarning
        self._check_setitem_invalid(ints, invalid, indexer, warn)

    @pytest.mark.parametrize("invalid", [*_invalid_scalars, True])
    @pytest.mark.parametrize("indexer", _indexers)
    def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer):
        floats = Series([1, 2, None], dtype=float_numpy_dtype)
        self._check_setitem_invalid(floats, invalid, indexer, FutureWarning)
|
@ -0,0 +1,69 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_mask():
    """``mask(cond)`` matches ``where(~cond)``; compare with results in test_where."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    tm.assert_series_equal(ser.where(~positive, np.nan), ser.mask(positive))
    tm.assert_series_equal(ser.where(~positive), ser.mask(positive))
    tm.assert_series_equal(ser.where(~positive, -ser), ser.mask(positive, -ser))

    # A condition covering only the first three labels.
    cond = Series([True, False, False, True, False], index=ser.index)
    negated = -(ser.abs())
    tm.assert_series_equal(negated.where(~cond[:3]), negated.mask(cond[:3]))
    tm.assert_series_equal(
        negated.where(~cond[:3], -negated), negated.mask(cond[:3], -negated)
    )

    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        ser.mask(1)
    with pytest.raises(ValueError, match=msg):
        ser.mask(cond[:3].values, -ser)
|
||||
|
||||
|
||||
def test_mask_casts():
    """Masking integers with NaN changes the dtype of the result."""
    ints = Series([1, 2, 3, 4])

    masked = ints.mask(ints > 2, np.nan)

    tm.assert_series_equal(masked, Series([1, 2, np.nan, np.nan]))
|
||||
|
||||
|
||||
def test_mask_casts2():
    """``mask`` accepts a plain list of booleans as the condition (see gh-21891)."""
    ints = Series([1, 2])
    expected = Series([np.nan, 2])

    tm.assert_series_equal(ints.mask([True, False]), expected)
|
||||
|
||||
|
||||
def test_mask_inplace():
    """``mask(..., inplace=True)`` gives the same values as the copying call."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    # Without a replacement value.
    target = ser.copy()
    target.mask(positive, inplace=True)
    tm.assert_series_equal(target.dropna(), ser[~positive])
    tm.assert_series_equal(target, ser.mask(positive))

    # With a replacement value.
    target = ser.copy()
    target.mask(positive, -ser, inplace=True)
    tm.assert_series_equal(target, ser.mask(positive, -ser))
|
@ -0,0 +1,45 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_series_set_value():
    """``_set_value`` with datetime keys on an empty object Series (GH#1561)."""
    first_day = datetime(2001, 1, 1)
    second_day = datetime(2001, 1, 2)

    ser = Series(dtype=object)
    ser._set_value(first_day, 1.0)
    ser._set_value(second_day, np.nan)

    expected = Series([1.0, np.nan], index=DatetimeIndex([first_day, second_day]))
    tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
def test_set_value_dt64(datetime_series):
    """``_set_value`` on an existing label returns None and stores the value."""
    label = datetime_series.index[10]

    returned = datetime_series._set_value(label, 0)

    assert returned is None
    assert datetime_series[label] == 0
|
||||
|
||||
|
||||
def test_set_value_str_index(string_series):
    """``_set_value`` and ``.loc`` both append a new string label at the end."""
    # via _set_value
    appended = string_series.copy()
    assert appended._set_value("foobar", 0) is None
    assert appended.index[-1] == "foobar"
    assert appended["foobar"] == 0

    # equivalent assignment through .loc
    via_loc = string_series.copy()
    via_loc.loc["foobar"] = 0
    assert via_loc.index[-1] == "foobar"
    assert via_loc["foobar"] == 0
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,50 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_take_validate_axis():
|
||||
# GH#51022
|
||||
ser = Series([-1, 5, 6, 2, 4])
|
||||
|
||||
msg = "No axis named foo for object type Series"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.take([1, 2], axis="foo")
|
||||
|
||||
|
||||
def test_take():
|
||||
ser = Series([-1, 5, 6, 2, 4])
|
||||
|
||||
actual = ser.take([1, 3, 4])
|
||||
expected = Series([5, 2, 4], index=[1, 3, 4])
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
actual = ser.take([-1, 3, 4])
|
||||
expected = Series([4, 2, 4], index=[4, 3, 4])
|
||||
tm.assert_series_equal(actual, expected)
|
||||
|
||||
msg = "indices are out-of-bounds"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
ser.take([1, 10])
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
ser.take([2, 5])
|
||||
|
||||
|
||||
def test_take_categorical():
    """``take`` on a categorical Series with negative positions."""
    # https://github.com/pandas-dev/pandas/issues/20664
    ser = Series(pd.Categorical(["a", "b", "c"]))

    taken = ser.take([-2, -2, 0])

    values = pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"])
    tm.assert_series_equal(taken, Series(values, index=[1, 1, 0]))
|
||||
|
||||
|
||||
def test_take_slice_raises():
|
||||
ser = Series([-1, 5, 6, 2, 4])
|
||||
|
||||
msg = "Series.take requires a sequence of integers, not slice"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ser.take(slice(0, 3, 1))
|
@ -0,0 +1,481 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_where_unsafe_int(any_signed_int_numpy_dtype):
    """Boolean-mask setitem with integers keeps the signed integer dtype."""
    dtype = any_signed_int_numpy_dtype
    ser = Series(np.arange(10), dtype=dtype)

    ser[ser < 5] = range(2, 7)

    expected = Series([*range(2, 7), *range(5, 10)], dtype=dtype)
    tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
def test_where_unsafe_float(float_numpy_dtype):
    """Boolean-mask setitem with integers keeps the float dtype."""
    ser = Series(np.arange(10), dtype=float_numpy_dtype)

    ser[ser < 5] = range(2, 7)

    values = [*range(2, 7), *range(5, 10)]
    tm.assert_series_equal(ser, Series(values, dtype=float_numpy_dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype,expected_dtype",
|
||||
[
|
||||
(np.int8, np.float64),
|
||||
(np.int16, np.float64),
|
||||
(np.int32, np.float64),
|
||||
(np.int64, np.float64),
|
||||
(np.float32, np.float32),
|
||||
(np.float64, np.float64),
|
||||
],
|
||||
)
|
||||
def test_where_unsafe_upcast(dtype, expected_dtype):
|
||||
# see gh-9743
|
||||
s = Series(np.arange(10), dtype=dtype)
|
||||
values = [2.5, 3.5, 4.5, 5.5, 6.5]
|
||||
mask = s < 5
|
||||
expected = Series(values + list(range(5, 10)), dtype=expected_dtype)
|
||||
warn = (
|
||||
None
|
||||
if np.dtype(dtype).kind == np.dtype(expected_dtype).kind == "f"
|
||||
else FutureWarning
|
||||
)
|
||||
with tm.assert_produces_warning(warn, match="incompatible dtype"):
|
||||
s[mask] = values
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_where_unsafe():
|
||||
# see gh-9731
|
||||
s = Series(np.arange(10), dtype="int64")
|
||||
values = [2.5, 3.5, 4.5, 5.5]
|
||||
|
||||
mask = s > 5
|
||||
expected = Series(list(range(6)) + values, dtype="float64")
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
|
||||
s[mask] = values
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# see gh-3235
|
||||
s = Series(np.arange(10), dtype="int64")
|
||||
mask = s < 5
|
||||
s[mask] = range(2, 7)
|
||||
expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64")
|
||||
tm.assert_series_equal(s, expected)
|
||||
assert s.dtype == expected.dtype
|
||||
|
||||
s = Series(np.arange(10), dtype="int64")
|
||||
mask = s > 5
|
||||
s[mask] = [0] * 4
|
||||
expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
s = Series(np.arange(10))
|
||||
mask = s > 5
|
||||
|
||||
msg = "cannot set using a list-like indexer with a different length than the value"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[mask] = [5, 4, 3, 2, 1]
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s[mask] = [0] * 5
|
||||
|
||||
# dtype changes
|
||||
s = Series([1, 2, 3, 4])
|
||||
result = s.where(s > 2, np.nan)
|
||||
expected = Series([np.nan, np.nan, 3, 4])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH 4667
|
||||
# setting with None changes dtype
|
||||
s = Series(range(10)).astype(float)
|
||||
s[8] = None
|
||||
result = s[8]
|
||||
assert isna(result)
|
||||
|
||||
s = Series(range(10)).astype(float)
|
||||
s[s > 8] = None
|
||||
result = s[isna(s)]
|
||||
expected = Series(np.nan, index=[9])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where():
    """Basic ``Series.where`` behaviour, including a partially covering condition."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    tm.assert_series_equal(ser.where(positive).dropna(), ser[positive])
    tm.assert_series_equal(ser.where(positive, -ser), ser.abs())

    # The result has the same shape but is a new object.
    kept = ser.where(positive)
    assert ser.shape == kept.shape
    assert kept is not ser

    # test alignment
    cond = Series([True, False, False, True, False], index=ser.index)
    negated = -(ser.abs())

    expected = negated[cond].reindex(negated.index[:3]).reindex(negated.index)
    tm.assert_series_equal(negated.where(cond[:3]), expected)

    expected = negated.abs()
    expected.iloc[0] = negated[0]
    tm.assert_series_equal(negated.where(cond[:3], -negated), expected)
|
||||
|
||||
|
||||
def test_where_error():
    """Shape errors from ``where`` and length errors from boolean-list setitem."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        ser.where(1)
    with pytest.raises(ValueError, match=shape_msg):
        ser.where(positive[:3].values, -ser)

    # GH 2745
    ser = Series([1, 2])
    ser[[True, False]] = [0, 1]
    tm.assert_series_equal(ser, Series([0, 2]))

    # failures
    length_msg = (
        "cannot set using a list-like indexer with a different length than the value"
    )
    for wrong_length in ([0, 2, 3], []):
        with pytest.raises(ValueError, match=length_msg):
            ser[[True, False]] = wrong_length
|
||||
|
||||
|
||||
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    """``where`` accepts the condition in any array-like container (see gh-15414)."""
    ser = Series([1, 2, 3])
    boxed_cond = klass([False, True, True])

    tm.assert_series_equal(ser.where(boxed_cond), Series([np.nan, 2, 3]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    """Only boolean arrays of matching shape are accepted (see gh-15414)."""
    ser = Series([1, 2, 3])

    with pytest.raises(ValueError, match="Boolean array expected for the condition"):
        ser.where(cond)

    with pytest.raises(ValueError, match="Array conditional must be same shape as self"):
        ser.where([True])
|
||||
|
||||
|
||||
def test_where_ndframe_align():
    """A wrongly sized condition raises as-is but is accepted once wrapped in a Series."""
    msg = "Array conditional must be same shape as self"
    ser = Series([1, 2, 3])

    # (raw condition of the wrong length, expected values after wrapping it)
    cases = [
        ([True], [1, np.nan, np.nan]),
        (np.array([False, True, False, True]), [np.nan, 2, np.nan]),
    ]
    for cond, values in cases:
        with pytest.raises(ValueError, match=msg):
            ser.where(cond)

        expected = Series(values)
        tm.assert_series_equal(ser.where(Series(cond)), expected)
|
||||
|
||||
|
||||
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set ints into string")
|
||||
def test_where_setitem_invalid():
|
||||
# GH 2702
|
||||
# make sure correct exceptions are raised on invalid list assignment
|
||||
|
||||
msg = (
|
||||
lambda x: f"cannot set using a {x} indexer with a "
|
||||
"different length than the value"
|
||||
)
|
||||
# slice
|
||||
s = Series(list("abc"))
|
||||
|
||||
with pytest.raises(ValueError, match=msg("slice")):
|
||||
s[0:3] = list(range(27))
|
||||
|
||||
s[0:3] = list(range(3))
|
||||
expected = Series([0, 1, 2])
|
||||
tm.assert_series_equal(s.astype(np.int64), expected)
|
||||
|
||||
# slice with step
|
||||
s = Series(list("abcdef"))
|
||||
|
||||
with pytest.raises(ValueError, match=msg("slice")):
|
||||
s[0:4:2] = list(range(27))
|
||||
|
||||
s = Series(list("abcdef"))
|
||||
s[0:4:2] = list(range(2))
|
||||
expected = Series([0, "b", 1, "d", "e", "f"])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# neg slices
|
||||
s = Series(list("abcdef"))
|
||||
|
||||
with pytest.raises(ValueError, match=msg("slice")):
|
||||
s[:-1] = list(range(27))
|
||||
|
||||
s[-3:-1] = list(range(2))
|
||||
expected = Series(["a", "b", "c", 0, 1, "f"])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# list
|
||||
s = Series(list("abc"))
|
||||
|
||||
with pytest.raises(ValueError, match=msg("list-like")):
|
||||
s[[0, 1, 2]] = list(range(27))
|
||||
|
||||
s = Series(list("abc"))
|
||||
|
||||
with pytest.raises(ValueError, match=msg("list-like")):
|
||||
s[[0, 1, 2]] = list(range(2))
|
||||
|
||||
# scalar
|
||||
s = Series(list("abc"))
|
||||
s[0] = list(range(10))
|
||||
expected = Series([list(range(10)), "b", "c"])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", range(2, 6))
@pytest.mark.parametrize(
    "mask", [[True, False, False, False, False], [True, False], [False]]
)
@pytest.mark.parametrize(
    "item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min]
)
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize(
    "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
)
def test_broadcast(size, mask, item, box):
    """A scalar or length-1 container fills every selected slot (GH#8801, GH#4195)."""
    selection = np.resize(mask, size)
    data = np.arange(size, dtype=float)

    # Expected result: ``item`` where selected, the source value elsewhere.
    expected = Series(
        [item if chosen else value for value, chosen in zip(data, selection)]
    )

    # setitem with the bare scalar
    assigned = Series(data)
    assigned[selection] = item
    tm.assert_series_equal(assigned, expected)

    # where/mask with the boxed scalar
    via_where = Series(data).where(~selection, box(item))
    tm.assert_series_equal(via_where, expected)

    via_mask = Series(data).mask(selection, box(item))
    tm.assert_series_equal(via_mask, expected)
|
||||
|
||||
|
||||
def test_where_inplace():
    """``where(..., inplace=True)`` gives the same values as the copying call."""
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    # Without a replacement value.
    target = ser.copy()
    target.where(positive, inplace=True)
    tm.assert_series_equal(target.dropna(), ser[positive])
    tm.assert_series_equal(target, ser.where(positive))

    # With a replacement value.
    target = ser.copy()
    target.where(positive, -ser, inplace=True)
    tm.assert_series_equal(target, ser.where(positive, -ser))
|
||||
|
||||
|
||||
def test_where_dups():
    """``where`` and boolean-mask setitem on an index with duplicate labels."""
    # GH 4550
    # where crashes with dups in index
    dup_labels = [0, 1, 2, 0, 1, 2]
    comb = pd.concat([Series(list(range(3))), Series(list(range(3)))])

    below_two = comb.where(comb < 2)
    tm.assert_series_equal(
        below_two, Series([0, 1, np.nan, 0, 1, np.nan], index=dup_labels)
    )

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    tm.assert_series_equal(comb, Series([5, 1, 2, 5, 1, 2], index=dup_labels))

    comb[comb < 2] += 10
    tm.assert_series_equal(comb, Series([5, 11, 2, 5, 11, 2], index=dup_labels))
|
||||
|
||||
|
||||
def test_where_numeric_with_string():
    """Replacing integers with strings via ``where`` gives object dtype (GH 9280)."""
    ser = Series([1, 2, 3])

    # The replacement is tried as a scalar, a list and an ndarray.
    replacements = ("X", ["X", "Y", "Z"], np.array(["X", "Y", "Z"]))
    for other in replacements:
        mixed = ser.where(ser > 1, other)

        assert not is_integer(mixed[0])
        assert is_integer(mixed[1])
        assert is_integer(mixed[2])
        assert isinstance(mixed[0], str)
        assert mixed.dtype == "object"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["timedelta64[ns]", "datetime64[ns]"])
|
||||
def test_where_datetimelike_coerce(dtype):
|
||||
ser = Series([1, 2], dtype=dtype)
|
||||
expected = Series([10, 10])
|
||||
mask = np.array([False, False])
|
||||
|
||||
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs = ser.where(mask, [10, 10])
|
||||
tm.assert_series_equal(rs, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs = ser.where(mask, 10)
|
||||
tm.assert_series_equal(rs, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs = ser.where(mask, 10.0)
|
||||
tm.assert_series_equal(rs, expected)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs = ser.where(mask, [10.0, 10.0])
|
||||
tm.assert_series_equal(rs, expected)
|
||||
|
||||
rs = ser.where(mask, [10.0, np.nan])
|
||||
expected = Series([10, np.nan], dtype="object")
|
||||
tm.assert_series_equal(rs, expected)
|
||||
|
||||
|
||||
def test_where_datetimetz():
    """``where`` on a UTC datetime Series fills with NaT, same dtype (GH 15701)."""
    stamps = [
        Timestamp("2016-12-31 12:00:04+00:00"),
        Timestamp("2016-12-31 12:00:04.010000+00:00"),
    ]
    ser = Series(stamps, dtype="datetime64[ns, UTC]")

    kept_second = ser.where(Series([False, True]))

    expected = Series([pd.NaT, ser[1]], dtype="datetime64[ns, UTC]")
    tm.assert_series_equal(kept_second, expected)
|
||||
|
||||
|
||||
def test_where_sparse():
    """``where`` on a sparse Series; must not hit an AttributeError for sp_index."""
    # GH#17198
    sparse = Series(pd.arrays.SparseArray([1, 2]))

    replaced = sparse.where(sparse >= 2, 0)

    tm.assert_series_equal(replaced, Series(pd.arrays.SparseArray([0, 2])))
|
||||
|
||||
|
||||
def test_where_empty_series_and_empty_cond_having_non_bool_dtypes():
    """An empty float Series accepts an empty list as the condition."""
    # https://github.com/pandas-dev/pandas/issues/34592
    empty = Series([], dtype=float)

    tm.assert_series_equal(empty.where([]), empty)
|
||||
|
||||
|
||||
def test_where_categorical(frame_or_series):
    """``where`` on categorical data keeps the categories and inserts NaN."""
    # https://github.com/pandas-dev/pandas/issues/18888
    expected_values = pd.Categorical(
        ["A", "A", "B", "B", np.nan], categories=["A", "B", "C"]
    )
    exp = frame_or_series(expected_values, dtype="category")

    obj = frame_or_series(["A", "A", "B", "B", "C"], dtype="category")
    res = obj.where(obj != "C")

    tm.assert_equal(exp, res)
|
||||
|
||||
|
||||
def test_where_datetimelike_categorical(tz_naive_fixture):
    """``where`` takes datetime values from a Categorical ``other`` (GH#37682)."""
    dr = date_range("2001-01-01", periods=3, tz=tz_naive_fixture)._with_freq(None)
    lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT])
    rvals = pd.Categorical([dr[0], pd.NaT, dr[2]])

    # Keep the first two entries of lvals and take the last from rvals.
    mask = np.array([True, True, False])

    # DatetimeIndex.where
    tm.assert_index_equal(lvals.where(mask, rvals), dr)

    # DatetimeArray.where
    tm.assert_datetime_array_equal(lvals._data._where(mask, rvals), dr._data)

    # Series.where
    tm.assert_series_equal(Series(lvals).where(mask, rvals), Series(dr))

    # DataFrame.where
    frame_res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))
    tm.assert_frame_equal(frame_res, pd.DataFrame(dr))
|
@ -0,0 +1,82 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_xs_datetimelike_wrapping():
    """``xs`` returns a stored ``np.datetime64`` without wrapping it in Timestamp."""
    # GH#31630
    arr = date_range("2016-01-01", periods=3)._data._ndarray

    ser = Series(arr, dtype=object)
    # Put the raw datetime64 scalars into the object Series one position at a time.
    for pos, stamp in enumerate(arr):
        ser.iloc[pos] = stamp
    assert ser.dtype == object
    assert isinstance(ser[0], np.datetime64)

    assert isinstance(ser.xs(0), np.datetime64)
|
||||
|
||||
|
||||
class TestXSWithMultiIndex:
    """``Series.xs`` cross-sections on a MultiIndex."""

    def test_xs_level_series(self, multiindex_dataframe_random_data):
        frame = multiindex_dataframe_random_data
        expected = frame["A"][:, "two"]

        tm.assert_series_equal(frame.xs("two", level=1)["A"], expected)

    def test_series_getitem_multiindex_xs_by_label(self):
        # GH#5684
        pairs = [("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")]
        ser = Series([1, 2, 3, 4], index=MultiIndex.from_tuples(pairs))
        # In-place renaming returns None.
        assert ser.index.set_names(["L1", "L2"], inplace=True) is None

        expected = Series([1, 3], index=["a", "b"])
        assert expected.index.set_names(["L1"], inplace=True) is None

        tm.assert_series_equal(ser.xs("one", level="L2"), expected)

    def test_series_getitem_multiindex_xs(self):
        # GH#6258
        stamps = list(date_range("20130903", periods=3))
        ser = Series(
            [1, 3, 4, 1, 3, 4], index=MultiIndex.from_product([list("AB"), stamps])
        )

        # The date level is addressed with a string key.
        first_day = ser.xs("20130903", level=1)

        tm.assert_series_equal(first_day, Series([1, 1], index=list("AB")))

    def test_series_xs_droplevel_false(self):
        # GH: 19056
        names = ["level1", "level2"]
        mi = MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "x")], names=names)
        ser = Series([1, 1, 1], index=mi)

        kept = ser.xs("a", axis=0, drop_level=False)

        # With drop_level=False both index levels are expected in the result.
        expected_index = MultiIndex.from_tuples([("a", "x"), ("a", "y")], names=names)
        tm.assert_series_equal(kept, Series([1, 1], index=expected_index))

    def test_xs_key_as_list(self):
        # GH#41760
        mi = MultiIndex.from_tuples([("a", "x")], names=["level1", "level2"])
        ser = Series([1], index=mi)

        for list_key in (["a", "x"], ["a"]):
            with pytest.raises(TypeError, match="list keys are not supported"):
                ser.xs(list_key, axis=0, drop_level=False)
|
Reference in New Issue
Block a user