Updated script so that it can be controlled by a Node.js web app
This commit is contained in:
40
lib/python3.13/site-packages/pandas/tests/indexing/common.py
Normal file
40
lib/python3.13/site-packages/pandas/tests/indexing/common.py
Normal file
@ -0,0 +1,40 @@
|
||||
""" common utilities """
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Literal,
|
||||
)
|
||||
|
||||
|
||||
def _mklbl(prefix: str, n: int):
|
||||
return [f"{prefix}{i}" for i in range(n)]
|
||||
|
||||
|
||||
def check_indexing_smoketest_or_raises(
    obj,
    method: Literal["iloc", "loc"],
    key: Any,
    axes: Literal[0, 1] | None = None,
    fails=None,
) -> None:
    """
    Smoke-test ``getattr(obj, method)[...]`` with ``key`` on each requested axis.

    Parameters
    ----------
    obj : object
        Must expose ``ndim`` and the indexer attribute named by ``method``.
    method : {"iloc", "loc"}
        Name of the indexer attribute to exercise.
    key : Any
        Placed at the position of the axis under test; every other axis
        receives ``slice(None)``.
    axes : {0, 1} or None, default None
        Axis to test. ``None`` tests both 0 and 1. Axes that are not smaller
        than ``obj.ndim`` are skipped.
    fails : exception type or tuple of exception types, optional
        If the lookup raises ``IndexError``, ``TypeError`` or ``KeyError`` and
        the exception is an instance of ``fails``, the function returns
        immediately (remaining axes are not tested).

    Raises
    ------
    IndexError, TypeError, KeyError
        Re-raised when the lookup fails and the exception is not covered by
        ``fails``.
    """
    if axes is None:
        axes_list = [0, 1]
    else:
        assert axes in [0, 1]
        axes_list = [axes]

    for ax in axes_list:
        if ax < obj.ndim:
            # create a tuple accessor
            new_axes = [slice(None)] * obj.ndim
            new_axes[ax] = key
            axified = tuple(new_axes)
            try:
                getattr(obj, method).__getitem__(axified)
            except (IndexError, TypeError, KeyError) as detail:
                # if we are in fails, then ok, otherwise raise it
                if fails is not None and isinstance(detail, fails):
                    return
                raise
|
127
lib/python3.13/site-packages/pandas/tests/indexing/conftest.py
Normal file
127
lib/python3.13/site-packages/pandas/tests/indexing/conftest.py
Normal file
@ -0,0 +1,127 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def series_ints():
    """Series of 4 seeded random floats indexed by ``np.arange(0, 8, 2)``."""
    return Series(np.random.default_rng(2).random(4), index=np.arange(0, 8, 2))
|
||||
|
||||
|
||||
@pytest.fixture
def frame_ints():
    """4x4 seeded standard-normal DataFrame with integer row and column labels."""
    return DataFrame(
        np.random.default_rng(2).standard_normal((4, 4)),
        index=np.arange(0, 8, 2),
        columns=np.arange(0, 12, 3),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def series_uints():
    """Series of 4 seeded random floats with a ``uint64`` index."""
    return Series(
        np.random.default_rng(2).random(4),
        index=Index(np.arange(0, 8, 2, dtype=np.uint64)),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def frame_uints():
    """4x4 seeded standard-normal DataFrame with ``uint64`` row and column labels."""
    return DataFrame(
        np.random.default_rng(2).standard_normal((4, 4)),
        index=Index(range(0, 8, 2), dtype=np.uint64),
        columns=Index(range(0, 12, 3), dtype=np.uint64),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def series_labels():
    """Series of 4 seeded standard-normal values labelled ``a`` to ``d``."""
    return Series(np.random.default_rng(2).standard_normal(4), index=list("abcd"))
|
||||
|
||||
|
||||
@pytest.fixture
def frame_labels():
    """4x4 seeded standard-normal DataFrame with rows ``a``-``d``, columns ``A``-``D``."""
    return DataFrame(
        np.random.default_rng(2).standard_normal((4, 4)),
        index=list("abcd"),
        columns=list("ABCD"),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def series_ts():
    """Series of 4 seeded standard-normal values on a 4-period date range."""
    return Series(
        np.random.default_rng(2).standard_normal(4),
        index=date_range("20130101", periods=4),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def frame_ts():
    """4x4 seeded standard-normal DataFrame indexed by a 4-period date range."""
    return DataFrame(
        np.random.default_rng(2).standard_normal((4, 4)),
        index=date_range("20130101", periods=4),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def series_floats():
    """Series of 4 seeded random floats with a ``float64`` index."""
    return Series(
        np.random.default_rng(2).random(4),
        index=Index(range(0, 8, 2), dtype=np.float64),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def frame_floats():
    """4x4 seeded standard-normal DataFrame with ``float64`` row and column labels."""
    return DataFrame(
        np.random.default_rng(2).standard_normal((4, 4)),
        index=Index(range(0, 8, 2), dtype=np.float64),
        columns=Index(range(0, 12, 3), dtype=np.float64),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def series_mixed():
    """Series of 4 seeded standard-normal values with mixed int/str labels."""
    return Series(np.random.default_rng(2).standard_normal(4), index=[2, 4, "null", 8])
|
||||
|
||||
|
||||
@pytest.fixture
def frame_mixed():
    """4x4 seeded standard-normal DataFrame with mixed int/str row labels."""
    return DataFrame(
        np.random.default_rng(2).standard_normal((4, 4)), index=[2, 4, "null", 8]
    )
|
||||
|
||||
|
||||
@pytest.fixture
def frame_empty():
    """DataFrame constructed with no arguments."""
    return DataFrame()
|
||||
|
||||
|
||||
@pytest.fixture
def series_empty():
    """Series with no data and ``object`` dtype."""
    return Series(dtype=object)
|
||||
|
||||
|
||||
@pytest.fixture
def frame_multi():
    """4x4 seeded standard-normal DataFrame with MultiIndex rows and columns."""
    return DataFrame(
        np.random.default_rng(2).standard_normal((4, 4)),
        index=MultiIndex.from_product([[1, 2], [3, 4]]),
        columns=MultiIndex.from_product([[5, 6], [7, 8]]),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def series_multi():
    """Series of 4 seeded random floats with a two-level MultiIndex."""
    return Series(
        np.random.default_rng(2).random(4),
        index=MultiIndex.from_product([[1, 2], [3, 4]]),
    )
|
@ -0,0 +1,225 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex:
    """Getitem/setitem tests for objects indexed by an IntervalIndex."""

    @pytest.fixture
    def series_with_interval_index(self):
        """Series of ``np.arange(5)`` indexed by ``IntervalIndex.from_breaks(np.arange(6))``."""
        return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))

    def test_getitem_with_scalar(self, series_with_interval_index, indexer_sl):
        """Scalar slices, list keys and boolean masks match the equivalent iloc slices."""
        ser = series_with_interval_index.copy()

        expected = ser.iloc[:3]
        tm.assert_series_equal(expected, indexer_sl(ser)[:3])
        tm.assert_series_equal(expected, indexer_sl(ser)[:2.5])
        tm.assert_series_equal(expected, indexer_sl(ser)[0.1:2.5])
        if indexer_sl is tm.loc:
            tm.assert_series_equal(expected, ser.loc[-1:3])

        expected = ser.iloc[1:4]
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]])

        expected = ser.iloc[2:5]
        tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2])

    @pytest.mark.parametrize("direction", ["increasing", "decreasing"])
    def test_getitem_nonoverlapping_monotonic(self, direction, closed, indexer_sl):
        """Endpoint lookups succeed only on closed sides; midpoints always succeed."""
        tpls = [(0, 1), (2, 3), (4, 5)]
        if direction == "decreasing":
            tpls = tpls[::-1]

        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        ser = Series(list("abc"), idx)

        for key, expected in zip(idx.left, ser):
            if idx.closed_left:
                assert indexer_sl(ser)[key] == expected
            else:
                with pytest.raises(KeyError, match=str(key)):
                    indexer_sl(ser)[key]

        for key, expected in zip(idx.right, ser):
            if idx.closed_right:
                assert indexer_sl(ser)[key] == expected
            else:
                with pytest.raises(KeyError, match=str(key)):
                    indexer_sl(ser)[key]

        for key, expected in zip(idx.mid, ser):
            assert indexer_sl(ser)[key] == expected

    def test_getitem_non_matching(self, series_with_interval_index, indexer_sl):
        """A list key containing a value outside every interval raises KeyError."""
        ser = series_with_interval_index.copy()

        # this is a departure from our current
        # indexing scheme, but simpler
        with pytest.raises(KeyError, match=r"\[-1\] not in index"):
            indexer_sl(ser)[[-1, 3, 4, 5]]

        with pytest.raises(KeyError, match=r"\[-1\] not in index"):
            indexer_sl(ser)[[-1, 3]]

    def test_loc_getitem_large_series(self, monkeypatch):
        """Equivalent ``loc`` slice spellings agree with ``_SIZE_CUTOFF`` patched to the series length."""
        size_cutoff = 20
        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            ser = Series(
                np.arange(size_cutoff),
                index=IntervalIndex.from_breaks(np.arange(size_cutoff + 1)),
            )

            result1 = ser.loc[:8]
            result2 = ser.loc[0:8]
            result3 = ser.loc[0:8:1]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)

    def test_loc_getitem_frame(self):
        """``DataFrame.loc`` with scalar and list keys on an index built from ``pd.cut``."""
        # CategoricalIndex with IntervalIndex categories
        df = DataFrame({"A": range(10)})
        ser = pd.cut(df.A, 5)
        df["B"] = ser
        df = df.set_index("B")

        result = df.loc[4]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError, match="10"):
            df.loc[10]

        # single list-like
        result = df.loc[[4]]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        # non-unique
        result = df.loc[[4, 5]]
        expected = df.take([4, 5, 4, 5])
        tm.assert_frame_equal(result, expected)

        msg = (
            r"None of \[Index\(\[10\], dtype='object', name='B'\)\] "
            r"are in the \[index\]"
        )
        with pytest.raises(KeyError, match=msg):
            df.loc[[10]]

        # partial missing
        with pytest.raises(KeyError, match=r"\[10\] not in index"):
            df.loc[[10, 4]]

    def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl):
        """Indexing an all-NaN IntervalIndex with a one-element NaN key returns the whole object."""
        # GH#41831

        index = IntervalIndex([np.nan, np.nan])
        key = index[:-1]

        obj = frame_or_series(range(2), index=index)
        if frame_or_series is DataFrame and indexer_sl is tm.setitem:
            obj = obj.T

        result = indexer_sl(obj)[key]
        expected = obj

        tm.assert_equal(result, expected)

    def test_setitem_interval_with_slice(self):
        """Setting through ``loc`` with scalar slices on an IntervalIndex."""
        # GH#54722
        ii = IntervalIndex.from_breaks(range(4, 15))
        ser = Series(range(10), index=ii)

        orig = ser.copy()

        # This should be a no-op (used to raise)
        ser.loc[1:3] = 20
        tm.assert_series_equal(ser, orig)

        ser.loc[6:8] = 19
        orig.iloc[1:4] = 19
        tm.assert_series_equal(ser, orig)

        ser2 = Series(range(5), index=ii[::2])
        orig2 = ser2.copy()

        # this used to raise
        ser2.loc[6:8] = 22  # <- raises on main, sets on branch
        orig2.iloc[1] = 22
        tm.assert_series_equal(ser2, orig2)

        ser2.loc[5:7] = 21
        orig2.iloc[:2] = 21
        tm.assert_series_equal(ser2, orig2)
|
||||
|
||||
|
||||
class TestIntervalIndexInsideMultiIndex:
    """Tests for an IntervalIndex used as a MultiIndex level, and for reindexing."""

    def test_mi_intervalindex_slicing_with_scalar(self):
        """Float keys on the interval level select the rows whose interval contains them."""
        # GH#27456
        ii = IntervalIndex.from_arrays(
            [0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12], name="MP"
        )
        idx = pd.MultiIndex.from_arrays(
            [
                pd.Index(["FC", "FC", "FC", "FC", "OWNER", "OWNER", "OWNER", "OWNER"]),
                pd.Index(
                    ["RID1", "RID1", "RID2", "RID2", "RID1", "RID1", "RID2", "RID2"]
                ),
                ii,
            ]
        )

        idx.names = ["Item", "RID", "MP"]
        df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]})
        df.index = idx

        query_df = DataFrame(
            {
                "Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"],
                "RID": ["RID1", "RID1", "RID1", "RID2", "RID2"],
                "MP": [0.2, 1.5, 1.6, 11.1, 10.9],
            }
        )

        query_df = query_df.sort_index()

        idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP])
        query_df.index = idx
        result = df.value.loc[query_df.index]

        # the IntervalIndex level is indexed with floats, which map to
        #  the intervals containing them.  Matching the behavior we would get
        #  with _only_ an IntervalIndex, we get an IntervalIndex level back.
        sliced_level = ii.take([0, 1, 1, 3, 2])
        expected_index = pd.MultiIndex.from_arrays(
            [idx.get_level_values(0), idx.get_level_values(1), sliced_level]
        )
        expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "base",
        [101, 1010],
    )
    def test_reindex_behavior_with_interval_index(self, base):
        """Reindexing with ``[nan, 1.0]`` yields ``[nan, 0]`` for both series sizes."""
        # GH 51826

        ser = Series(
            range(base),
            index=IntervalIndex.from_arrays(range(base), range(1, base + 1)),
        )
        expected_result = Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)
        result = ser.reindex(index=[np.nan, 1.0])
        tm.assert_series_equal(result, expected_result)
|
@ -0,0 +1,229 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex:
    """``loc``/getitem tests on a Series indexed by an IntervalIndex."""

    @pytest.fixture
    def series_with_interval_index(self):
        """Series of ``np.arange(5)`` indexed by ``IntervalIndex.from_breaks(np.arange(6))``."""
        return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))

    def test_loc_with_interval(self, series_with_interval_index, indexer_sl):
        """Interval keys match exactly; missing or inexact Intervals raise KeyError."""
        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        ser = series_with_interval_index.copy()

        expected = 0
        result = indexer_sl(ser)[Interval(0, 1)]
        assert result == expected

        expected = ser.iloc[3:5]
        result = indexer_sl(ser)[[Interval(3, 4), Interval(4, 5)]]
        tm.assert_series_equal(expected, result)

        # missing or not exact
        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")):
            indexer_sl(ser)[Interval(3, 5, closed="left")]

        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
            indexer_sl(ser)[Interval(3, 5)]

        with pytest.raises(
            KeyError, match=re.escape("Interval(-2, 0, closed='right')")
        ):
            indexer_sl(ser)[Interval(-2, 0)]

        with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")):
            indexer_sl(ser)[Interval(5, 6)]

    def test_loc_with_scalar(self, series_with_interval_index, indexer_sl):
        """Scalar keys select the row(s) whose interval contains them."""
        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        ser = series_with_interval_index.copy()

        assert indexer_sl(ser)[1] == 0
        assert indexer_sl(ser)[1.5] == 1
        assert indexer_sl(ser)[2] == 1

        expected = ser.iloc[1:4]
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]])

        expected = ser.iloc[[1, 1, 2, 1]]
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2, 2.5, 1.5]])

        expected = ser.iloc[2:5]
        tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2])

    def test_loc_with_slices(self, series_with_interval_index, indexer_sl):
        """Slices bounded by exact Intervals work; inexact Interval bounds raise."""
        # loc with slices:
        #   - Interval objects: only works with exact matches
        #   - scalars: only works for non-overlapping, monotonic intervals,
        #     and start/stop select location based on the interval that
        #     contains them:
        #    (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))

        ser = series_with_interval_index.copy()

        # slice of interval

        expected = ser.iloc[:3]
        result = indexer_sl(ser)[Interval(0, 1) : Interval(2, 3)]
        tm.assert_series_equal(expected, result)

        expected = ser.iloc[3:]
        result = indexer_sl(ser)[Interval(3, 4) :]
        tm.assert_series_equal(expected, result)

        msg = "Interval objects are not currently supported"
        with pytest.raises(NotImplementedError, match=msg):
            indexer_sl(ser)[Interval(3, 6) :]

        with pytest.raises(NotImplementedError, match=msg):
            indexer_sl(ser)[Interval(3, 4, closed="left") :]

    def test_slice_step_ne1(self, series_with_interval_index):
        """Integer slices with a step other than 1 behave like the iloc slice."""
        # GH#31658 slice of scalar with step != 1
        ser = series_with_interval_index.copy()
        expected = ser.iloc[0:4:2]

        result = ser[0:4:2]
        tm.assert_series_equal(result, expected)

        result2 = ser[0:4][::2]
        tm.assert_series_equal(result2, expected)

    def test_slice_float_start_stop(self, series_with_interval_index):
        """Float-bounded slices with a step other than 1 raise ValueError."""
        # GH#31658 slicing with integers is positional, with floats is not
        #  supported
        ser = series_with_interval_index.copy()

        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            ser[1.5:9.5:2]

    def test_slice_interval_step(self, series_with_interval_index):
        """An Interval used as the slice step raises ValueError."""
        # GH#31658 allows for integer step!=1, not Interval step
        ser = series_with_interval_index.copy()
        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            ser[0 : 4 : Interval(0, 1)]

    def test_loc_with_overlap(self, indexer_sl):
        """Scalar, Interval, list and slice lookups on overlapping intervals."""
        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        # scalar
        expected = ser
        result = indexer_sl(ser)[4]
        tm.assert_series_equal(expected, result)

        result = indexer_sl(ser)[[4]]
        tm.assert_series_equal(expected, result)

        # interval
        expected = 0
        result = indexer_sl(ser)[Interval(1, 5)]
        assert expected == result

        expected = ser
        result = indexer_sl(ser)[[Interval(1, 5), Interval(3, 7)]]
        tm.assert_series_equal(expected, result)

        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
            indexer_sl(ser)[Interval(3, 5)]

        msg = (
            r"None of \[IntervalIndex\(\[\(3, 5\]\], "
            r"dtype='interval\[int64, right\]'\)\] are in the \[index\]"
        )
        with pytest.raises(KeyError, match=msg):
            indexer_sl(ser)[[Interval(3, 5)]]

        # slices with interval (only exact matches)
        expected = ser
        result = indexer_sl(ser)[Interval(1, 5) : Interval(3, 7)]
        tm.assert_series_equal(expected, result)

        msg = (
            "'can only get slices from an IntervalIndex if bounds are "
            "non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=msg):
            indexer_sl(ser)[Interval(1, 6) : Interval(3, 8)]

        if indexer_sl is tm.loc:
            # slices with scalar raise for overlapping intervals
            # TODO KeyError is the appropriate error?
            with pytest.raises(KeyError, match=msg):
                ser.loc[1:4]

    def test_non_unique(self, indexer_sl):
        """Exact Interval lookup returns a scalar; a one-element list returns a Series."""
        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        result = indexer_sl(ser)[Interval(1, 3)]
        assert result == 0

        result = indexer_sl(ser)[[Interval(1, 3)]]
        expected = ser.iloc[0:1]
        tm.assert_series_equal(expected, result)

    def test_non_unique_moar(self, indexer_sl):
        """A duplicated Interval label returns every matching row."""
        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        expected = ser.iloc[[0, 1]]
        result = indexer_sl(ser)[Interval(1, 3)]
        tm.assert_series_equal(expected, result)

        expected = ser
        result = indexer_sl(ser)[Interval(1, 3) :]
        tm.assert_series_equal(expected, result)

        expected = ser.iloc[[0, 1]]
        result = indexer_sl(ser)[[Interval(1, 3)]]
        tm.assert_series_equal(expected, result)

    def test_loc_getitem_missing_key_error_message(
        self, frame_or_series, series_with_interval_index
    ):
        """The KeyError message names only the missing key."""
        # GH#27365
        ser = series_with_interval_index.copy()
        obj = frame_or_series(ser)
        with pytest.raises(KeyError, match=r"\[6\]"):
            obj.loc[[4, 5, 6]]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "intervals",
    [
        ([Interval(-np.inf, 0.0), Interval(0.0, 1.0)]),
        ([Interval(-np.inf, -2.0), Interval(-2.0, -1.0)]),
        ([Interval(-1.0, 0.0), Interval(0.0, np.inf)]),
        ([Interval(1.0, 2.0), Interval(2.0, np.inf)]),
    ],
)
def test_repeating_interval_index_with_infs(intervals):
    """``get_indexer_for`` finds every repeat of an interval when a bound is infinite."""
    # GH 46658

    interval_index = Index(intervals * 51)

    # the second interval of each pair sits at the odd positions 1, 3, ..., 101
    expected = np.arange(1, 102, 2, dtype=np.intp)
    result = interval_index.get_indexer_for([intervals[1]])

    tm.assert_equal(result, expected)
|
@ -0,0 +1,87 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
from pandas.errors import SettingWithCopyError
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write):
    """Chained in-place ``fillna`` on MultiIndex columns under each copy-on-write mode."""
    # Inplace ops, originally from:
    # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    a = [12, 23]
    b = [123, None]
    c = [1234, 2345]
    d = [12345, 23456]
    tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")]
    events = {
        ("eyes", "left"): a,
        ("eyes", "right"): b,
        ("ears", "left"): c,
        ("ears", "right"): d,
    }
    multiind = MultiIndex.from_tuples(tuples, names=["part", "side"])
    zed = DataFrame(events, index=["a", "b"], columns=multiind)

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            zed["eyes"]["right"].fillna(value=555, inplace=True)
    elif warn_copy_on_write:
        with tm.assert_produces_warning(None):
            zed["eyes"]["right"].fillna(value=555, inplace=True)
    else:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.assert_produces_warning(None):
                zed["eyes"]["right"].fillna(value=555, inplace=True)
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test  # with ArrayManager df.loc[0] is not a view
def test_cache_updating(using_copy_on_write, warn_copy_on_write):
    """Chained assignment followed by a direct ``loc`` set on a MultiIndex frame."""
    # 5216
    # make sure that we don't try to set a dead cache
    a = np.random.default_rng(2).random((10, 3))
    df = DataFrame(a, columns=["x", "y", "z"])
    df_original = df.copy()
    tuples = [(i, j) for i in range(5) for j in range(2)]
    index = MultiIndex.from_tuples(tuples)
    df.index = index

    # setting via chained assignment
    # but actually works, since everything is a view

    with tm.raises_chained_assignment_error():
        df.loc[0]["z"].iloc[0] = 1.0

    if using_copy_on_write:
        assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
    else:
        result = df.loc[(0, 0), "z"]
        assert result == 1

    # correct setting
    df.loc[(0, 0), "z"] = 2
    result = df.loc[(0, 0), "z"]
    assert result == 2
|
||||
|
||||
|
||||
def test_indexer_caching(monkeypatch):
    """Boolean-mask setitem works on a MultiIndex Series with ``_SIZE_CUTOFF`` patched to its length."""
    # GH5727
    # make sure that indexers are in the _internal_names_set
    size_cutoff = 20
    with monkeypatch.context():
        monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        index = MultiIndex.from_arrays([np.arange(size_cutoff), np.arange(size_cutoff)])
        s = Series(np.zeros(size_cutoff), index=index)

        # setitem
        s[s == 0] = 1
    expected = Series(np.ones(size_cutoff), index=index)
    tm.assert_series_equal(s, expected)
|
@ -0,0 +1,50 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Period,
|
||||
Series,
|
||||
period_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_multiindex_period_datetime():
    """A Period and the matching datetime both locate the same row on a period level."""
    # GH4861, using datetime in period of multiindex raises exception

    idx1 = Index(["a", "a", "a", "b", "b"])
    idx2 = period_range("2012-01", periods=len(idx1), freq="M")
    s = Series(np.random.default_rng(2).standard_normal(len(idx1)), [idx1, idx2])

    # try Period as index
    expected = s.iloc[0]
    result = s.loc["a", Period("2012-01")]
    assert result == expected

    # try datetime as index
    result = s.loc["a", datetime(2012, 1, 1)]
    assert result == expected
|
||||
|
||||
|
||||
def test_multiindex_datetime_columns():
    """Empty frames with datetime MultiIndex columns from tuples and from arrays are equal."""
    # GH35015, using datetime as column indices raises exception

    mi = MultiIndex.from_tuples(
        [(to_datetime("02/29/2020"), to_datetime("03/01/2020"))], names=["a", "b"]
    )

    df = DataFrame([], columns=mi)

    expected_df = DataFrame(
        [],
        columns=MultiIndex.from_arrays(
            [[to_datetime("02/29/2020")], [to_datetime("03/01/2020")]], names=["a", "b"]
        ),
    )

    tm.assert_frame_equal(df, expected_df)
|
@ -0,0 +1,410 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexing import IndexingError
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of Series with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "access_method",
    [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)],
)
@pytest.mark.parametrize(
    "level1_value, expected",
    [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))],
)
def test_series_getitem_multiindex(access_method, level1_value, expected):
    """Selecting on the second level via ``[]``, ``loc`` and ``xs`` gives the same Series."""
    # GH 6018
    # series regression getitem with a multi-index

    mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)], names=["A", "B"])
    ser = Series([1, 2, 3], index=mi)
    expected.index.name = "A"

    result = access_method(ser, level1_value)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level0_value", ["D", "A"])
def test_series_getitem_duplicates_multiindex(level0_value):
    """Missing first-level labels raise KeyError; a present label returns its sub-Series."""
    # GH 5725 the 'A' happens to be a valid Timestamp so it doesn't raise
    # the appropriate error, only in PY3 of course!

    index = MultiIndex(
        levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
        codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
        names=["tag", "day"],
    )
    arr = np.random.default_rng(2).standard_normal((len(index), 1))
    df = DataFrame(arr, index=index, columns=["val"])

    # confirm indexing on missing value raises KeyError
    if level0_value != "A":
        with pytest.raises(KeyError, match=r"^'A'$"):
            df.val["A"]

    with pytest.raises(KeyError, match=r"^'X'$"):
        df.val["X"]

    result = df.val[level0_value]
    expected = Series(
        arr.ravel()[0:3], name="val", index=Index([26, 37, 57], name="day")
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl):
    """A two-level key returns the matching rows with the first two levels dropped."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    expected = s.reindex(s.index[42:65])
    expected.index = expected.index.droplevel(0).droplevel(0)

    result = indexer_sl(s)[2000, 3]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_getitem_returns_scalar(
    multiindex_year_month_day_dataframe_random_data, indexer_sl
):
    """A key naming all three levels returns the single element at that position."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    expected = s.iloc[49]

    result = indexer_sl(s)[2000, 3, 10]
    assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_error,expected_error_msg",
    [
        (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
        (lambda s: s.__getitem__(len(s)), KeyError, ""),  # match should include len(s)
        (lambda s: s[len(s)], KeyError, ""),  # match should include len(s)
        (
            lambda s: s.iloc[len(s)],
            IndexError,
            "single positional indexer is out-of-bounds",
        ),
    ],
)
def test_series_getitem_indexing_errors(
    multiindex_year_month_day_dataframe_random_data,
    indexer,
    expected_error,
    expected_error_msg,
):
    """Each invalid lookup raises the parametrized exception with a matching message."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    with pytest.raises(expected_error, match=expected_error_msg):
        indexer(s)
|
||||
|
||||
|
||||
def test_series_getitem_corner_generator(
    multiindex_year_month_day_dataframe_random_data,
):
    """Indexing with a generator of booleans matches indexing with the boolean mask."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    result = s[(x > 0 for x in s)]
    expected = s[s > 0]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_getitem_simple(multiindex_dataframe_random_data):
    """The ``("foo", "one")`` column of the transposed frame equals its first column."""
    df = multiindex_dataframe_random_data.T
    expected = df.values[:, 0]
    result = df["foo", "one"].values
    tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_error_msg",
    [
        (lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"),
        (lambda df: df["foobar"], r"^'foobar'$"),
    ],
)
def test_frame_getitem_simple_key_error(
    multiindex_dataframe_random_data, indexer, expected_error_msg
):
    """Missing column keys raise KeyError whose message is exactly the key's repr."""
    df = multiindex_dataframe_random_data.T
    with pytest.raises(KeyError, match=expected_error_msg):
        indexer(df)
|
||||
|
||||
|
||||
def test_tuple_string_column_names():
    """A list key mixing a tuple label and a string label works on a flattened column index."""
    # GH#50372
    mi = MultiIndex.from_tuples([("a", "aa"), ("a", "ab"), ("b", "ba"), ("b", "bb")])
    df = DataFrame([range(4), range(1, 5), range(2, 6)], columns=mi)
    df["single_index"] = 0

    df_flat = df.copy()
    df_flat.columns = df_flat.columns.to_flat_index()
    df_flat["new_single_index"] = 0

    result = df_flat[[("a", "aa"), "new_single_index"]]
    expected = DataFrame(
        [[0, 0], [1, 0], [2, 0]], columns=Index([("a", "aa"), "new_single_index"])
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_getitem_multicolumn_empty_level():
    """Selecting a top-level label whose middle level is ``""`` returns the third-level column."""
    df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
    df.columns = [
        ["level1 item1", "level1 item2"],
        ["", "level2 item2"],
        ["level3 item1", "level3 item2"],
    ]

    result = df["level1 item1"]
    expected = DataFrame(
        [["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"]
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_slice",
    [
        (lambda df: df["foo"], slice(3)),
        (lambda df: df["bar"], slice(3, 5)),
        (lambda df: df.loc[:, "bar"], slice(3, 5)),
    ],
)
def test_frame_getitem_toplevel(
    multiindex_dataframe_random_data, indexer, expected_slice
):
    """Selecting a top-level column label returns that column slice with level 0 dropped."""
    df = multiindex_dataframe_random_data.T
    expected = df.reindex(columns=df.columns[expected_slice])
    expected.columns = expected.columns.droplevel(0)
    result = indexer(df)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mixed_depth_get():
|
||||
arrays = [
|
||||
["a", "top", "top", "routine1", "routine1", "routine2"],
|
||||
["", "OD", "OD", "result1", "result2", "result1"],
|
||||
["", "wx", "wy", "", "", ""],
|
||||
]
|
||||
|
||||
tuples = sorted(zip(*arrays))
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index)
|
||||
|
||||
result = df["a"]
|
||||
expected = df["a", "", ""].rename("a")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df["routine1", "result1"]
|
||||
expected = df["routine1", "result1", ""]
|
||||
expected = expected.rename(("routine1", "result1"))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_getitem_nan_multiindex(nulls_fixture):
|
||||
# GH#29751
|
||||
# loc on a multiindex containing nan values
|
||||
n = nulls_fixture # for code readability
|
||||
cols = ["a", "b", "c"]
|
||||
df = DataFrame(
|
||||
[[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]],
|
||||
columns=cols,
|
||||
).set_index(["a", "b"])
|
||||
df["c"] = df["c"].astype("int64")
|
||||
|
||||
idx = (21, n)
|
||||
result = df.loc[:idx]
|
||||
expected = DataFrame([[11, n, 13], [21, n, 23]], columns=cols).set_index(["a", "b"])
|
||||
expected["c"] = expected["c"].astype("int64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[idx:]
|
||||
expected = DataFrame(
|
||||
[[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols
|
||||
).set_index(["a", "b"])
|
||||
expected["c"] = expected["c"].astype("int64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
idx1, idx2 = (21, n), (31, n)
|
||||
result = df.loc[idx1:idx2]
|
||||
expected = DataFrame([[21, n, 23], [31, n, 33]], columns=cols).set_index(["a", "b"])
|
||||
expected["c"] = expected["c"].astype("int64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer,expected",
|
||||
[
|
||||
(
|
||||
(["b"], ["bar", np.nan]),
|
||||
(
|
||||
DataFrame(
|
||||
[[2, 3], [5, 6]],
|
||||
columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(
|
||||
(["a", "b"]),
|
||||
(
|
||||
DataFrame(
|
||||
[[1, 2, 3], [4, 5, 6]],
|
||||
columns=MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("b", "bar"), ("b", np.nan)]
|
||||
),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(
|
||||
(["b"]),
|
||||
(
|
||||
DataFrame(
|
||||
[[2, 3], [5, 6]],
|
||||
columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(
|
||||
(["b"], ["bar"]),
|
||||
(
|
||||
DataFrame(
|
||||
[[2], [5]],
|
||||
columns=MultiIndex.from_tuples([("b", "bar")]),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(
|
||||
(["b"], [np.nan]),
|
||||
(
|
||||
DataFrame(
|
||||
[[3], [6]],
|
||||
columns=MultiIndex(
|
||||
codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]]
|
||||
),
|
||||
dtype="int64",
|
||||
)
|
||||
),
|
||||
),
|
||||
(("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))),
|
||||
],
|
||||
)
|
||||
def test_frame_getitem_nan_cols_multiindex(
|
||||
indexer,
|
||||
expected,
|
||||
nulls_fixture,
|
||||
):
|
||||
# Slicing MultiIndex including levels with nan values, for more information
|
||||
# see GH#25154
|
||||
df = DataFrame(
|
||||
[[1, 2, 3], [4, 5, 6]],
|
||||
columns=MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("b", "bar"), ("b", nulls_fixture)]
|
||||
),
|
||||
dtype="int64",
|
||||
)
|
||||
|
||||
result = df.loc[:, indexer]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index with duplicates
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dataframe_with_duplicate_index():
|
||||
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
|
||||
data = [["a", "d", "e", "c", "f", "b"], [1, 4, 5, 3, 6, 2], [1, 4, 5, 3, 6, 2]]
|
||||
index = ["h1", "h3", "h5"]
|
||||
columns = MultiIndex(
|
||||
levels=[["A", "B"], ["A1", "A2", "B1", "B2"]],
|
||||
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
|
||||
names=["main", "sub"],
|
||||
)
|
||||
return DataFrame(data, index=index, columns=columns)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]]
|
||||
)
|
||||
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
|
||||
# GH 4145
|
||||
df = dataframe_with_duplicate_index
|
||||
index = Index(["h1", "h3", "h5"])
|
||||
columns = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"])
|
||||
expected = DataFrame([["a", 1, 1]], index=columns, columns=index).T
|
||||
|
||||
result = indexer(df)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
|
||||
# GH 4146, not returning a block manager when selecting a unique index
|
||||
# from a duplicate index
|
||||
# as of 4879, this returns a Series (which is similar to what happens
|
||||
# with a non-unique)
|
||||
df = dataframe_with_duplicate_index
|
||||
expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1")
|
||||
result = df["A"]["A1"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
|
||||
# selecting a non_unique from the 2nd level
|
||||
df = dataframe_with_duplicate_index
|
||||
expected = DataFrame(
|
||||
[["d", 4, 4], ["e", 5, 5]],
|
||||
index=Index(["B2", "B2"], name="sub"),
|
||||
columns=["h1", "h3", "h5"],
|
||||
).T
|
||||
result = df["A"]["B2"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_empty_slice():
|
||||
# GH 15454
|
||||
df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]]))
|
||||
result = df[[]]
|
||||
expected = DataFrame(
|
||||
index=[0, 1], columns=MultiIndex(levels=[[1], [2]], codes=[[], []])
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_empty_multiindex():
|
||||
# GH#36936
|
||||
arrays = [["a", "a", "b", "a"], ["a", "a", "b", "b"]]
|
||||
index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
|
||||
df = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
|
||||
|
||||
# loc on empty multiindex == loc with False mask
|
||||
empty_multiindex = df.loc[df.loc[:, "value"] == 0, :].index
|
||||
result = df.loc[empty_multiindex, :]
|
||||
expected = df.loc[[False] * len(df.index), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# replacing value with loc on empty multiindex
|
||||
df.loc[df.loc[df.loc[:, "value"] == 0].index, "value"] = 5
|
||||
result = df
|
||||
expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
|
||||
tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,171 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def simple_multiindex_dataframe():
|
||||
"""
|
||||
Factory function to create simple 3 x 3 dataframe with
|
||||
both columns and row MultiIndex using supplied data or
|
||||
random data by default.
|
||||
"""
|
||||
|
||||
data = np.random.default_rng(2).standard_normal((3, 3))
|
||||
return DataFrame(
|
||||
data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]]
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, expected",
|
||||
[
|
||||
(
|
||||
lambda df: df.iloc[0],
|
||||
lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)),
|
||||
),
|
||||
(
|
||||
lambda df: df.iloc[2],
|
||||
lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)),
|
||||
),
|
||||
(
|
||||
lambda df: df.iloc[:, 2],
|
||||
lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
|
||||
df = simple_multiindex_dataframe
|
||||
arr = df.values
|
||||
result = indexer(df)
|
||||
expected = expected(arr)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
|
||||
df = simple_multiindex_dataframe
|
||||
result = df.iloc[[0, 1]]
|
||||
expected = df.xs(4, drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_returns_scalar(simple_multiindex_dataframe):
|
||||
df = simple_multiindex_dataframe
|
||||
arr = df.values
|
||||
result = df.iloc[2, 2]
|
||||
expected = arr[2, 2]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_iloc_getitem_multiple_items():
|
||||
# GH 5528
|
||||
tup = zip(*[["a", "a", "b", "b"], ["x", "y", "x", "y"]])
|
||||
index = MultiIndex.from_tuples(tup)
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), index=index)
|
||||
result = df.iloc[[2, 3]]
|
||||
expected = df.xs("b", drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_getitem_labels():
|
||||
# this is basically regular indexing
|
||||
arr = np.random.default_rng(2).standard_normal((4, 3))
|
||||
df = DataFrame(
|
||||
arr,
|
||||
columns=[["i", "i", "j"], ["A", "A", "B"]],
|
||||
index=[["i", "i", "j", "k"], ["X", "X", "Y", "Y"]],
|
||||
)
|
||||
result = df.iloc[2, 2]
|
||||
expected = arr[2, 2]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_frame_getitem_slice(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
result = df.iloc[:4]
|
||||
expected = df[:4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_setitem_slice(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
df.iloc[:4] = 0
|
||||
|
||||
assert (df.values[:4] == 0).all()
|
||||
assert (df.values[4:] != 0).all()
|
||||
|
||||
|
||||
def test_indexing_ambiguity_bug_1678():
|
||||
# GH 1678
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")]
|
||||
)
|
||||
index = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
|
||||
|
||||
df = DataFrame(np.arange(12).reshape((4, 3)), index=index, columns=columns)
|
||||
|
||||
result = df.iloc[:, 1]
|
||||
expected = df.loc[:, ("Ohio", "Red")]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_integer_locations():
|
||||
# GH 13797
|
||||
data = [
|
||||
["str00", "str01"],
|
||||
["str10", "str11"],
|
||||
["str20", "srt21"],
|
||||
["str30", "str31"],
|
||||
["str40", "str41"],
|
||||
]
|
||||
|
||||
index = MultiIndex.from_tuples(
|
||||
[("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
|
||||
)
|
||||
|
||||
expected = DataFrame(data)
|
||||
df = DataFrame(data, index=index)
|
||||
|
||||
result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, indexes, values, expected_k",
|
||||
[
|
||||
# test without indexer value in first level of MultiIndex
|
||||
([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
|
||||
# test like code sample 1 in the issue
|
||||
([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]),
|
||||
# test like code sample 2 in the issue
|
||||
([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
|
||||
# test like code sample 3 in the issue
|
||||
([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]),
|
||||
],
|
||||
)
|
||||
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
|
||||
# GH17148
|
||||
df = DataFrame(data=data, columns=["i", "j", "k"])
|
||||
df = df.set_index(["i", "j"])
|
||||
|
||||
series = df.k.copy()
|
||||
for i, v in zip(indexes, values):
|
||||
series.iloc[i] += v
|
||||
|
||||
df["k"] = expected_k
|
||||
expected = df.k
|
||||
tm.assert_series_equal(series, expected)
|
||||
|
||||
|
||||
def test_getitem_iloc(multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
result = df.iloc[2]
|
||||
expected = df.xs(df.index[2])
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,118 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def m():
|
||||
return 5
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def n():
|
||||
return 100
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def cols():
|
||||
return ["jim", "joe", "jolie", "joline", "jolia"]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def vals(n):
|
||||
vals = [
|
||||
np.random.default_rng(2).integers(0, 10, n),
|
||||
np.random.default_rng(2).choice(list("abcdefghij"), n),
|
||||
np.random.default_rng(2).choice(
|
||||
pd.date_range("20141009", periods=10).tolist(), n
|
||||
),
|
||||
np.random.default_rng(2).choice(list("ZYXWVUTSRQ"), n),
|
||||
np.random.default_rng(2).standard_normal(n),
|
||||
]
|
||||
vals = list(map(tuple, zip(*vals)))
|
||||
return vals
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def keys(n, m, vals):
|
||||
# bunch of keys for testing
|
||||
keys = [
|
||||
np.random.default_rng(2).integers(0, 11, m),
|
||||
np.random.default_rng(2).choice(list("abcdefghijk"), m),
|
||||
np.random.default_rng(2).choice(
|
||||
pd.date_range("20141009", periods=11).tolist(), m
|
||||
),
|
||||
np.random.default_rng(2).choice(list("ZYXWVUTSRQP"), m),
|
||||
]
|
||||
keys = list(map(tuple, zip(*keys)))
|
||||
keys += [t[:-1] for t in vals[:: n // m]]
|
||||
return keys
|
||||
|
||||
|
||||
# covers both unique index and non-unique index
|
||||
@pytest.fixture
|
||||
def df(vals, cols):
|
||||
return DataFrame(vals, columns=cols)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def a(df):
|
||||
return pd.concat([df, df])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def b(df, cols):
|
||||
return df.drop_duplicates(subset=cols[:-1])
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
|
||||
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
|
||||
@pytest.mark.parametrize("frame_fixture", ["a", "b"])
|
||||
def test_multiindex_get_loc(request, lexsort_depth, keys, frame_fixture, cols):
|
||||
# GH7724, GH2646
|
||||
|
||||
frame = request.getfixturevalue(frame_fixture)
|
||||
if lexsort_depth == 0:
|
||||
df = frame.copy(deep=False)
|
||||
else:
|
||||
df = frame.sort_values(by=cols[:lexsort_depth])
|
||||
|
||||
mi = df.set_index(cols[:-1])
|
||||
assert not mi.index._lexsort_depth < lexsort_depth
|
||||
for key in keys:
|
||||
mask = np.ones(len(df), dtype=bool)
|
||||
|
||||
# test for all partials of this key
|
||||
for i, k in enumerate(key):
|
||||
mask &= df.iloc[:, i] == k
|
||||
|
||||
if not mask.any():
|
||||
assert key[: i + 1] not in mi.index
|
||||
continue
|
||||
|
||||
assert key[: i + 1] in mi.index
|
||||
right = df[mask].copy(deep=False)
|
||||
|
||||
if i + 1 != len(key): # partial key
|
||||
return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
|
||||
assert return_value is None
|
||||
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
|
||||
|
||||
else: # full key
|
||||
return_value = right.set_index(cols[:-1], inplace=True)
|
||||
assert return_value is None
|
||||
if len(right) == 1: # single hit
|
||||
right = Series(
|
||||
right["jolia"].values, name=right.index[0], index=["jolia"]
|
||||
)
|
||||
tm.assert_series_equal(mi.loc[key[: i + 1]], right)
|
||||
else: # multi hit
|
||||
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
|
@ -0,0 +1,992 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import (
|
||||
IndexingError,
|
||||
PerformanceWarning,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def single_level_multiindex():
|
||||
"""single level MultiIndex"""
|
||||
return MultiIndex(
|
||||
levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def frame_random_data_integer_multi_index():
|
||||
levels = [[0, 1], [0, 1, 2]]
|
||||
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
|
||||
index = MultiIndex(levels=levels, codes=codes)
|
||||
return DataFrame(np.random.default_rng(2).standard_normal((6, 2)), index=index)
|
||||
|
||||
|
||||
class TestMultiIndexLoc:
|
||||
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
frame.loc[("bar", "two"), "B"] = 5
|
||||
assert frame.loc[("bar", "two"), "B"] == 5
|
||||
|
||||
# with integer labels
|
||||
df = frame.copy()
|
||||
df.columns = list(range(3))
|
||||
df.loc[("bar", "two"), 1] = 7
|
||||
assert df.loc[("bar", "two"), 1] == 7
|
||||
|
||||
def test_loc_getitem_general(self, any_real_numpy_dtype):
|
||||
# GH#2817
|
||||
dtype = any_real_numpy_dtype
|
||||
data = {
|
||||
"amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
|
||||
"col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
|
||||
"num": {0: 12, 1: 11, 2: 12, 3: 12, 4: 12},
|
||||
}
|
||||
df = DataFrame(data)
|
||||
df = df.astype({"col": dtype, "num": dtype})
|
||||
df = df.set_index(keys=["col", "num"])
|
||||
key = 4.0, 12
|
||||
|
||||
# emits a PerformanceWarning, ok
|
||||
with tm.assert_produces_warning(PerformanceWarning):
|
||||
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
|
||||
|
||||
# this is ok
|
||||
return_value = df.sort_index(inplace=True)
|
||||
assert return_value is None
|
||||
res = df.loc[key]
|
||||
|
||||
# col has float dtype, result should be float64 Index
|
||||
col_arr = np.array([4.0] * 3, dtype=dtype)
|
||||
year_arr = np.array([12] * 3, dtype=dtype)
|
||||
index = MultiIndex.from_arrays([col_arr, year_arr], names=["col", "num"])
|
||||
expected = DataFrame({"amount": [222, 333, 444]}, index=index)
|
||||
tm.assert_frame_equal(res, expected)
|
||||
|
||||
def test_loc_getitem_multiindex_missing_label_raises(self):
|
||||
# GH#21593
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match=r"^2$"):
|
||||
df.loc[2]
|
||||
|
||||
def test_loc_getitem_list_of_tuples_with_multiindex(
|
||||
self, multiindex_year_month_day_dataframe_random_data
|
||||
):
|
||||
ser = multiindex_year_month_day_dataframe_random_data["A"]
|
||||
expected = ser.reindex(ser.index[49:51])
|
||||
result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_series(self):
|
||||
# GH14730
|
||||
# passing a series as a key with a MultiIndex
|
||||
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
|
||||
x = Series(index=index, data=range(9), dtype=np.float64)
|
||||
y = Series([1, 3])
|
||||
expected = Series(
|
||||
data=[0, 1, 2, 6, 7, 8],
|
||||
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
|
||||
dtype=np.float64,
|
||||
)
|
||||
result = x.loc[y]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = x.loc[[1, 3]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH15424
|
||||
y1 = Series([1, 3], index=[1, 2])
|
||||
result = x.loc[y1]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
empty = Series(data=[], dtype=np.float64)
|
||||
expected = Series(
|
||||
[],
|
||||
index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
|
||||
dtype=np.float64,
|
||||
)
|
||||
result = x.loc[empty]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_array(self):
|
||||
# GH15434
|
||||
# passing an array as a key with a MultiIndex
|
||||
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
|
||||
x = Series(index=index, data=range(9), dtype=np.float64)
|
||||
y = np.array([1, 3])
|
||||
expected = Series(
|
||||
data=[0, 1, 2, 6, 7, 8],
|
||||
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
|
||||
dtype=np.float64,
|
||||
)
|
||||
result = x.loc[y]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# empty array:
|
||||
empty = np.array([])
|
||||
expected = Series(
|
||||
[],
|
||||
index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
|
||||
dtype="float64",
|
||||
)
|
||||
result = x.loc[empty]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# 0-dim array (scalar):
|
||||
scalar = np.int64(1)
|
||||
expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64)
|
||||
result = x.loc[scalar]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_labels(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[["i", "i", "j"], ["A", "A", "B"]],
|
||||
index=[["i", "i", "j"], ["X", "X", "Y"]],
|
||||
)
|
||||
|
||||
# the first 2 rows
|
||||
expected = df.iloc[[0, 1]].droplevel(0)
|
||||
result = df.loc["i"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# 2nd (last) column
|
||||
expected = df.iloc[:, [2]].droplevel(0, axis=1)
|
||||
result = df.loc[:, "j"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# bottom right corner
|
||||
expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1)
|
||||
result = df.loc["j"].loc[:, "j"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with a tuple
|
||||
expected = df.iloc[[0, 1]]
|
||||
result = df.loc[("i", "X")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_ints(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
expected = df.iloc[[0, 1]].droplevel(0)
|
||||
result = df.loc[4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_missing_label_raises(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match=r"^2$"):
|
||||
df.loc[2]
|
||||
|
||||
@pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
|
||||
def test_loc_multiindex_list_missing_label(self, key, pos):
|
||||
# GH 27148 - lists with missing labels _do_ raise
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
columns=[[2, 2, 4], [6, 8, 10]],
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match="not in index"):
|
||||
df.loc[key]
|
||||
|
||||
def test_loc_multiindex_too_many_dims_raises(self):
|
||||
# GH 14885
|
||||
s = Series(
|
||||
range(8),
|
||||
index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
|
||||
)
|
||||
|
||||
with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"):
|
||||
s.loc["a", "b"]
|
||||
with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"):
|
||||
s.loc["a", "d", "g"]
|
||||
with pytest.raises(IndexingError, match="Too many indexers"):
|
||||
s.loc["a", "d", "g", "j"]
|
||||
|
||||
def test_loc_multiindex_indexer_none(self):
|
||||
# GH6788
|
||||
# multi-index indexer is None (meaning take all)
|
||||
attributes = ["Attribute" + str(i) for i in range(1)]
|
||||
attribute_values = ["Value" + str(i) for i in range(5)]
|
||||
|
||||
index = MultiIndex.from_product([attributes, attribute_values])
|
||||
df = 0.1 * np.random.default_rng(2).standard_normal((10, 1 * 5)) + 0.5
|
||||
df = DataFrame(df, columns=index)
|
||||
result = df[attributes]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
# GH 7349
|
||||
# loc with a multi-index seems to be doing fallback
|
||||
df = DataFrame(
|
||||
np.arange(12).reshape(-1, 1),
|
||||
index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]),
|
||||
)
|
||||
|
||||
expected = df.loc[([1, 2],), :]
|
||||
result = df.loc[[1, 2]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_incomplete(self):
|
||||
# GH 7399
|
||||
# incomplete indexers
|
||||
s = Series(
|
||||
np.arange(15, dtype="int64"),
|
||||
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
|
||||
)
|
||||
expected = s.loc[:, "a":"c"]
|
||||
|
||||
result = s.loc[0:4, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.loc[:4, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s.loc[0:, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# GH 7400
|
||||
# multiindexer getitem with list of indexers skips wrong element
|
||||
s = Series(
|
||||
np.arange(15, dtype="int64"),
|
||||
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
|
||||
)
|
||||
expected = s.iloc[[6, 7, 8, 12, 13, 14]]
|
||||
result = s.loc[2:4:2, "a":"c"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_get_loc_single_level(self, single_level_multiindex):
|
||||
single_level = single_level_multiindex
|
||||
s = Series(
|
||||
np.random.default_rng(2).standard_normal(len(single_level)),
|
||||
index=single_level,
|
||||
)
|
||||
for k in single_level.values:
|
||||
s[k]
|
||||
|
||||
def test_loc_getitem_int_slice(self):
|
||||
# GH 3053
|
||||
# loc should treat integer slices like label slices
|
||||
|
||||
index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]])
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
|
||||
result = df.loc[6:8, :]
|
||||
expected = df
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]])
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
|
||||
result = df.loc[20:30, :]
|
||||
expected = df.iloc[2:]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# doc examples
|
||||
result = df.loc[10, :]
|
||||
expected = df.iloc[0:2]
|
||||
expected.index = ["a", "b"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[:, 10]
|
||||
expected = df[10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index)
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index)
|
||||
)
|
||||
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
|
||||
# GH #19686
|
||||
# .loc should work with nested indexers which can be
|
||||
# any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices
|
||||
|
||||
def convert_nested_indexer(indexer_type, keys):
|
||||
if indexer_type == np.ndarray:
|
||||
return np.array(keys)
|
||||
if indexer_type == slice:
|
||||
return slice(*keys)
|
||||
return indexer_type(keys)
|
||||
|
||||
a = [10, 20, 30]
|
||||
b = [1, 2, 3]
|
||||
index = MultiIndex.from_product([a, b])
|
||||
df = DataFrame(
|
||||
np.arange(len(index), dtype="int64"), index=index, columns=["Data"]
|
||||
)
|
||||
|
||||
keys = ([10, 20], [2, 3])
|
||||
types = (indexer_type_1, indexer_type_2)
|
||||
|
||||
# check indexers with all the combinations of nested objects
|
||||
# of all the valid types
|
||||
indexer = tuple(
|
||||
convert_nested_indexer(indexer_type, k)
|
||||
for indexer_type, k in zip(types, keys)
|
||||
)
|
||||
if indexer_type_1 is set or indexer_type_2 is set:
|
||||
with pytest.raises(TypeError, match="as an indexer is not supported"):
|
||||
df.loc[indexer, "Data"]
|
||||
|
||||
return
|
||||
else:
|
||||
result = df.loc[indexer, "Data"]
|
||||
expected = Series(
|
||||
[1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
|
||||
)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series):
|
||||
# GH#37711
|
||||
mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
|
||||
obj = frame_or_series([1, 2], index=mi)
|
||||
obj.loc[("a",)] = 0
|
||||
expected = frame_or_series([0, 2], index=mi)
|
||||
tm.assert_equal(obj, expected)
|
||||
|
||||
@pytest.mark.parametrize("indexer", [("a",), ("a")])
|
||||
def test_multiindex_one_dimensional_tuple_columns(self, indexer):
|
||||
# GH#37711
|
||||
mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
|
||||
obj = DataFrame([1, 2], index=mi)
|
||||
obj.loc[indexer, :] = 0
|
||||
expected = DataFrame([0, 2], index=mi)
|
||||
tm.assert_frame_equal(obj, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)]
|
||||
)
|
||||
def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value):
|
||||
# GH#39147
|
||||
mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
|
||||
df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"])
|
||||
df.loc[indexer, ["c", "d"]] = 1.0
|
||||
expected = DataFrame(
|
||||
[[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]],
|
||||
index=mi,
|
||||
columns=["a", "b", "c", "d"],
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_sorted_multiindex_after_union(self):
|
||||
# GH#44752
|
||||
midx = MultiIndex.from_product(
|
||||
[pd.date_range("20110101", periods=2), Index(["a", "b"])]
|
||||
)
|
||||
ser1 = Series(1, index=midx)
|
||||
ser2 = Series(1, index=midx[:2])
|
||||
df = pd.concat([ser1, ser2], axis=1)
|
||||
expected = df.copy()
|
||||
result = df.loc["2011-01-01":"2011-01-02"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame({0: ser1, 1: ser2})
|
||||
result = df.loc["2011-01-01":"2011-01-02"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1)
|
||||
result = df.loc["2011-01-01":"2011-01-02"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_no_second_level_index(self):
|
||||
# GH#43599
|
||||
df = DataFrame(
|
||||
index=MultiIndex.from_product([list("ab"), list("cd"), list("e")]),
|
||||
columns=["Val"],
|
||||
)
|
||||
res = df.loc[np.s_[:, "c", :]]
|
||||
expected = DataFrame(
|
||||
index=MultiIndex.from_product([list("ab"), list("e")]), columns=["Val"]
|
||||
)
|
||||
tm.assert_frame_equal(res, expected)
|
||||
|
||||
def test_loc_multi_index_key_error(self):
|
||||
# GH 51892
|
||||
df = DataFrame(
|
||||
{
|
||||
(1, 2): ["a", "b", "c"],
|
||||
(1, 3): ["d", "e", "f"],
|
||||
(2, 2): ["g", "h", "i"],
|
||||
(2, 4): ["j", "k", "l"],
|
||||
}
|
||||
)
|
||||
with pytest.raises(KeyError, match=r"(1, 4)"):
|
||||
df.loc[0, (1, 4)]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, pos",
    [
        ([], []),  # empty ok
        (["A"], slice(3)),
        (["A", "D"], []),  # "D" isn't present -> raise
        (["D", "E"], []),  # no values found -> raise
        (["D"], []),  # same, with single item list: GH 27148
        (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
        (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
    ],
)
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
    """Check ``Series.loc`` on a MultiIndex when some requested labels are missing.

    ``pos`` gives the positional selection that ``indexer`` should produce.
    An empty ``pos`` paired with a non-empty ``indexer`` marks a case that
    must raise ``KeyError``.
    """
    # GH 7866
    # multi-index slicing with missing indexers
    idx = MultiIndex.from_product(
        [["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
    )
    ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
    expected = ser.iloc[pos]

    if expected.size == 0 and indexer != []:
        # NOTE: ``match`` is interpreted as a regular expression, so
        # ``str(indexer)`` is not a literal comparison here.
        with pytest.raises(KeyError, match=str(indexer)):
            ser.loc[indexer]
    elif indexer == (slice(None), ["foo", "bah"]):
        # "bah" is not in idx.levels[1], raising KeyError enforced in 2.0
        with pytest.raises(KeyError, match="'bah'"):
            ser.loc[indexer]
    else:
        result = ser.loc[indexer]
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
    """A column indexer containing an empty list selects no columns."""
    # GH 8737
    # empty indexer
    multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"]))
    df = DataFrame(
        np.random.default_rng(2).standard_normal((5, 6)),
        index=range(5),
        columns=multi_index,
    )
    df = df.sort_index(level=0, axis=1)

    # Same rows, but with the column MultiIndex reindexed to no entries.
    expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0])
    result = df.loc[:, columns_indexer]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
    """``.loc`` returns a stored callable unchanged from a MultiIndex-column frame."""
    # regression from < 0.14.0
    # GH 7914
    df = DataFrame(
        [[np.mean, np.median], ["mean", "median"]],
        columns=MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")]),
        index=["function", "name"],
    )
    result = df.loc["function", ("functs", "mean")]
    expected = np.mean
    assert result == expected
|
||||
|
||||
|
||||
def test_loc_getitem_tuple_plus_slice():
    """``df.loc[(0, 0), :]`` gives the same Series as ``df.loc[0, 0]``."""
    # GH 671
    df = DataFrame(
        {
            "a": np.arange(10),
            "b": np.arange(10),
            "c": np.random.default_rng(2).standard_normal(10),
            "d": np.random.default_rng(2).standard_normal(10),
        }
    ).set_index(["a", "b"])
    expected = df.loc[0, 0]
    result = df.loc[(0, 0), :]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int(frame_random_data_integer_multi_index):
    """``df.loc[1]`` equals the last three rows with the first level dropped."""
    df = frame_random_data_integer_multi_index
    result = df.loc[1]
    expected = df[-3:]
    expected.index = expected.index.droplevel(0)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index):
    """``df.loc[3]`` raises ``KeyError`` whose message is exactly ``3``."""
    df = frame_random_data_integer_multi_index
    with pytest.raises(KeyError, match=r"^3$"):
        df.loc[3]
|
||||
|
||||
|
||||
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
    """Setting a missing ``(row tuple, column)`` cell adds it; it can be read after sorting."""
    df = multiindex_dataframe_random_data

    # test setup - check key not in dataframe
    with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"):
        df.loc[("bar", "three"), "B"]

    # in theory should be inserting in a sorted space????
    df.loc[("bar", "three"), "B"] = 0
    expected = 0
    result = df.sort_index().loc[("bar", "three"), "B"]
    assert result == expected
|
||||
|
||||
|
||||
def test_loc_setitem_single_column_slice():
    """Assign a whole top-level column of a MultiIndex-column frame via ``.loc[:, key]``.

    Covers a frame filled with strings and a frame filled with ``np.nan``.
    """
    # case from https://github.com/pandas-dev/pandas/issues/27841
    df = DataFrame(
        "string",
        index=list("abcd"),
        columns=MultiIndex.from_product([["Main"], ("another", "one")]),
    )
    df["labels"] = "a"
    df.loc[:, "labels"] = df.index
    tm.assert_numpy_array_equal(np.asarray(df["labels"]), np.asarray(df.index))

    # test with non-object block
    df = DataFrame(
        np.nan,
        index=range(4),
        columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]),
    )
    expected = df.copy()
    df.loc[:, "B"] = np.arange(4)
    # ("B", "1") is the third column, so the positional write below is the
    # reference for the label-based write above.
    expected.iloc[:, 2] = np.arange(4)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_loc_nan_multiindex(using_infer_string):
    """Chained ``.loc`` on a MultiIndex with NaN in the last level keeps the NaN label.

    ``using_infer_string`` selects the expected dtype of the remaining
    index level.
    """
    # GH 5286
    tups = [
        ("Good Things", "C", np.nan),
        ("Good Things", "R", np.nan),
        ("Bad Things", "C", np.nan),
        ("Bad Things", "T", np.nan),
        ("Okay Things", "N", "B"),
        ("Okay Things", "N", "D"),
        ("Okay Things", "B", np.nan),
        ("Okay Things", "D", np.nan),
    ]
    df = DataFrame(
        np.ones((8, 4)),
        columns=Index(["d1", "d2", "d3", "d4"]),
        index=MultiIndex.from_tuples(tups, names=["u1", "u2", "u3"]),
    )
    result = df.loc["Good Things"].loc["C"]
    expected = DataFrame(
        np.ones((1, 4)),
        index=Index(
            [np.nan],
            dtype="object" if not using_infer_string else "string[pyarrow_numpy]",
            name="u3",
        ),
        columns=Index(["d1", "d2", "d3", "d4"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_period_string_indexing():
    """A period string and the ``Period`` object address the same scalar cell.

    The frame is built without data, so every lookup is expected to be NaN.
    """
    # GH 9892
    a = pd.period_range("2013Q1", "2013Q4", freq="Q")
    i = (1111, 2222, 3333)
    idx = MultiIndex.from_product((a, i), names=("Period", "CVR"))
    df = DataFrame(
        index=idx,
        columns=(
            "OMS",
            "OMK",
            "RES",
            "DRIFT_IND",
            "OEVRIG_IND",
            "FIN_IND",
            "VARE_UD",
            "LOEN_UD",
            "FIN_UD",
        ),
    )
    result = df.loc[("2013Q1", 1111), "OMS"]

    alt = df.loc[(a[0], 1111), "OMS"]
    assert np.isnan(alt)

    # Because the resolution of the string matches, it is an exact lookup,
    # not a slice
    assert np.isnan(result)

    alt = df.loc[("2013Q1", 1111), "OMS"]
    assert np.isnan(alt)
|
||||
|
||||
|
||||
def test_loc_datetime_mask_slicing():
    """Combine a scalar timestamp and a boolean mask in one ``.loc`` row key."""
    # GH 16699
    dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"])
    m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"])
    df = DataFrame(
        data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"]
    )
    result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"]
    expected = Series(
        [3],
        name="C1",
        index=MultiIndex.from_tuples(
            [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))],
            names=["Idx1", "Idx2"],
        ),
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_datetime_series_tuple_slicing():
    """``ser.loc[:, [date]]`` on a one-row Series returns the Series unchanged."""
    # https://github.com/pandas-dev/pandas/issues/35858
    date = pd.Timestamp("2000")
    ser = Series(
        1,
        index=MultiIndex.from_tuples([("a", date)], names=["a", "b"]),
        name="c",
    )
    result = ser.loc[:, [date]]
    tm.assert_series_equal(result, ser)
|
||||
|
||||
|
||||
def test_loc_with_mi_indexer():
    """Index a frame with a ``MultiIndex`` key; duplicated rows are all returned."""
    # https://github.com/pandas-dev/pandas/issues/35351
    df = DataFrame(
        data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]],
        index=MultiIndex.from_tuples(
            [(0, 1), (1, 0), (1, 1), (1, 1)], names=["index", "date"]
        ),
        columns=["author", "price"],
    )
    idx = MultiIndex.from_tuples([(0, 1), (1, 1)], names=["index", "date"])
    result = df.loc[idx, :]
    expected = DataFrame(
        [["a", 1], ["b", 1], ["c", 2]],
        index=MultiIndex.from_tuples([(0, 1), (1, 1), (1, 1)], names=["index", "date"]),
        columns=["author", "price"],
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_mi_with_level1_named_0():
    """``.loc`` with a first-level key works when the second level is named ``0``.

    Checked for both the DataFrame and a Series taken from it.
    """
    # GH#37194
    dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")

    ser = Series(range(3), index=dti)
    df = ser.to_frame()
    df[1] = dti

    df2 = df.set_index(0, append=True)
    assert df2.index.names == (None, 0)
    df2.index.get_loc(dti[0])  # smoke test

    result = df2.loc[dti[0]]
    expected = df2.iloc[[0]].droplevel(None)
    tm.assert_frame_equal(result, expected)

    ser2 = df2[1]
    assert ser2.index.names == (None, 0)

    result = ser2.loc[dti[0]]
    expected = ser2.iloc[[0]].droplevel(None)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_str_slice():
    """A string slice on the second level matches the two-step lookup.

    ``df2.loc[("AAPL", slice(...)), :]`` with level 0 dropped is compared
    to ``df2.loc["AAPL"].loc[slice(...), :]``.
    """
    # GH#15928
    df = DataFrame(
        [
            ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
            ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
            ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
            ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
            ["20160525 13:30:00.135", "MSFT", "51.92", "51.95"],
            ["20160525 13:30:00.135", "AAPL", "98.61", "98.62"],
        ],
        columns="time,ticker,bid,ask".split(","),
    )
    df2 = df.set_index(["ticker", "time"]).sort_index()

    res = df2.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0)
    expected = df2.loc["AAPL"].loc[slice("2016-05-25 13:30:00"), :]
    tm.assert_frame_equal(res, expected)
|
||||
|
||||
|
||||
def test_3levels_leading_period_index():
    """Look up a full three-level key whose first level is a ``PeriodIndex``."""
    # GH#24091
    pi = pd.PeriodIndex(
        ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"],
        name="datetime",
        freq="D",
    )
    lev2 = ["A", "A", "Z", "W"]
    lev3 = ["B", "C", "Q", "F"]
    mi = MultiIndex.from_arrays([pi, lev2, lev3])

    ser = Series(range(4), index=mi, dtype=np.float64)
    result = ser.loc[(pi[0], "A", "B")]
    assert result == 0.0
|
||||
|
||||
|
||||
class TestKeyErrorsWithMultiIndex:
    """Missing keys on a MultiIndex must surface as ``KeyError``."""

    def test_missing_keys_raises_keyerror(self):
        """A missing two-level tuple key raises ``KeyError``."""
        # GH#27420 KeyError, not TypeError
        df = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"])
        df2 = df.set_index(["A", "B"])

        with pytest.raises(KeyError, match="1"):
            df2.loc[(1, 6)]

    def test_missing_key_raises_keyerror2(self):
        """A missing ``ser.loc[0, 3]`` lookup raises ``KeyError`` naming the tuple."""
        # GH#21168 KeyError, not "IndexingError: Too many indexers"
        ser = Series(-1, index=MultiIndex.from_product([[0, 1]] * 2))

        with pytest.raises(KeyError, match=r"\(0, 3\)"):
            ser.loc[0, 3]

    def test_missing_key_combination(self):
        """A level combination that never occurs together raises ``KeyError``.

        ``"b"`` and ``"1"`` each exist on their own level but never in the
        same row.
        """
        # GH: 19556
        mi = MultiIndex.from_arrays(
            [
                np.array(["a", "a", "b", "b"]),
                np.array(["1", "2", "2", "3"]),
                np.array(["c", "d", "c", "d"]),
            ],
            names=["one", "two", "three"],
        )
        df = DataFrame(np.random.default_rng(2).random((4, 3)), index=mi)
        msg = r"\('b', '1', slice\(None, None, None\)\)"
        with pytest.raises(KeyError, match=msg):
            df.loc[("b", "1", slice(None)), :]
        with pytest.raises(KeyError, match=msg):
            df.index.get_locs(("b", "1", slice(None)))
        with pytest.raises(KeyError, match=r"\('b', '1'\)"):
            df.loc[("b", "1"), :]
|
||||
|
||||
|
||||
def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data):
    """``df["A"][2000, 5]`` equals ``df.loc[2000, 5]["A"]``."""
    df = multiindex_year_month_day_dataframe_random_data
    ser = df["A"]
    result = ser[2000, 5]
    expected = df.loc[2000, 5]["A"]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_with_nan():
    """Select label ``"a"`` when the same index level also contains NaN.

    Checks both the list form (levels kept) and the scalar form (first
    level dropped).
    """
    # GH: 27104
    df = DataFrame(
        {"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]}
    ).set_index(["ind1", "ind2"])
    result = df.loc[["a"]]
    expected = DataFrame(
        {"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"])
    )
    tm.assert_frame_equal(result, expected)

    result = df.loc["a"]
    expected = DataFrame({"col": [1]}, index=Index([1], name="ind2"))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_non_found_tuple():
    """A missing three-level tuple key raises ``KeyError`` showing that tuple."""
    # GH: 25236
    df = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"]).set_index(
        ["a", "b", "c"]
    )
    with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"):
        df.loc[(2.0, 2.0, 3.0)]
|
||||
|
||||
|
||||
def test_get_loc_datetime_index():
    """``get_loc`` with a partial date string agrees between Index and MultiIndex.

    A single-level ``MultiIndex`` is compared with the ``DatetimeIndex`` it
    wraps, for the plain, strided, repeated and appended variants.
    """
    # GH#24263
    index = pd.date_range("2001-01-01", periods=100)
    mi = MultiIndex.from_arrays([index])
    # Check if get_loc matches for Index and MultiIndex
    assert mi.get_loc("2001-01") == slice(0, 31, None)
    assert index.get_loc("2001-01") == slice(0, 31, None)

    loc = mi[::2].get_loc("2001-01")
    expected = index[::2].get_loc("2001-01")
    assert loc == expected

    loc = mi.repeat(2).get_loc("2001-01")
    expected = index.repeat(2).get_loc("2001-01")
    assert loc == expected

    loc = mi.append(mi).get_loc("2001-01")
    expected = index.append(index).get_loc("2001-01")
    # TODO: standardize return type for MultiIndex.get_loc
    # ``loc`` is turned into positions with ``nonzero`` before comparing.
    tm.assert_numpy_array_equal(loc.nonzero()[0], expected)
|
||||
|
||||
|
||||
def test_loc_setitem_indexer_differently_ordered():
    """Values are assigned in the order of the list part of the indexer.

    The key is ``("a", [1, 0])``, so the first value row lands on
    ``("a", 1)`` and the second on ``("a", 0)``.
    """
    # GH#34603
    mi = MultiIndex.from_product([["a", "b"], [0, 1]])
    df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi)

    indexer = ("a", [1, 0])
    df.loc[indexer, :] = np.array([[9, 10], [11, 12]])
    expected = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_index_differently_ordered_slice_none():
    """``(slice(None), [2, 1])`` returns rows ordered by the list on level 1."""
    # GH#31330
    df = DataFrame(
        [[1, 2], [3, 4], [5, 6], [7, 8]],
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=["a", "b"],
    )
    result = df.loc[(slice(None), [2, 1]), :]
    expected = DataFrame(
        [[3, 4], [7, 8], [1, 2], [5, 6]],
        index=[["a", "b", "a", "b"], [2, 2, 1, 1]],
        columns=["a", "b"],
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]])
def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer):
    """A list indexer on level 1 gives the same result with or without repeats.

    A boolean mask built with ``isin`` on the same values keeps the frame's
    original row order.
    """
    # GH#40978
    df = DataFrame(
        [1] * 8,
        index=MultiIndex.from_tuples(
            [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)]
        ),
        columns=["a"],
    )
    result = df.loc[(slice(None), indexer), :]
    expected = DataFrame(
        [1] * 8,
        index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]],
        columns=["a"],
    )
    tm.assert_frame_equal(result, expected)

    result = df.loc[df.index.isin(indexer, level=1), :]
    tm.assert_frame_equal(result, df)
|
||||
|
||||
|
||||
def test_loc_getitem_drops_levels_for_one_row_dataframe():
    """Scalar-indexed levels are dropped even when the frame has a single row.

    Checked for both the DataFrame and the equivalent Series.
    """
    # GH#10521 "x" and "z" are both scalar indexing, so those levels are dropped
    mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])
    df = DataFrame({"d": [0]}, index=mi)
    expected = df.droplevel([0, 2])
    result = df.loc["x", :, "z"]
    tm.assert_frame_equal(result, expected)

    ser = Series([0], index=mi)
    result = ser.loc["x", :, "z"]
    expected = Series([0], index=Index(["y"], name="b"))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_mi_columns_loc_list_label_order():
    """A list of top-level column labels returns columns in the requested order."""
    # GH 10710
    cols = MultiIndex.from_product([["A", "B", "C"], [1, 2]])
    df = DataFrame(np.zeros((5, 6)), columns=cols)
    result = df.loc[:, ["B", "A"]]
    expected = DataFrame(
        np.zeros((5, 4)),
        columns=MultiIndex.from_tuples([("B", 1), ("B", 2), ("A", 1), ("A", 2)]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_mi_partial_indexing_list_raises():
    """The list key ``["b", 2]`` raises ``KeyError`` reporting ``[2]`` as missing."""
    # GH 13501
    frame = DataFrame(
        np.arange(12).reshape((4, 3)),
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]],
    )
    frame.index.names = ["key1", "key2"]
    frame.columns.names = ["state", "color"]
    with pytest.raises(KeyError, match="\\[2\\] not in index"):
        frame.loc[["b", 2], "Colorado"]
|
||||
|
||||
|
||||
def test_mi_indexing_list_nonexistent_raises():
    """A list of labels that are all absent raises ``KeyError`` naming them."""
    # GH 15452
    s = Series(range(4), index=MultiIndex.from_product([[1, 2], ["a", "b"]]))
    with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"):
        s.loc[["not", "found"]]
|
||||
|
||||
|
||||
def test_mi_add_cell_missing_row_non_unique():
    """Add new rows via ``.loc`` to a frame with a non-unique row index.

    Row ``"c"`` is created with a scalar and then has one cell overwritten.
    Row ``"d"`` is created by setting a single cell, so its other cells are
    expected to be NaN.
    """
    # GH 16018
    result = DataFrame(
        [[1, 2, 5, 6], [3, 4, 7, 8]],
        index=["a", "a"],
        columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
    )
    result.loc["c"] = -1
    result.loc["c", (1, "A")] = 3
    result.loc["d", (1, "A")] = 3
    expected = DataFrame(
        [
            [1.0, 2.0, 5.0, 6.0],
            [3.0, 4.0, 7.0, 8.0],
            [3.0, -1.0, -1, -1],
            [3.0, np.nan, np.nan, np.nan],
        ],
        index=["a", "a", "c", "d"],
        columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_get_scalar_casting_to_float():
    """An integer cell next to a float column is returned as ``np.int64``.

    Checked for the scalar tuple key and for the list-of-tuples key.
    """
    # GH#41369
    df = DataFrame(
        {"a": 1.0, "b": 2}, index=MultiIndex.from_arrays([[3], [4]], names=["c", "d"])
    )
    result = df.loc[(3, 4), "b"]
    assert result == 2
    assert isinstance(result, np.int64)
    result = df.loc[[(3, 4)], "b"].iloc[0]
    assert result == 2
    assert isinstance(result, np.int64)
|
||||
|
||||
|
||||
def test_loc_empty_single_selector_with_names():
    """Selecting one first-level label keeps the second level's name.

    The level names are the integers ``1`` and ``0``.
    """
    # GH 19517
    idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0])
    s2 = Series(index=idx, dtype=np.float64)
    result = s2.loc["a"]
    expected = Series([np.nan, np.nan], index=Index(["A", "B"], name=0))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_keyerror_rightmost_key_missing():
    """When only the last key component is missing, ``KeyError`` reports just it."""
    # GH 20951

    df = DataFrame(
        {
            "A": [100, 100, 200, 200, 300, 300],
            "B": [10, 10, 20, 21, 31, 33],
            "C": range(6),
        }
    )
    df = df.set_index(["A", "B"])
    with pytest.raises(KeyError, match="^1$"):
        df.loc[(100, 1)]
|
||||
|
||||
|
||||
def test_multindex_series_loc_with_tuple_label():
    """Look up a key whose second-level label is itself a tuple."""
    # GH#43908
    mi = MultiIndex.from_tuples([(1, 2), (3, (4, 5))])
    ser = Series([1, 2], index=mi)
    result = ser.loc[(3, (4, 5))]
    assert result == 2
|
@ -0,0 +1,235 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._libs.index as libindex
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalDtype,
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.boolean import BooleanDtype
|
||||
|
||||
|
||||
class TestMultiIndexBasic:
    """Regression tests for MultiIndex construction, alignment and lookup."""

    def test_multiindex_perf_warn(self):
        """Tuple lookups with ``.loc`` on this frame emit ``PerformanceWarning``."""
        df = DataFrame(
            {
                "jim": [0, 0, 1, 1],
                "joe": ["x", "x", "z", "y"],
                "jolie": np.random.default_rng(2).random(4),
            }
        ).set_index(["jim", "joe"])

        with tm.assert_produces_warning(PerformanceWarning):
            df.loc[(1, "z")]

        df = df.iloc[[2, 1, 3, 0]]
        with tm.assert_produces_warning(PerformanceWarning):
            df.loc[(0,)]

    @pytest.mark.parametrize("offset", [-5, 5])
    def test_indexing_over_hashtable_size_cutoff(self, monkeypatch, offset):
        """Tuple lookups work for index sizes below and above ``_SIZE_CUTOFF``."""
        size_cutoff = 20
        n = size_cutoff + offset

        # Everything that depends on the patched cutoff stays inside the
        # context so the patch is still active when the lookups run.
        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n))))

            # hai it works!
            assert s[("a", 5)] == 5
            assert s[("a", 6)] == 6
            assert s[("a", 7)] == 7

    def test_multi_nan_indexing(self):
        """``set_index(..., drop=False)`` keeps NaN in the index and in the column."""
        # GH 3588
        df = DataFrame(
            {
                "a": ["R1", "R2", np.nan, "R4"],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            }
        )
        result = df.set_index(["a", "b"], drop=False)
        expected = DataFrame(
            {
                "a": ["R1", "R2", np.nan, "R4"],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            },
            index=[
                Index(["R1", "R2", np.nan, "R4"], name="a"),
                Index(["C1", "C2", "C3", "C4"], name="b"),
            ],
        )
        tm.assert_frame_equal(result, expected)

    def test_exclusive_nat_column_indexing(self):
        """``set_index`` works when one key column holds only ``NaT``."""
        # GH 38025
        # test multi indexing when one column exclusively contains NaT values
        df = DataFrame(
            {
                "a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            }
        )
        df = df.set_index(["a", "b"])
        expected = DataFrame(
            {
                "c": [10, 15, np.nan, 20],
            },
            index=[
                Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"),
                Index(["C1", "C2", "C3", "C4"], name="b"),
            ],
        )
        tm.assert_frame_equal(df, expected)

    def test_nested_tuples_duplicates(self):
        """Setting by a duplicated tuple key updates every matching row.

        Checked for the bare tuple key and for the same tuple wrapped in a
        list.
        """
        # GH#30892

        dti = pd.to_datetime(["20190101", "20190101", "20190102"])
        idx = Index(["a", "a", "c"])
        mi = MultiIndex.from_arrays([dti, idx], names=["index1", "index2"])

        df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi)

        expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi)

        df2 = df.copy(deep=True)
        df2.loc[(dti[0], "a"), "c2"] = 1.0
        tm.assert_frame_equal(df2, expected)

        df3 = df.copy(deep=True)
        df3.loc[[(dti[0], "a")], "c2"] = 1.0
        tm.assert_frame_equal(df3, expected)

    def test_multiindex_with_datatime_level_preserves_freq(self):
        """``df.loc[0].index`` equals the datetime level, including its ``freq``."""
        # https://github.com/pandas-dev/pandas/issues/35563
        idx = Index(range(2), name="A")
        dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
        mi = MultiIndex.from_product([idx, dti])
        df = DataFrame(np.random.default_rng(2).standard_normal((14, 2)), index=mi)
        result = df.loc[0].index
        tm.assert_index_equal(result, dti)
        assert result.freq == dti.freq

    def test_multiindex_complex(self):
        """``set_index`` on a complex column matches ``MultiIndex.from_arrays``."""
        # GH#42145
        complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
        non_complex_data = [3, 4, 5]
        result = DataFrame(
            {
                "x": complex_data,
                "y": non_complex_data,
                "z": non_complex_data,
            }
        )
        result.set_index(["x", "y"], inplace=True)
        expected = DataFrame(
            {"z": non_complex_data},
            index=MultiIndex.from_arrays(
                [complex_data, non_complex_data],
                names=("x", "y"),
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_rename_multiindex_with_duplicates(self):
        """Renaming one level-0 label works when the index has duplicate tuples."""
        # GH 38015
        mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
        df = DataFrame(index=mi)
        df = df.rename(index={"A": "Apple"}, level=0)

        mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")])
        expected = DataFrame(index=mi2)
        tm.assert_frame_equal(df, expected)

    def test_series_align_multiindex_with_nan_overlap_only(self):
        """Align two Series whose only shared key is the all-NaN tuple."""
        # GH 38439
        mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
        mi2 = MultiIndex.from_arrays([[np.nan, 82.0], [np.nan, np.nan]])
        ser1 = Series([1, 2], index=mi1)
        ser2 = Series([1, 2], index=mi2)
        result1, result2 = ser1.align(ser2)

        mi = MultiIndex.from_arrays([[81.0, 82.0, np.nan], [np.nan, np.nan, np.nan]])
        expected1 = Series([1.0, np.nan, 2.0], index=mi)
        expected2 = Series([np.nan, 2.0, 1.0], index=mi)

        tm.assert_series_equal(result1, expected1)
        tm.assert_series_equal(result2, expected2)

    def test_series_align_multiindex_with_nan(self):
        """Align two Series that hold the same NaN-containing keys in a different order."""
        # GH 38439
        mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
        mi2 = MultiIndex.from_arrays([[np.nan, 81.0], [np.nan, np.nan]])
        ser1 = Series([1, 2], index=mi1)
        ser2 = Series([1, 2], index=mi2)
        result1, result2 = ser1.align(ser2)

        mi = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
        expected1 = Series([1, 2], index=mi)
        expected2 = Series([2, 1], index=mi)

        tm.assert_series_equal(result1, expected1)
        tm.assert_series_equal(result2, expected2)

    def test_nunique_smoke(self):
        """``nunique`` on a MultiIndex of two identical rows is 1."""
        # GH 34019
        n = DataFrame([[1, 2], [1, 2]]).set_index([0, 1]).index.nunique()
        assert n == 1

    def test_multiindex_repeated_keys(self):
        """A list key with repeated labels returns one row per requested label."""
        # GH19414
        tm.assert_series_equal(
            Series([1, 2], MultiIndex.from_arrays([["a", "b"]])).loc[
                ["a", "a", "b", "b"]
            ],
            Series([1, 1, 2, 2], MultiIndex.from_arrays([["a", "a", "b", "b"]])),
        )

    def test_multiindex_with_na_missing_key(self):
        """A missing column key raises ``KeyError`` when the columns include ``None``."""
        # GH46173
        df = DataFrame.from_dict(
            {
                ("foo",): [1, 2, 3],
                ("bar",): [5, 6, 7],
                (None,): [8, 9, 0],
            }
        )
        with pytest.raises(KeyError, match="missing_key"):
            df[[("missing_key",)]]

    def test_multiindex_dtype_preservation(self):
        """Extension dtypes survive selection from a MultiIndex-column frame.

        Checks a categorical column reached through the first level, and a
        nullable boolean column added with ``assign``.
        """
        # GH51261
        columns = MultiIndex.from_tuples([("A", "B")], names=["lvl1", "lvl2"])
        df = DataFrame(["value"], columns=columns).astype("category")
        df_no_multiindex = df["A"]
        assert isinstance(df_no_multiindex["B"].dtype, CategoricalDtype)

        # geopandas 1763 analogue
        df = DataFrame(
            [[1, 0], [0, 1]],
            columns=[
                ["foo", "foo"],
                ["location", "location"],
                ["x", "y"],
            ],
        ).assign(bools=Series([True, False], dtype="boolean"))
        assert isinstance(df["bools"].dtype, BooleanDtype)

    def test_multiindex_from_tuples_with_nan(self):
        """A bare ``np.nan`` entry in ``from_tuples`` becomes an all-NaN tuple."""
        # GH#23578
        result = MultiIndex.from_tuples([("a", "b", "c"), np.nan, ("d", "", "")])
        expected = MultiIndex.from_tuples(
            [("a", "b", "c"), (np.nan, np.nan, np.nan), ("d", "", "")]
        )
        tm.assert_index_equal(result, expected)
|
@ -0,0 +1,269 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexPartial:
|
||||
def test_getitem_partial_int(self):
    """``df[key]`` with an integer first-level column label.

    Covers a scalar key, a one-element list key, and missing keys in both
    forms.
    """
    # GH 12416
    # with single item
    l1 = [10, 20]
    l2 = ["a", "b"]
    df = DataFrame(index=range(2), columns=MultiIndex.from_product([l1, l2]))
    expected = DataFrame(index=range(2), columns=l2)
    result = df[20]
    tm.assert_frame_equal(result, expected)

    # with list
    expected = DataFrame(
        index=range(2), columns=MultiIndex.from_product([l1[1:], l2])
    )
    result = df[[20]]
    tm.assert_frame_equal(result, expected)

    # missing item:
    with pytest.raises(KeyError, match="1"):
        df[1]
    with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
        df[[1]]
|
||||
|
||||
def test_series_slice_partial(self):
|
||||
pass
|
||||
|
||||
def test_xs_partial(
    self,
    multiindex_dataframe_random_data,
    multiindex_year_month_day_dataframe_random_data,
):
    """``xs`` with a partial key agrees with the equivalent ``.loc`` lookup."""
    frame = multiindex_dataframe_random_data
    ymd = multiindex_year_month_day_dataframe_random_data
    result = frame.xs("foo")
    result2 = frame.loc["foo"]
    expected = frame.T["foo"].T
    tm.assert_frame_equal(result, expected)
    tm.assert_frame_equal(result, result2)

    result = ymd.xs((2000, 4))
    expected = ymd.loc[2000, 4]
    tm.assert_frame_equal(result, expected)

    # ex from #1796
    index = MultiIndex(
        levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
        codes=[
            [0, 0, 0, 0, 1, 1, 1, 1],
            [0, 0, 1, 1, 0, 0, 1, 1],
            [0, 1, 0, 1, 0, 1, 0, 1],
        ],
    )
    df = DataFrame(
        np.random.default_rng(2).standard_normal((8, 4)),
        index=index,
        columns=list("abcd"),
    )

    result = df.xs(("foo", "one"))
    expected = df.loc["foo", "one"]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
    """``ymd.T[2000, 2]`` selects the columns whose second-level code is 1.

    The two leading column levels are dropped from the expected frame.
    """
    ymd = multiindex_year_month_day_dataframe_random_data
    ymd = ymd.T
    result = ymd[2000, 2]

    expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
    expected.columns = expected.columns.droplevel(0).droplevel(0)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_fancy_slice_partial(
    self,
    multiindex_dataframe_random_data,
    multiindex_year_month_day_dataframe_random_data,
):
    """Label slices with partial keys match positional and code-based selections."""
    frame = multiindex_dataframe_random_data
    result = frame.loc["bar":"baz"]
    expected = frame[3:7]
    tm.assert_frame_equal(result, expected)

    ymd = multiindex_year_month_day_dataframe_random_data
    result = ymd.loc[(2000, 2):(2000, 4)]
    # Build the expected rows from the second level's codes.
    lev = ymd.index.codes[1]
    expected = ymd[(lev >= 1) & (lev <= 3)]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_partial_column_select(self):
    """A partial row key combined with a column selector matches the two-step lookup.

    A partial key that does not exist raises ``KeyError``.
    """
    idx = MultiIndex(
        codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
        levels=[["a", "b"], ["x", "y"], ["p", "q"]],
    )
    df = DataFrame(np.random.default_rng(2).random((3, 2)), index=idx)

    result = df.loc[("a", "y"), :]
    expected = df.loc[("a", "y")]
    tm.assert_frame_equal(result, expected)

    result = df.loc[("a", "y"), [1, 0]]
    expected = df.loc[("a", "y")][[1, 0]]
    tm.assert_frame_equal(result, expected)

    with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
        df.loc[("a", "foo"), :]
|
||||
|
||||
# TODO(ArrayManager) rewrite test to not use .values
# exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
@td.skip_array_manager_invalid_test
def test_partial_set(
    self,
    multiindex_year_month_day_dataframe_random_data,
    using_copy_on_write,
    warn_copy_on_write,
):
    """Setting through partial keys matches the equivalent positional writes.

    Chained assignments are wrapped in
    ``tm.raises_chained_assignment_error()``. Under copy-on-write the same
    write is repeated with a single ``.loc`` call before comparing.
    """
    # GH #397
    ymd = multiindex_year_month_day_dataframe_random_data
    df = ymd.copy()
    exp = ymd.copy()
    df.loc[2000, 4] = 0
    exp.iloc[65:85] = 0
    tm.assert_frame_equal(df, exp)

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["A"].loc[2000, 4] = 1
        df.loc[(2000, 4), "A"] = 1
    else:
        with tm.raises_chained_assignment_error():
            df["A"].loc[2000, 4] = 1
    exp.iloc[65:85, 0] = 1
    tm.assert_frame_equal(df, exp)

    df.loc[2000] = 5
    exp.iloc[:100] = 5
    tm.assert_frame_equal(df, exp)

    # this works...for now
    with tm.raises_chained_assignment_error():
        df["A"].iloc[14] = 5
    if using_copy_on_write:
        assert df["A"].iloc[14] == exp["A"].iloc[14]
    else:
        assert df["A"].iloc[14] == 5
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, float])
def test_getitem_intkey_leading_level(
    self, multiindex_year_month_day_dataframe_random_data, dtype
):
    """``ser[14]`` raises ``KeyError`` when 14 is not a leading-level label.

    The leading level is cast to ``dtype`` first, and the test asserts that
    neither the level nor the MultiIndex falls back to positional lookup.
    """
    # GH#33355 dont fall-back to positional when leading level is int
    ymd = multiindex_year_month_day_dataframe_random_data
    levels = ymd.index.levels
    ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:])
    ser = ymd["A"]
    mi = ser.index
    assert isinstance(mi, MultiIndex)
    if dtype is int:
        assert mi.levels[0].dtype == np.dtype(int)
    else:
        assert mi.levels[0].dtype == np.float64

    assert 14 not in mi.levels[0]
    assert not mi.levels[0]._should_fallback_to_positional
    assert not mi._should_fallback_to_positional

    with pytest.raises(KeyError, match="14"):
        ser[14]
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc[["foo", "bar"]] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame.copy()
|
||||
result = frame.copy()
|
||||
result.loc["foo":"bar"] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = frame["A"].copy()
|
||||
result = frame["A"].copy()
|
||||
result.loc[["foo", "bar"]] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = frame["A"].copy()
|
||||
result = frame["A"].copy()
|
||||
result.loc["foo":"bar"] = 0
|
||||
expected.loc["foo"] = 0
|
||||
expected.loc["bar"] = 0
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"indexer, exp_idx, exp_values",
|
||||
[
|
||||
(
|
||||
slice("2019-2", None),
|
||||
DatetimeIndex(["2019-02-01"], dtype="M8[ns]"),
|
||||
[2, 3],
|
||||
),
|
||||
(
|
||||
slice(None, "2019-2"),
|
||||
date_range("2019", periods=2, freq="MS"),
|
||||
[0, 1, 2, 3],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
|
||||
# GH: 25165
|
||||
date_idx = date_range("2019", periods=2, freq="MS")
|
||||
df = DataFrame(
|
||||
list(range(4)),
|
||||
index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]),
|
||||
)
|
||||
expected = DataFrame(
|
||||
exp_values,
|
||||
index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]),
|
||||
)
|
||||
result = df[indexer]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.loc[indexer]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc(axis=0)[indexer]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[indexer, :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df2 = df.swaplevel(0, 1).sort_index()
|
||||
expected = expected.swaplevel(0, 1).sort_index()
|
||||
|
||||
result = df2.loc[:, indexer, :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_partial_both_axis():
    """``df.loc["a", "b"]`` applies a partial key to rows and columns alike.

    The result is compared with the matching positional block after the
    selected outer level has been dropped from each axis.
    """
    # gh-12660
    iterables = [["a", "b"], [2, 1]]
    columns = MultiIndex.from_product(iterables, names=["col1", "col2"])
    rows = MultiIndex.from_product(iterables, names=["row1", "row2"])
    df = DataFrame(
        np.random.default_rng(2).standard_normal((4, 4)), index=rows, columns=columns
    )
    # Rows 0-1 carry row1 == "a"; columns 2-3 carry col1 == "b".
    expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1)
    result = df.loc["a", "b"]
    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,589 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import SettingWithCopyError
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def assert_equal(a, b):
|
||||
assert a == b
|
||||
|
||||
|
||||
class TestMultiIndexSetItem:
|
||||
def check(self, target, indexers, value, compare_fn=assert_equal, expected=None):
|
||||
target.loc[indexers] = value
|
||||
result = target.loc[indexers]
|
||||
if expected is None:
|
||||
expected = value
|
||||
compare_fn(result, expected)
|
||||
|
||||
def test_setitem_multiindex(self):
|
||||
# GH#7190
|
||||
cols = ["A", "w", "l", "a", "x", "X", "d", "profit"]
|
||||
index = MultiIndex.from_product(
|
||||
[np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
|
||||
)
|
||||
t, n = 0, 2
|
||||
|
||||
df = DataFrame(
|
||||
np.nan,
|
||||
columns=cols,
|
||||
index=index,
|
||||
)
|
||||
self.check(target=df, indexers=((t, n), "X"), value=0)
|
||||
|
||||
df = DataFrame(-999, columns=cols, index=index)
|
||||
self.check(target=df, indexers=((t, n), "X"), value=1)
|
||||
|
||||
df = DataFrame(columns=cols, index=index)
|
||||
self.check(target=df, indexers=((t, n), "X"), value=2)
|
||||
|
||||
# gh-7218: assigning with 0-dim arrays
|
||||
df = DataFrame(-999, columns=cols, index=index)
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=((t, n), "X"),
|
||||
value=np.array(3),
|
||||
expected=3,
|
||||
)
|
||||
|
||||
def test_setitem_multiindex2(self):
|
||||
# GH#5206
|
||||
df = DataFrame(
|
||||
np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float
|
||||
)
|
||||
df["F"] = 99
|
||||
row_selection = df["A"] % 2 == 0
|
||||
col_selection = ["B", "C"]
|
||||
df.loc[row_selection, col_selection] = df["F"]
|
||||
output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
|
||||
tm.assert_frame_equal(df.loc[row_selection, col_selection], output)
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=(row_selection, col_selection),
|
||||
value=df["F"],
|
||||
compare_fn=tm.assert_frame_equal,
|
||||
expected=output,
|
||||
)
|
||||
|
||||
def test_setitem_multiindex3(self):
|
||||
# GH#11372
|
||||
idx = MultiIndex.from_product(
|
||||
[["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
|
||||
)
|
||||
cols = MultiIndex.from_product(
|
||||
[["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")]
|
||||
)
|
||||
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).random((12, 4)), index=idx, columns=cols
|
||||
)
|
||||
|
||||
subidx = MultiIndex.from_arrays(
|
||||
[["A", "A"], date_range("2015-01-01", "2015-02-01", freq="MS")]
|
||||
)
|
||||
subcols = MultiIndex.from_arrays(
|
||||
[["foo", "foo"], date_range("2016-01-01", "2016-02-01", freq="MS")]
|
||||
)
|
||||
|
||||
vals = DataFrame(
|
||||
np.random.default_rng(2).random((2, 2)), index=subidx, columns=subcols
|
||||
)
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=(subidx, subcols),
|
||||
value=vals,
|
||||
compare_fn=tm.assert_frame_equal,
|
||||
)
|
||||
# set all columns
|
||||
vals = DataFrame(
|
||||
np.random.default_rng(2).random((2, 4)), index=subidx, columns=cols
|
||||
)
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=(subidx, slice(None, None, None)),
|
||||
value=vals,
|
||||
compare_fn=tm.assert_frame_equal,
|
||||
)
|
||||
# identity
|
||||
copy = df.copy()
|
||||
self.check(
|
||||
target=df,
|
||||
indexers=(df.index, df.columns),
|
||||
value=df,
|
||||
compare_fn=tm.assert_frame_equal,
|
||||
expected=copy,
|
||||
)
|
||||
|
||||
# TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
|
||||
# all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
|
||||
@td.skip_array_manager_not_yet_implemented
|
||||
def test_multiindex_setitem(self):
|
||||
# GH 3738
|
||||
# setting with a multi-index right hand side
|
||||
arrays = [
|
||||
np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
|
||||
np.array(["one", "two", "one", "one", "two", "one"]),
|
||||
np.arange(0, 6, 1),
|
||||
]
|
||||
|
||||
df_orig = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((6, 3)),
|
||||
index=arrays,
|
||||
columns=["A", "B", "C"],
|
||||
).sort_index()
|
||||
|
||||
expected = df_orig.loc[["bar"]] * 2
|
||||
df = df_orig.copy()
|
||||
df.loc[["bar"]] *= 2
|
||||
tm.assert_frame_equal(df.loc[["bar"]], expected)
|
||||
|
||||
# raise because these have differing levels
|
||||
msg = "cannot align on a multi-index with out specifying the join levels"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.loc["bar"] *= 2
|
||||
|
||||
def test_multiindex_setitem2(self):
|
||||
# from SO
|
||||
# https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
|
||||
df_orig = DataFrame.from_dict(
|
||||
{
|
||||
"price": {
|
||||
("DE", "Coal", "Stock"): 2,
|
||||
("DE", "Gas", "Stock"): 4,
|
||||
("DE", "Elec", "Demand"): 1,
|
||||
("FR", "Gas", "Stock"): 5,
|
||||
("FR", "Solar", "SupIm"): 0,
|
||||
("FR", "Wind", "SupIm"): 0,
|
||||
}
|
||||
}
|
||||
)
|
||||
df_orig.index = MultiIndex.from_tuples(
|
||||
df_orig.index, names=["Sit", "Com", "Type"]
|
||||
)
|
||||
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 1, 3]] *= 2
|
||||
|
||||
idx = pd.IndexSlice
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, "Stock"], :] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, "Stock"], "price"] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_assignment(self):
|
||||
# GH3777 part 2
|
||||
|
||||
# mixed dtype
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
|
||||
columns=list("abc"),
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
df["d"] = np.nan
|
||||
arr = np.array([0.0, 1.0])
|
||||
|
||||
df.loc[4, "d"] = arr
|
||||
tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))
|
||||
|
||||
def test_multiindex_assignment_single_dtype(
|
||||
self, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# GH3777 part 2b
|
||||
# single dtype
|
||||
arr = np.array([0.0, 1.0])
|
||||
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
|
||||
columns=list("abc"),
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
dtype=np.int64,
|
||||
)
|
||||
view = df["c"].iloc[:2].values
|
||||
|
||||
# arr can be losslessly cast to int, so this setitem is inplace
|
||||
# INFO(CoW-warn) this does not warn because we directly took .values
|
||||
# above, so no reference to a pandas object is alive for `view`
|
||||
df.loc[4, "c"] = arr
|
||||
exp = Series(arr, index=[8, 10], name="c", dtype="int64")
|
||||
result = df.loc[4, "c"]
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# extra check for inplace-ness
|
||||
if not using_copy_on_write:
|
||||
tm.assert_numpy_array_equal(view, exp.values)
|
||||
|
||||
# arr + 0.5 cannot be cast losslessly to int, so we upcast
|
||||
with tm.assert_produces_warning(
|
||||
FutureWarning, match="item of incompatible dtype"
|
||||
):
|
||||
df.loc[4, "c"] = arr + 0.5
|
||||
result = df.loc[4, "c"]
|
||||
exp = exp + 0.5
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# scalar ok
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
df.loc[4, "c"] = 10
|
||||
exp = Series(10, index=[8, 10], name="c", dtype="float64")
|
||||
tm.assert_series_equal(df.loc[4, "c"], exp)
|
||||
|
||||
# invalid assignments
|
||||
msg = "Must have equal len keys and value when setting with an iterable"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[4, "c"] = [0, 1, 2, 3]
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[4, "c"] = [0]
|
||||
|
||||
# But with a length-1 listlike column indexer this behaves like
|
||||
# `df.loc[4, "c"] = 0
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
df.loc[4, ["c"]] = [0]
|
||||
assert (df.loc[4, "c"] == 0).all()
|
||||
|
||||
def test_groupby_example(self):
|
||||
# groupby example
|
||||
NUM_ROWS = 100
|
||||
NUM_COLS = 10
|
||||
col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())]
|
||||
index_cols = col_names[:5]
|
||||
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).integers(5, size=(NUM_ROWS, NUM_COLS)),
|
||||
dtype=np.int64,
|
||||
columns=col_names,
|
||||
)
|
||||
df = df.set_index(index_cols).sort_index()
|
||||
grp = df.groupby(level=index_cols[:4])
|
||||
df["new_col"] = np.nan
|
||||
|
||||
# we are actually operating on a copy here
|
||||
# but in this case, that's ok
|
||||
for name, df2 in grp:
|
||||
new_vals = np.arange(df2.shape[0])
|
||||
df.loc[name, "new_col"] = new_vals
|
||||
|
||||
def test_series_setitem(
|
||||
self, multiindex_year_month_day_dataframe_random_data, warn_copy_on_write
|
||||
):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
s = ymd["A"]
|
||||
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
s[2000, 3] = np.nan
|
||||
assert isna(s.values[42:65]).all()
|
||||
assert notna(s.values[:42]).all()
|
||||
assert notna(s.values[65:]).all()
|
||||
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
s[2000, 3, 10] = np.nan
|
||||
assert isna(s.iloc[49])
|
||||
|
||||
with pytest.raises(KeyError, match="49"):
|
||||
# GH#33355 dont fall-back to positional when leading level is int
|
||||
s[49]
|
||||
|
||||
def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.T.copy()
|
||||
values = df.values.copy()
|
||||
|
||||
result = df[df > 0]
|
||||
expected = df.where(df > 0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df[df > 0] = 5
|
||||
values[values > 0] = 5
|
||||
tm.assert_almost_equal(df.values, values)
|
||||
|
||||
df[df == 5] = 0
|
||||
values[values == 5] = 0
|
||||
tm.assert_almost_equal(df.values, values)
|
||||
|
||||
# a df that needs alignment first
|
||||
df[df[:-1] < 0] = 2
|
||||
np.putmask(values[:-1], values[:-1] < 0, 2)
|
||||
tm.assert_almost_equal(df.values, values)
|
||||
|
||||
with pytest.raises(TypeError, match="boolean values only"):
|
||||
df[df * 0] = 2
|
||||
|
||||
def test_frame_getitem_setitem_multislice(self):
|
||||
levels = [["t1", "t2"], ["a", "b", "c"]]
|
||||
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
|
||||
midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
|
||||
df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)
|
||||
|
||||
result = df.loc[:, "value"]
|
||||
tm.assert_series_equal(df["value"], result)
|
||||
|
||||
result = df.loc[df.index[1:3], "value"]
|
||||
tm.assert_series_equal(df["value"][1:3], result)
|
||||
|
||||
result = df.loc[:, :]
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
result = df
|
||||
df.loc[:, "value"] = 10
|
||||
result["value"] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
df.loc[:, :] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
def test_frame_setitem_multi_column(self):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 4)),
|
||||
columns=[["a", "a", "b", "b"], [0, 1, 0, 1]],
|
||||
)
|
||||
|
||||
cp = df.copy()
|
||||
cp["a"] = cp["b"]
|
||||
tm.assert_frame_equal(cp["a"], cp["b"])
|
||||
|
||||
# set with ndarray
|
||||
cp = df.copy()
|
||||
cp["a"] = cp["b"].values
|
||||
tm.assert_frame_equal(cp["a"], cp["b"])
|
||||
|
||||
def test_frame_setitem_multi_column2(self):
|
||||
# ---------------------------------------
|
||||
# GH#1803
|
||||
columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
|
||||
df = DataFrame(index=[1, 3, 5], columns=columns)
|
||||
|
||||
# Works, but adds a column instead of updating the two existing ones
|
||||
df["A"] = 0.0 # Doesn't work
|
||||
assert (df["A"].values == 0).all()
|
||||
|
||||
# it broadcasts
|
||||
df["B", "1"] = [1, 2, 3]
|
||||
df["A"] = df["B", "1"]
|
||||
|
||||
sliced_a1 = df["A", "1"]
|
||||
sliced_a2 = df["A", "2"]
|
||||
sliced_b1 = df["B", "1"]
|
||||
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
|
||||
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
|
||||
assert sliced_a1.name == ("A", "1")
|
||||
assert sliced_a2.name == ("A", "2")
|
||||
assert sliced_b1.name == ("B", "1")
|
||||
|
||||
def test_loc_getitem_tuple_plus_columns(
|
||||
self, multiindex_year_month_day_dataframe_random_data
|
||||
):
|
||||
# GH #1013
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
df = ymd[:5]
|
||||
|
||||
result = df.loc[(2000, 1, 6), ["A", "B", "C"]]
|
||||
expected = df.loc[2000, 1, 6][["A", "B", "C"]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
|
||||
def test_loc_getitem_setitem_slice_integers(self, frame_or_series):
|
||||
index = MultiIndex(
|
||||
levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
|
||||
)
|
||||
|
||||
obj = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((len(index), 4)),
|
||||
index=index,
|
||||
columns=["a", "b", "c", "d"],
|
||||
)
|
||||
obj = tm.get_obj(obj, frame_or_series)
|
||||
|
||||
res = obj.loc[1:2]
|
||||
exp = obj.reindex(obj.index[2:])
|
||||
tm.assert_equal(res, exp)
|
||||
|
||||
obj.loc[1:2] = 7
|
||||
assert (obj.loc[1:2] == 7).values.all()
|
||||
|
||||
def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
dft = frame.T
|
||||
s = dft["foo", "two"]
|
||||
dft["foo", "two"] = s > s.median()
|
||||
tm.assert_series_equal(dft["foo", "two"], s > s.median())
|
||||
# assert isinstance(dft._data.blocks[1].items, MultiIndex)
|
||||
|
||||
reindexed = dft.reindex(columns=[("foo", "two")])
|
||||
tm.assert_series_equal(reindexed["foo", "two"], s > s.median())
|
||||
|
||||
def test_set_column_scalar_with_loc(
|
||||
self, multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
frame = multiindex_dataframe_random_data
|
||||
subset = frame.index[[1, 4, 5]]
|
||||
|
||||
frame.loc[subset] = 99
|
||||
assert (frame.loc[subset].values == 99).all()
|
||||
|
||||
frame_original = frame.copy()
|
||||
col = frame["B"]
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
col[subset] = 97
|
||||
if using_copy_on_write:
|
||||
# chained setitem doesn't work with CoW
|
||||
tm.assert_frame_equal(frame, frame_original)
|
||||
else:
|
||||
assert (frame.loc[subset, "B"] == 97).all()
|
||||
|
||||
def test_nonunique_assignment_1750(self):
|
||||
df = DataFrame(
|
||||
[[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
|
||||
)
|
||||
|
||||
df = df.set_index(["A", "B"])
|
||||
mi = MultiIndex.from_tuples([(1, 1)])
|
||||
|
||||
df.loc[mi, "C"] = "_"
|
||||
|
||||
assert (df.xs((1, 1))["C"] == "_").all()
|
||||
|
||||
def test_astype_assignment_with_dups(self):
|
||||
# GH 4686
|
||||
# assignment with dups that has a dtype change
|
||||
cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
|
||||
df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
|
||||
index = df.index.copy()
|
||||
|
||||
df["A"] = df["A"].astype(np.float64)
|
||||
tm.assert_index_equal(df.index, index)
|
||||
|
||||
def test_setitem_nonmonotonic(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/31449
|
||||
index = MultiIndex.from_tuples(
|
||||
[("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"]
|
||||
)
|
||||
df = DataFrame(data=[0, 1, 2], index=index, columns=["e"])
|
||||
df.loc["a", "e"] = np.arange(99, 101, dtype="int64")
|
||||
expected = DataFrame({"e": [99, 1, 100]}, index=index)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
class TestSetitemWithExpansionMultiIndex:
|
||||
def test_setitem_new_column_mixed_depth(self):
|
||||
arrays = [
|
||||
["a", "top", "top", "routine1", "routine1", "routine2"],
|
||||
["", "OD", "OD", "result1", "result2", "result1"],
|
||||
["", "wx", "wy", "", "", ""],
|
||||
]
|
||||
|
||||
tuples = sorted(zip(*arrays))
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index)
|
||||
|
||||
result = df.copy()
|
||||
expected = df.copy()
|
||||
result["b"] = [1, 2, 3, 4]
|
||||
expected["b", "", ""] = [1, 2, 3, 4]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_setitem_new_column_all_na(self):
|
||||
# GH#1534
|
||||
mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")])
|
||||
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
|
||||
s = Series({(1, 1): 1, (1, 2): 2})
|
||||
df["new"] = s
|
||||
assert df["new"].isna().all()
|
||||
|
||||
def test_setitem_enlargement_keep_index_names(self):
|
||||
# GH#53053
|
||||
mi = MultiIndex.from_tuples([(1, 2, 3)], names=["i1", "i2", "i3"])
|
||||
df = DataFrame(data=[[10, 20, 30]], index=mi, columns=["A", "B", "C"])
|
||||
df.loc[(0, 0, 0)] = df.loc[(1, 2, 3)]
|
||||
mi_expected = MultiIndex.from_tuples(
|
||||
[(1, 2, 3), (0, 0, 0)], names=["i1", "i2", "i3"]
|
||||
)
|
||||
expected = DataFrame(
|
||||
data=[[10, 20, 30], [10, 20, 30]],
|
||||
index=mi_expected,
|
||||
columns=["A", "B", "C"],
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
# df["foo"] select multiple columns -> .values is not a view
@td.skip_array_manager_invalid_test
def test_frame_setitem_view_direct(
    multiindex_dataframe_random_data, using_copy_on_write
):
    """Write zeros straight into ``df["foo"].values`` and check the effect.

    With ``using_copy_on_write`` the write must raise a "read-only"
    ValueError and leave the data untouched; otherwise the frame must
    observe the zeros.
    """
    # this works because we are modifying the underlying array
    # really a no-no
    df = multiindex_dataframe_random_data.T
    if using_copy_on_write:
        with pytest.raises(ValueError, match="read-only"):
            df["foo"].values[:] = 0
        assert (df["foo"].values != 0).all()
    else:
        df["foo"].values[:] = 0
        assert (df["foo"].values == 0).all()
|
||||
|
||||
|
||||
def test_frame_setitem_copy_raises(
    multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
    """Chained ``df["foo"]["one"] = 2`` must be flagged in every mode.

    In either Copy-on-Write mode only the chained-assignment check applies;
    otherwise ``SettingWithCopyError`` is expected as well.
    """
    # will raise/warn as its chained assignment
    df = multiindex_dataframe_random_data.T
    if using_copy_on_write or warn_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["foo"]["one"] = 2
    else:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.raises_chained_assignment_error():
                df["foo"]["one"] = 2
|
||||
|
||||
|
||||
def test_frame_setitem_copy_no_write(
    multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
    """A flagged chained assignment must leave the frame unchanged.

    Same branches as a plain chained-assignment check, followed by a
    comparison of the written-to copy against the original frame.
    """
    frame = multiindex_dataframe_random_data.T
    expected = frame
    df = frame.copy()
    if using_copy_on_write or warn_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["foo"]["one"] = 2
    else:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.raises_chained_assignment_error():
                df["foo"]["one"] = 2

    # ``df`` is an independent copy, so this comparison is meaningful.
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_frame_setitem_partial_multiindex():
    """A column set from a Series indexed by a subset of the frame's levels.

    The Series carries only levels "a" and "b" of the frame's three-level
    index; the result must equal assigning the constant directly.
    """
    # GH 54875
    df = DataFrame(
        {
            "a": [1, 2, 3],
            "b": [3, 4, 5],
            "c": 6,
            "d": 7,
        }
    ).set_index(["a", "b", "c"])
    ser = Series(8, index=df.index.droplevel("c"))
    result = df.copy()
    result["d"] = ser
    expected = df.copy()
    expected["d"] = 8
    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,796 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsortedIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.indexing.common import _mklbl
|
||||
|
||||
|
||||
class TestMultiIndexSlicers:
|
||||
def test_per_axis_per_level_getitem(self):
|
||||
# GH6134
|
||||
# example test case
|
||||
ix = MultiIndex.from_product(
|
||||
[_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
|
||||
)
|
||||
df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)
|
||||
|
||||
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C2", "C3")
|
||||
]
|
||||
]
|
||||
result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# test multi-index slicing with per axis and per index controls
|
||||
index = MultiIndex.from_tuples(
|
||||
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
|
||||
df = DataFrame(
|
||||
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
|
||||
)
|
||||
df = df.sort_index(axis=0).sort_index(axis=1)
|
||||
|
||||
# identity
|
||||
result = df.loc[(slice(None), slice(None)), :]
|
||||
tm.assert_frame_equal(result, df)
|
||||
result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))]
|
||||
tm.assert_frame_equal(result, df)
|
||||
result = df.loc[:, (slice(None), slice(None))]
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
# index
|
||||
result = df.loc[(slice(None), [1]), :]
|
||||
expected = df.iloc[[0, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[(slice(None), 1), :]
|
||||
expected = df.iloc[[0, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# columns
|
||||
result = df.loc[:, (slice(None), ["foo"])]
|
||||
expected = df.iloc[:, [1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# both
|
||||
result = df.loc[(slice(None), 1), (slice(None), ["foo"])]
|
||||
expected = df.iloc[[0, 3], [1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc["A", "a"]
|
||||
expected = DataFrame(
|
||||
{"bar": [1, 5, 9], "foo": [0, 4, 8]},
|
||||
index=Index([1, 2, 3], name="two"),
|
||||
columns=Index(["bar", "foo"], name="lvl1"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[(slice(None), [1, 2]), :]
|
||||
expected = df.iloc[[0, 1, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# multi-level series
|
||||
s = Series(np.arange(len(ix.to_numpy())), index=ix)
|
||||
result = s.loc["A1":"A3", :, ["C1", "C3"]]
|
||||
expected = s.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in s.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# boolean indexers
|
||||
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
|
||||
expected = df.iloc[[2, 3]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
msg = (
|
||||
"cannot index with a boolean indexer "
|
||||
"that is not the same length as the index"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[(slice(None), np.array([True, False])), :]
|
||||
|
||||
with pytest.raises(KeyError, match=r"\[1\] not in index"):
|
||||
# slice(None) is on the index, [1] is on the columns, but 1 is
|
||||
# not in the columns, so we raise
|
||||
# This used to treat [1] as positional GH#16396
|
||||
df.loc[slice(None), [1]]
|
||||
|
||||
# not lexsorted
|
||||
assert df.index._lexsort_depth == 2
|
||||
df = df.sort_index(level=1, axis=0)
|
||||
assert df.index._lexsort_depth == 0
|
||||
|
||||
msg = (
|
||||
"MultiIndex slicing requires the index to be "
|
||||
r"lexsorted: slicing on levels \[1\], lexsort depth 0"
|
||||
)
|
||||
with pytest.raises(UnsortedIndexError, match=msg):
|
||||
df.loc[(slice(None), slice("bar")), :]
|
||||
|
||||
# GH 16734: not sorted, but no real slicing
|
||||
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
|
||||
tm.assert_frame_equal(result, df.iloc[[1, 3], :])
|
||||
|
||||
def test_multiindex_slicers_non_unique(self):
|
||||
# GH 7106
|
||||
# non-unique mi index support
|
||||
df = (
|
||||
DataFrame(
|
||||
{
|
||||
"A": ["foo", "foo", "foo", "foo"],
|
||||
"B": ["a", "a", "a", "a"],
|
||||
"C": [1, 2, 1, 3],
|
||||
"D": [1, 2, 3, 4],
|
||||
}
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# this is equivalent of an xs expression
|
||||
result = df.xs(1, level=2, drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = (
|
||||
DataFrame(
|
||||
{
|
||||
"A": ["foo", "foo", "foo", "foo"],
|
||||
"B": ["a", "a", "a", "a"],
|
||||
"C": [1, 2, 1, 2],
|
||||
"D": [1, 2, 3, 4],
|
||||
}
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
assert not result.index.is_unique
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH12896
|
||||
# numpy-implementation dependent bug
|
||||
ints = [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
14,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
200000,
|
||||
200000,
|
||||
]
|
||||
n = len(ints)
|
||||
idx = MultiIndex.from_arrays([["a"] * n, ints])
|
||||
result = Series([1] * n, index=idx)
|
||||
result = result.sort_index()
|
||||
result = result.loc[(slice(None), slice(100000))]
|
||||
expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_datetimelike(self):
|
||||
# GH 7429
|
||||
# buggy/inconsistent behavior when slicing with datetime-like
|
||||
dates = [datetime(2012, 1, 1, 12, 12, 12) + timedelta(days=i) for i in range(6)]
|
||||
freq = [1, 2]
|
||||
index = MultiIndex.from_product([dates, freq], names=["date", "frequency"])
|
||||
|
||||
df = DataFrame(
|
||||
np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4),
|
||||
index=index,
|
||||
columns=list("ABCD"),
|
||||
)
|
||||
|
||||
# multi-axis slicing
|
||||
idx = pd.IndexSlice
|
||||
expected = df.iloc[[0, 2, 4], [0, 1]]
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
slice(1, 1),
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
idx[
|
||||
Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12")
|
||||
],
|
||||
idx[1:1],
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
1,
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with strings
|
||||
result = df.loc[
|
||||
(slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_edges(self):
|
||||
# GH 8132
|
||||
# various edge cases
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5,
|
||||
"B": ["B0", "B0", "B1", "B1", "B2"] * 3,
|
||||
"DATE": [
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
"2013-10-01",
|
||||
"2013-07-09",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
],
|
||||
"VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2],
|
||||
}
|
||||
)
|
||||
|
||||
df["DATE"] = pd.to_datetime(df["DATE"])
|
||||
df1 = df.set_index(["A", "B", "DATE"])
|
||||
df1 = df1.sort_index()
|
||||
|
||||
# A1 - Get all values under "A0" and "A1"
|
||||
result = df1.loc[(slice("A1")), :]
|
||||
expected = df1.iloc[0:10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A2 - Get all values from the start to "A2"
|
||||
result = df1.loc[(slice("A2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A3 - Get all values under "B1" or "B2"
|
||||
result = df1.loc[(slice(None), slice("B1", "B2")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A4 - Get all values between 2013-07-02 and 2013-07-09
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B1 - Get all values in B0 that are also under A0, A1 and A2
|
||||
result = df1.loc[(slice("A2"), slice("B0")), :]
|
||||
expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
|
||||
# the As)
|
||||
result = df1.loc[(slice(None), slice("B2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B3 - Get all values from B1 to B2 and up to 2013-08-06
|
||||
result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B4 - Same as A4 but the start of the date slice is not a key.
|
||||
# shows indexing on a partial selection slice
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_doc_examples(self):
|
||||
# test index maker
|
||||
idx = pd.IndexSlice
|
||||
|
||||
# from indexing.rst / advanced
|
||||
index = MultiIndex.from_product(
|
||||
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
df = DataFrame(
|
||||
np.arange(len(index) * len(columns), dtype="int64").reshape(
|
||||
(len(index), len(columns))
|
||||
),
|
||||
index=index,
|
||||
columns=columns,
|
||||
)
|
||||
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.loc[idx[:, :, ["C1", "C3"]], :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# not sorted
|
||||
msg = (
|
||||
"MultiIndex slicing requires the index to be lexsorted: "
|
||||
r"slicing on levels \[1\], lexsort depth 1"
|
||||
)
|
||||
with pytest.raises(UnsortedIndexError, match=msg):
|
||||
df.loc["A1", ("a", slice("foo"))]
|
||||
|
||||
# GH 16734: not sorted, but no real slicing
|
||||
tm.assert_frame_equal(
|
||||
df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]]
|
||||
)
|
||||
|
||||
df = df.sort_index(axis=1)
|
||||
|
||||
# slicing
|
||||
df.loc["A1", (slice(None), "foo")]
|
||||
df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")]
|
||||
|
||||
# setitem
|
||||
df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
|
||||
|
||||
def test_loc_axis_arguments(self):
|
||||
index = MultiIndex.from_product(
|
||||
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
df = (
|
||||
DataFrame(
|
||||
np.arange(len(index) * len(columns), dtype="int64").reshape(
|
||||
(len(index), len(columns))
|
||||
),
|
||||
index=index,
|
||||
columns=columns,
|
||||
)
|
||||
.sort_index()
|
||||
.sort_index(axis=1)
|
||||
)
|
||||
|
||||
# axis 0
|
||||
result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc(axis="index")[:, :, ["C1", "C3"]]
|
||||
expected = df.loc[
|
||||
[
|
||||
(
|
||||
a,
|
||||
b,
|
||||
c,
|
||||
d,
|
||||
)
|
||||
for a, b, c, d in df.index.values
|
||||
if c in ("C1", "C3")
|
||||
]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis 1
|
||||
result = df.loc(axis=1)[:, "foo"]
|
||||
expected = df.loc[:, (slice(None), "foo")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc(axis="columns")[:, "foo"]
|
||||
expected = df.loc[:, (slice(None), "foo")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# invalid axis
|
||||
for i in [-1, 2, "foo"]:
|
||||
msg = f"No axis named {i} for object type DataFrame"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc(axis=i)[:, :, ["C1", "C3"]]
|
||||
|
||||
def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self):
|
||||
# GH29519
|
||||
df = DataFrame(
|
||||
np.arange(27).reshape(3, 9),
|
||||
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
|
||||
)
|
||||
result = df.loc(axis=1)["a1":"a2"]
|
||||
expected = df.iloc[:, :-3]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self):
|
||||
# GH29519
|
||||
df = DataFrame(
|
||||
np.arange(27).reshape(3, 9),
|
||||
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
|
||||
)
|
||||
result = df.loc(axis=1)["a1"]
|
||||
expected = df.iloc[:, :3]
|
||||
expected.columns = ["b1", "b2", "b3"]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_ax_single_level_indexer_simple_df(self):
|
||||
# GH29519
|
||||
# test single level indexing on single index column data frame
|
||||
df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"])
|
||||
result = df.loc(axis=1)["a"]
|
||||
expected = Series(np.array([0, 3, 6]), name="a")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_setitem(self):
|
||||
# test index maker
|
||||
idx = pd.IndexSlice
|
||||
|
||||
# test multi-index slicing with per axis and per index controls
|
||||
index = MultiIndex.from_tuples(
|
||||
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
|
||||
)
|
||||
columns = MultiIndex.from_tuples(
|
||||
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
|
||||
names=["lvl0", "lvl1"],
|
||||
)
|
||||
|
||||
df_orig = DataFrame(
|
||||
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
|
||||
)
|
||||
df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)
|
||||
|
||||
# identity
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), slice(None)), :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc(axis=0)[:, :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[:, (slice(None), slice(None))] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, :] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# index
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), [1]), :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), :] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc(axis=0)[:, 1] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# columns
|
||||
df = df_orig.copy()
|
||||
df.loc[:, (slice(None), ["foo"])] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[:, [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# both
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, 1], idx[:, ["foo"]]] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc["A", "a"] = 100
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[0:3, 0:2] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# setting with a list-like
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
|
||||
[[100, 100], [100, 100]], dtype="int64"
|
||||
)
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = 100
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# not enough values
|
||||
df = df_orig.copy()
|
||||
|
||||
msg = "setting an array element with a sequence."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
|
||||
[[100], [100, 100]], dtype="int64"
|
||||
)
|
||||
|
||||
msg = "Must have equal len keys and value when setting with an iterable"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
|
||||
[100, 100, 100, 100], dtype="int64"
|
||||
)
|
||||
|
||||
# with an alignable rhs
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] = (
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] * 5
|
||||
)
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= df.loc[
|
||||
(slice(None), 1), (slice(None), ["foo"])
|
||||
]
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
rhs = df_orig.loc[(slice(None), 1), (slice(None), ["foo"])].copy()
|
||||
rhs.loc[:, ("c", "bah")] = 10
|
||||
df = df_orig.copy()
|
||||
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= rhs
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_label_slicing_with_negative_step(self):
|
||||
ser = Series(
|
||||
np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)])
|
||||
)
|
||||
SLC = pd.IndexSlice
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1])
|
||||
tm.assert_indexing_slices_equivalent(
|
||||
ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1]
|
||||
)
|
||||
|
||||
def test_multiindex_slice_first_level(self):
|
||||
# GH 12697
|
||||
freq = ["a", "b", "c", "d"]
|
||||
idx = MultiIndex.from_product([freq, range(500)])
|
||||
df = DataFrame(list(range(2000)), index=idx, columns=["Test"])
|
||||
df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
|
||||
result = df_slice.loc["a"]
|
||||
expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df_slice.loc["d"]
|
||||
expected = DataFrame(
|
||||
list(range(1530, 1571)), columns=["Test"], index=range(30, 71)
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
s = ymd["A"]
|
||||
result = s[5:]
|
||||
expected = s.reindex(s.index[5:])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = ymd["A"].copy()
|
||||
exp = ymd["A"].copy()
|
||||
s[5:] = 0
|
||||
exp.iloc[5:] = 0
|
||||
tm.assert_numpy_array_equal(s.values, exp.values)
|
||||
|
||||
result = ymd[5:]
|
||||
expected = ymd.reindex(s.index[5:])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, loc, iloc",
|
||||
[
|
||||
# dtype = int, step = -1
|
||||
("int", slice(None, None, -1), slice(None, None, -1)),
|
||||
("int", slice(3, None, -1), slice(3, None, -1)),
|
||||
("int", slice(None, 1, -1), slice(None, 0, -1)),
|
||||
("int", slice(3, 1, -1), slice(3, 0, -1)),
|
||||
# dtype = int, step = -2
|
||||
("int", slice(None, None, -2), slice(None, None, -2)),
|
||||
("int", slice(3, None, -2), slice(3, None, -2)),
|
||||
("int", slice(None, 1, -2), slice(None, 0, -2)),
|
||||
("int", slice(3, 1, -2), slice(3, 0, -2)),
|
||||
# dtype = str, step = -1
|
||||
("str", slice(None, None, -1), slice(None, None, -1)),
|
||||
("str", slice("d", None, -1), slice(3, None, -1)),
|
||||
("str", slice(None, "b", -1), slice(None, 0, -1)),
|
||||
("str", slice("d", "b", -1), slice(3, 0, -1)),
|
||||
# dtype = str, step = -2
|
||||
("str", slice(None, None, -2), slice(None, None, -2)),
|
||||
("str", slice("d", None, -2), slice(3, None, -2)),
|
||||
("str", slice(None, "b", -2), slice(None, 0, -2)),
|
||||
("str", slice("d", "b", -2), slice(3, 0, -2)),
|
||||
],
|
||||
)
|
||||
def test_loc_slice_negative_stepsize(self, dtype, loc, iloc):
|
||||
# GH#38071
|
||||
labels = {
|
||||
"str": list("abcde"),
|
||||
"int": range(5),
|
||||
}[dtype]
|
||||
|
||||
mi = MultiIndex.from_arrays([labels] * 2)
|
||||
df = DataFrame(1.0, index=mi, columns=["A"])
|
||||
|
||||
SLC = pd.IndexSlice
|
||||
|
||||
expected = df.iloc[iloc, :]
|
||||
result_get_loc = df.loc[SLC[loc], :]
|
||||
result_get_locs_level_0 = df.loc[SLC[loc, :], :]
|
||||
result_get_locs_level_1 = df.loc[SLC[:, loc], :]
|
||||
|
||||
tm.assert_frame_equal(result_get_loc, expected)
|
||||
tm.assert_frame_equal(result_get_locs_level_0, expected)
|
||||
tm.assert_frame_equal(result_get_locs_level_1, expected)
|
@ -0,0 +1,153 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSorted:
|
||||
def test_getitem_multilevel_index_tuple_not_sorted(self):
|
||||
index_columns = list("abc")
|
||||
df = DataFrame(
|
||||
[[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"]
|
||||
)
|
||||
df = df.set_index(index_columns)
|
||||
query_index = df.index[:1]
|
||||
rs = df.loc[query_index, "data"]
|
||||
|
||||
xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"])
|
||||
xp = Series(["x"], index=xp_idx, name="data")
|
||||
tm.assert_series_equal(rs, xp)
|
||||
|
||||
def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.sort_index(level=1).T
|
||||
|
||||
# buglet with int typechecking
|
||||
result = df.iloc[:, : np.int32(3)]
|
||||
expected = df.reindex(columns=df.columns[:3])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("key", [None, lambda x: x])
|
||||
def test_frame_getitem_not_sorted2(self, key):
|
||||
# 13431
|
||||
df = DataFrame(
|
||||
{
|
||||
"col1": ["b", "d", "b", "a"],
|
||||
"col2": [3, 1, 1, 2],
|
||||
"data": ["one", "two", "three", "four"],
|
||||
}
|
||||
)
|
||||
|
||||
df2 = df.set_index(["col1", "col2"])
|
||||
df2_original = df2.copy()
|
||||
|
||||
df2.index = df2.index.set_levels(["b", "d", "a"], level="col1")
|
||||
df2.index = df2.index.set_codes([0, 1, 0, 2], level="col1")
|
||||
assert not df2.index.is_monotonic_increasing
|
||||
|
||||
assert df2_original.index.equals(df2.index)
|
||||
expected = df2.sort_index(key=key)
|
||||
assert expected.index.is_monotonic_increasing
|
||||
|
||||
result = df2.sort_index(level=0, key=key)
|
||||
assert result.index.is_monotonic_increasing
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_sort_values_key(self):
|
||||
arrays = [
|
||||
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
index = index.sort_values( # sort by third letter
|
||||
key=lambda x: x.map(lambda entry: entry[2])
|
||||
)
|
||||
result = DataFrame(range(8), index=index)
|
||||
|
||||
arrays = [
|
||||
["foo", "foo", "bar", "bar", "qux", "qux", "baz", "baz"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
expected = DataFrame(range(8), index=index)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_argsort_with_na(self):
|
||||
# GH48495
|
||||
arrays = [
|
||||
array([2, NA, 1], dtype="Int64"),
|
||||
array([1, 2, 3], dtype="Int64"),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
result = index.argsort()
|
||||
expected = np.array([2, 0, 1], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_sort_values_with_na(self):
|
||||
# GH48495
|
||||
arrays = [
|
||||
array([2, NA, 1], dtype="Int64"),
|
||||
array([1, 2, 3], dtype="Int64"),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
index = index.sort_values()
|
||||
result = DataFrame(range(3), index=index)
|
||||
|
||||
arrays = [
|
||||
array([1, 2, NA], dtype="Int64"),
|
||||
array([3, 1, 2], dtype="Int64"),
|
||||
]
|
||||
index = MultiIndex.from_arrays(arrays)
|
||||
expected = DataFrame(range(3), index=index)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
df = frame.T
|
||||
df["foo", "four"] = "foo"
|
||||
|
||||
arrays = [np.array(x) for x in zip(*df.columns.values)]
|
||||
|
||||
result = df["foo"]
|
||||
result2 = df.loc[:, "foo"]
|
||||
expected = df.reindex(columns=df.columns[arrays[0] == "foo"])
|
||||
expected.columns = expected.columns.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
|
||||
df = df.T
|
||||
result = df.xs("foo")
|
||||
result2 = df.loc["foo"]
|
||||
expected = df.reindex(df.index[arrays[0] == "foo"])
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
tm.assert_frame_equal(result2, expected)
|
||||
|
||||
def test_series_getitem_not_sorted(self):
|
||||
arrays = [
|
||||
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
s = Series(np.random.default_rng(2).standard_normal(8), index=index)
|
||||
|
||||
arrays = [np.array(x) for x in zip(*index.values)]
|
||||
|
||||
result = s["qux"]
|
||||
result2 = s.loc["qux"]
|
||||
expected = s[arrays[0] == "qux"]
|
||||
expected.index = expected.index.droplevel(0)
|
||||
tm.assert_series_equal(result, expected)
|
||||
tm.assert_series_equal(result2, expected)
|
257
lib/python3.13/site-packages/pandas/tests/indexing/test_at.py
Normal file
257
lib/python3.13/site-packages/pandas/tests/indexing/test_at.py
Normal file
@ -0,0 +1,257 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
timezone,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import InvalidIndexError
|
||||
|
||||
from pandas import (
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_at_timezone():
|
||||
# https://github.com/pandas-dev/pandas/issues/33544
|
||||
result = DataFrame({"foo": [datetime(2000, 1, 1)]})
|
||||
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
|
||||
result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc)
|
||||
expected = DataFrame(
|
||||
{"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_selection_methods_of_assigned_col():
    """After ``.at`` writes to an assigned column, every accessor sees it (GH 29282)."""
    df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
    other = DataFrame(data={"c": [7, 8, 9]}, index=[2, 1, 0])

    # "c" is assigned from a frame whose index is in reverse order
    df["c"] = other["c"]
    df.at[1, "c"] = 11
    new_values = [9, 11, 7]

    # whole frame
    expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": new_values})
    tm.assert_frame_equal(df, expected)

    # scalar access
    assert df.at[1, "c"] == 11

    # column as a Series
    tm.assert_series_equal(df["c"], Series(new_values, name="c"))

    # column as a one-column frame
    tm.assert_frame_equal(df[["c"]], DataFrame({"c": new_values}))
|
||||
|
||||
|
||||
class TestAtSetItem:
    """Setting values through ``.at`` on existing labels."""

    def test_at_setitem_item_cache_cleared(self):
        # GH#22372 Note the multi-step construction is necessary to trigger
        # the original bug. pandas/issues/22372#issuecomment-413345309
        # The statements below are order-sensitive; do not reorder them.
        df = DataFrame(index=[0])
        df["x"] = 1
        df["cost"] = 2

        # accessing df["cost"] adds "cost" to the _item_cache
        df["cost"]

        # This loc[[0]] lookup used to call _consolidate_inplace at the
        # BlockManager level, which failed to clear the _item_cache
        df.loc[[0]]

        df.at[0, "x"] = 4
        df.at[0, "cost"] = 789

        expected_columns = Index(["x", "cost"], dtype=object)
        expected = DataFrame(
            {"x": [4], "cost": 789}, index=[0], columns=expected_columns
        )
        tm.assert_frame_equal(df, expected)

        # And in particular, check that the _item_cache has updated correctly.
        tm.assert_series_equal(df["cost"], expected["cost"])

    def test_at_setitem_mixed_index_assignment(self):
        # GH#19860
        ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])

        # string label -> position 0
        ser.at["a"] = 11
        assert ser.iat[0] == 11

        # integer label 1 -> position 3
        ser.at[1] = 22
        assert ser.iat[3] == 22

    def test_at_setitem_categorical_missing(self):
        cat_dtype = CategoricalDtype(["foo", "bar"])
        df = DataFrame(index=range(3), columns=range(3), dtype=cat_dtype)

        df.at[1, 1] = "foo"

        # only the centre cell is filled
        grid = [
            [np.nan, np.nan, np.nan],
            [np.nan, "foo", np.nan],
            [np.nan, np.nan, np.nan],
        ]
        expected = DataFrame(grid, dtype=CategoricalDtype(["foo", "bar"]))

        tm.assert_frame_equal(df, expected)

    def test_at_setitem_multiindex(self):
        df = DataFrame(
            np.zeros((3, 2), dtype="int64"),
            columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]),
        )

        # "a" addresses both ("a", 0) and ("a", 1)
        df.at[0, "a"] = 10

        expected = DataFrame(
            [[10, 10], [0, 0], [0, 0]],
            columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]),
        )
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("row", (Timestamp("2019-01-01"), "2019-01-01"))
    def test_at_datetime_index(self, row):
        # Set float64 dtype to avoid upcast when setting .5
        df = DataFrame(
            data=[[1] * 2], index=DatetimeIndex(data=["2019-01-01", "2019-01-02"])
        ).astype({0: "float64"})

        df.at[row, 0] = 0.5

        expected = DataFrame(
            data=[[0.5, 1], [1.0, 1]],
            index=DatetimeIndex(data=["2019-01-01", "2019-01-02"]),
        )
        tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
class TestAtSetItemWithExpansion:
|
||||
def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture):
|
||||
# GH#25506
|
||||
ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture)
|
||||
result = Series(ts)
|
||||
result.at[1] = ts
|
||||
expected = Series([ts, ts])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestAtWithDuplicates:
    """``.at`` on a frame with duplicate column labels."""

    def test_at_with_duplicate_axes_requires_scalar_lookup(self):
        # GH#33041 check that falling back to loc doesn't allow non-scalar
        # args to slip in
        arr = np.random.default_rng(2).standard_normal(6).reshape(3, 2)
        df = DataFrame(arr, columns=["A", "A"])

        msg = "Invalid call for scalar access"

        # keys equivalent to df.at[[1, 2]], df.at[1, ["A"]] and df.at[:, "A"]
        non_scalar_keys = [[1, 2], (1, ["A"]), (slice(None), "A")]

        # getitem
        for key in non_scalar_keys:
            with pytest.raises(ValueError, match=msg):
                df.at[key]

        # setitem
        for key in non_scalar_keys:
            with pytest.raises(ValueError, match=msg):
                df.at[key] = 1
|
||||
|
||||
|
||||
class TestAtErrors:
|
||||
# TODO: De-duplicate/parametrize
|
||||
# test_at_series_raises_key_error2, test_at_frame_raises_key_error2
|
||||
|
||||
def test_at_series_raises_key_error(self, indexer_al):
|
||||
# GH#31724 .at should match .loc
|
||||
|
||||
ser = Series([1, 2, 3], index=[3, 2, 1])
|
||||
result = indexer_al(ser)[1]
|
||||
assert result == 3
|
||||
|
||||
with pytest.raises(KeyError, match="a"):
|
||||
indexer_al(ser)["a"]
|
||||
|
||||
def test_at_frame_raises_key_error(self, indexer_al):
|
||||
# GH#31724 .at should match .loc
|
||||
|
||||
df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1])
|
||||
|
||||
result = indexer_al(df)[1, 0]
|
||||
assert result == 3
|
||||
|
||||
with pytest.raises(KeyError, match="a"):
|
||||
indexer_al(df)["a", 0]
|
||||
|
||||
with pytest.raises(KeyError, match="a"):
|
||||
indexer_al(df)[1, "a"]
|
||||
|
||||
def test_at_series_raises_key_error2(self, indexer_al):
|
||||
# at should not fallback
|
||||
# GH#7814
|
||||
# GH#31724 .at should match .loc
|
||||
ser = Series([1, 2, 3], index=list("abc"))
|
||||
result = indexer_al(ser)["a"]
|
||||
assert result == 1
|
||||
|
||||
with pytest.raises(KeyError, match="^0$"):
|
||||
indexer_al(ser)[0]
|
||||
|
||||
def test_at_frame_raises_key_error2(self, indexer_al):
|
||||
# GH#31724 .at should match .loc
|
||||
df = DataFrame({"A": [1, 2, 3]}, index=list("abc"))
|
||||
result = indexer_al(df)["a", "A"]
|
||||
assert result == 1
|
||||
|
||||
with pytest.raises(KeyError, match="^0$"):
|
||||
indexer_al(df)["a", 0]
|
||||
|
||||
def test_at_frame_multiple_columns(self):
|
||||
# GH#48296 - at shouldn't modify multiple columns
|
||||
df = DataFrame({"a": [1, 2], "b": [3, 4]})
|
||||
new_row = [6, 7]
|
||||
with pytest.raises(
|
||||
InvalidIndexError,
|
||||
match=f"You can only assign a scalar value not a \\{type(new_row)}",
|
||||
):
|
||||
df.at[5] = new_row
|
||||
|
||||
def test_at_getitem_mixed_index_no_fallback(self):
|
||||
# GH#19860
|
||||
ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
|
||||
with pytest.raises(KeyError, match="^0$"):
|
||||
ser.at[0]
|
||||
with pytest.raises(KeyError, match="^4$"):
|
||||
ser.at[4]
|
||||
|
||||
def test_at_categorical_integers(self):
|
||||
# CategoricalIndex with integer categories that don't happen to match
|
||||
# the Categorical's codes
|
||||
ci = CategoricalIndex([3, 4])
|
||||
|
||||
arr = np.arange(4).reshape(2, 2)
|
||||
frame = DataFrame(arr, index=ci)
|
||||
|
||||
for df in [frame, frame.T]:
|
||||
for key in [0, 1]:
|
||||
with pytest.raises(KeyError, match=str(key)):
|
||||
df.at[key, key]
|
||||
|
||||
def test_at_applied_for_rows(self):
|
||||
# GH#48729 .at should raise InvalidIndexError when assigning rows
|
||||
df = DataFrame(index=["a"], columns=["col1", "col2"])
|
||||
new_row = [123, 15]
|
||||
with pytest.raises(
|
||||
InvalidIndexError,
|
||||
match=f"You can only assign a scalar value not a \\{type(new_row)}",
|
||||
):
|
||||
df.at["a"] = new_row
|
@ -0,0 +1,573 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
Interval,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """Frame with int64 column "A" and a non-unique CategoricalIndex named "B".

    The index categories are ``c``, ``a``, ``b``.
    """
    categories = CategoricalDtype(list("cab"))
    index = CategoricalIndex(list("aabbca"), dtype=categories, name="B")
    data = {
        "A": np.arange(6, dtype="int64"),
    }
    return DataFrame(data, index=index)
|
||||
|
||||
|
||||
@pytest.fixture
def df2():
    """Frame with int64 column "A" and a non-unique CategoricalIndex named "B".

    The index categories are ``c``, ``a``, ``b``, ``e``; ``e`` is not among
    the index values.
    """
    categories = CategoricalDtype(list("cabe"))
    index = CategoricalIndex(list("aabbca"), dtype=categories, name="B")
    data = {
        "A": np.arange(6, dtype="int64"),
    }
    return DataFrame(data, index=index)
|
||||
|
||||
|
||||
class TestCategoricalIndex:
|
||||
def test_loc_scalar(self, df):
    """Scalar ``.loc`` get, set and set-with-expansion on a CategoricalIndex."""
    cat_dtype = CategoricalDtype(list("cab"))

    # Getting: all rows labelled "a", with the categorical index retained.
    a_index = Series(list("aaa"), name="B").astype(cat_dtype)
    assert a_index.dtype == cat_dtype
    tm.assert_frame_equal(
        df.loc["a"], DataFrame({"A": [0, 1, 5]}, index=Index(a_index))
    )

    # Setting: operate on a copy and overwrite every "a" row.
    frame = df.copy()
    frame.loc["a"] = 20
    full_index = Series(list("aabbca"), name="B").astype(cat_dtype)
    assert full_index.dtype == cat_dtype
    after_set = DataFrame({"A": [20, 20, 2, 3, 4, 20]}, index=Index(full_index))
    tm.assert_frame_equal(frame, after_set)

    # value not in the categories
    with pytest.raises(KeyError, match=r"^'d'$"):
        frame.loc["d"]

    # Expanding with a non-category label must give the same result as
    # expanding a frame whose index was first cast to object.
    expanded = frame.copy()
    as_object = expanded.copy()
    as_object.index = as_object.index.astype(object)
    as_object.loc["d"] = 10
    expanded.loc["d"] = 10
    tm.assert_frame_equal(expanded, as_object)
|
||||
|
||||
def test_loc_setitem_with_expansion_non_category(self, df):
    """Setting-with-expansion with a new key "d" that is not among categories."""
    df.loc["a"] = 20
    grown_index = Index(list("aabbcad"), name="B")

    # Setting a new row on an existing column
    row_only = df.copy()
    row_only.loc["d", "A"] = 10
    expected_row_only = DataFrame(
        {"A": [20, 20, 2, 3, 4, 20, 10.0]},
        index=Index(grown_index),
    )
    tm.assert_frame_equal(row_only, expected_row_only)

    # Setting a new row _and_ new column
    row_and_col = df.copy()
    row_and_col.loc["d", "C"] = 10
    expected_row_and_col = DataFrame(
        {
            "A": [20, 20, 2, 3, 4, 20, np.nan],
            "C": [np.nan] * 6 + [10],
        },
        index=Index(grown_index),
    )
    tm.assert_frame_equal(row_and_col, expected_row_and_col)
|
||||
|
||||
def test_loc_getitem_scalar_non_category(self, df):
    """A scalar that is not one of the index labels raises KeyError."""
    missing_label = 1
    with pytest.raises(KeyError, match="^1$"):
        df.loc[missing_label]
|
||||
|
||||
def test_slicing(self):
|
||||
cat = Series(Categorical([1, 2, 3, 4]))
|
||||
reverse = cat[::-1]
|
||||
exp = np.array([4, 3, 2, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(reverse.__array__(), exp)
|
||||
|
||||
df = DataFrame({"value": (np.arange(100) + 1).astype("int64")})
|
||||
df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
|
||||
|
||||
expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10)
|
||||
result = df.iloc[10]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
{"value": np.arange(11, 21).astype("int64")},
|
||||
index=np.arange(10, 20).astype("int64"),
|
||||
)
|
||||
expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
|
||||
result = df.iloc[10:20]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8)
|
||||
result = df.loc[8]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_slicing_and_getting_ops(self):
|
||||
# systematically test the slicing operations:
|
||||
# for all slicing ops:
|
||||
# - returning a dataframe
|
||||
# - returning a column
|
||||
# - returning a row
|
||||
# - returning a single value
|
||||
|
||||
cats = Categorical(
|
||||
["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]
|
||||
)
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||||
values = [1, 2, 3, 4, 5, 6, 7]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
# the expected values
|
||||
cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
|
||||
idx2 = Index(["j", "k"])
|
||||
values2 = [3, 4]
|
||||
|
||||
# 2:4,: | "j":"k",:
|
||||
exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)
|
||||
|
||||
# :,"cats" | :,0
|
||||
exp_col = Series(cats, index=idx, name="cats")
|
||||
|
||||
# "j",: | 2,:
|
||||
exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j")
|
||||
|
||||
# "j","cats | 2,0
|
||||
exp_val = "b"
|
||||
|
||||
# iloc
|
||||
# frame
|
||||
res_df = df.iloc[2:4, :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
# row
|
||||
res_row = df.iloc[2, :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
# col
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert isinstance(res_col.dtype, CategoricalDtype)
|
||||
|
||||
# single value
|
||||
res_val = df.iloc[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# loc
|
||||
# frame
|
||||
res_df = df.loc["j":"k", :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
# row
|
||||
res_row = df.loc["j", :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
# col
|
||||
res_col = df.loc[:, "cats"]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert isinstance(res_col.dtype, CategoricalDtype)
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", df.columns[0]]
|
||||
assert res_val == exp_val
|
||||
|
||||
# iat
|
||||
res_val = df.iat[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# at
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# fancy indexing
|
||||
exp_fancy = df.iloc[[2]]
|
||||
|
||||
res_fancy = df[df["cats"] == "b"]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
res_fancy = df[df["values"] == 3]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
|
||||
# get_value
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# i : int, slice, or sequence of integers
|
||||
res_row = df.iloc[2]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
res_df = df.iloc[slice(2, 4)]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
res_df = df.iloc[[2, 3]]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert isinstance(res_col.dtype, CategoricalDtype)
|
||||
|
||||
res_df = df.iloc[:, slice(0, 2)]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
res_df = df.iloc[:, [0, 1]]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
|
||||
|
||||
def test_slicing_doc_examples(self):
|
||||
# GH 7918
|
||||
cats = Categorical(
|
||||
["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"]
|
||||
)
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||||
values = [1, 2, 2, 2, 3, 4, 5]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
result = df.iloc[2:4, :]
|
||||
expected = DataFrame(
|
||||
{
|
||||
"cats": Categorical(["b", "b"], categories=["a", "b", "c"]),
|
||||
"values": [2, 2],
|
||||
},
|
||||
index=["j", "k"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.iloc[2:4, :].dtypes
|
||||
expected = Series(["category", "int64"], ["cats", "values"], dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", "cats"]
|
||||
expected = Series(
|
||||
Categorical(["a", "b", "b"], categories=["a", "b", "c"]),
|
||||
index=["h", "i", "j"],
|
||||
name="cats",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", df.columns[0:1]]
|
||||
expected = DataFrame(
|
||||
{"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])},
|
||||
index=["h", "i", "j"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_listlike_labels(self, df):
|
||||
# list of labels
|
||||
result = df.loc[["c", "a"]]
|
||||
expected = df.iloc[[4, 0, 1, 5]]
|
||||
tm.assert_frame_equal(result, expected, check_index_type=True)
|
||||
|
||||
def test_loc_getitem_listlike_unused_category(self, df2):
    """GH#37901: a list key with a label in the categories but not in the index.

    "e" is a category of ``df2``'s index but no row uses it, so the lookup
    raises KeyError.
    """
    labels = ["a", "b", "e"]
    pattern = re.escape("['e'] not in index")
    with pytest.raises(KeyError, match=pattern):
        df2.loc[labels]
|
||||
|
||||
def test_loc_getitem_label_unused_category(self, df2):
    """A scalar key that is a category but not a value raises KeyError."""
    unused_label = "e"
    with pytest.raises(KeyError, match=r"^'e'$"):
        df2.loc[unused_label]
|
||||
|
||||
def test_loc_getitem_non_category(self, df2):
    """A list key containing a label outside the categories raises KeyError."""
    labels = ["a", "d"]
    pattern = re.escape("['d'] not in index")
    with pytest.raises(KeyError, match=pattern):
        df2.loc[labels]
|
||||
|
||||
def test_loc_setitem_expansion_label_unused_category(self, df2):
    """Assigning with a label that is a category but not yet in the index."""
    frame = df2.copy()
    frame.loc["e"] = 20

    # The new row is selectable afterwards and the index stays categorical.
    selected = frame.loc[["a", "b", "e"]]
    wanted_index = CategoricalIndex(
        list("aaabbe"), categories=list("cabe"), name="B"
    )
    wanted = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=wanted_index)
    tm.assert_frame_equal(selected, wanted)
|
||||
|
||||
def test_loc_listlike_dtypes(self):
    """GH 11586: list-like ``.loc`` on an index with unique categories and codes."""
    index = CategoricalIndex(["a", "b", "c"])
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)

    # (key, expected data): a unique key first, then one with a repeated label.
    cases = [
        (["a", "b"], {"A": [1, 2], "B": [4, 5]}),
        (["a", "a", "b"], {"A": [1, 1, 2], "B": [4, 4, 5]}),
    ]
    for key, data in cases:
        wanted_index = CategoricalIndex(key, categories=index.categories)
        wanted = DataFrame(data, index=wanted_index)
        tm.assert_frame_equal(df.loc[key], wanted, check_index_type=True)

    with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
        df.loc[["a", "x"]]
|
||||
|
||||
def test_loc_listlike_dtypes_duplicated_categories_and_codes(self):
    """List-like ``.loc`` on a CategoricalIndex whose labels repeat."""
    index = CategoricalIndex(["a", "b", "a"])
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)

    # (key, expected index labels, expected data): every requested label
    # expands to all rows carrying it.
    cases = [
        (["a", "b"], ["a", "a", "b"], {"A": [1, 3, 2], "B": [4, 6, 5]}),
        (
            ["a", "a", "b"],
            ["a", "a", "a", "a", "b"],
            {"A": [1, 3, 1, 3, 2], "B": [4, 6, 4, 6, 5]},
        ),
    ]
    for key, wanted_labels, data in cases:
        wanted = DataFrame(data, index=CategoricalIndex(wanted_labels))
        tm.assert_frame_equal(df.loc[key], wanted, check_index_type=True)

    with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
        df.loc[["a", "x"]]
|
||||
|
||||
def test_loc_listlike_dtypes_unused_category(self):
    """List-like ``.loc`` when the index carries unused categories."""
    all_categories = list("abcde")
    index = CategoricalIndex(["a", "b", "a", "c"], categories=all_categories)
    df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)

    # (key, expected index labels, expected data); the result index keeps
    # the full category list.
    cases = [
        (["a", "b"], ["a", "a", "b"], {"A": [1, 3, 2], "B": [5, 7, 6]}),
        (
            ["a", "a", "b"],
            ["a", "a", "a", "a", "b"],
            {"A": [1, 3, 1, 3, 2], "B": [5, 7, 5, 7, 6]},
        ),
    ]
    for key, wanted_labels, data in cases:
        wanted_index = CategoricalIndex(wanted_labels, categories=list("abcde"))
        wanted = DataFrame(data, index=wanted_index)
        tm.assert_frame_equal(df.loc[key], wanted, check_index_type=True)

    with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
        df.loc[["a", "x"]]
|
||||
|
||||
def test_loc_getitem_listlike_unused_category_raises_keyerror(self):
    """A key that is an *unused* category raises KeyError, scalar or list-like."""
    index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde"))
    df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)

    # For comparison, check the scalar behavior.  The pattern is anchored
    # because ``match`` is applied with ``re.search``: a bare "e" would be
    # satisfied by almost any message containing that letter.
    with pytest.raises(KeyError, match=r"^'e'$"):
        df.loc["e"]

    with pytest.raises(KeyError, match=re.escape("['e'] not in index")):
        df.loc[["a", "e"]]
|
||||
|
||||
def test_ix_categorical_index(self):
|
||||
# GH 12531
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((3, 3)),
|
||||
index=list("ABC"),
|
||||
columns=list("XYZ"),
|
||||
)
|
||||
cdf = df.copy()
|
||||
cdf.index = CategoricalIndex(df.index)
|
||||
cdf.columns = CategoricalIndex(df.columns)
|
||||
|
||||
expect = Series(df.loc["A", :], index=cdf.columns, name="A")
|
||||
tm.assert_series_equal(cdf.loc["A", :], expect)
|
||||
|
||||
expect = Series(df.loc[:, "X"], index=cdf.index, name="X")
|
||||
tm.assert_series_equal(cdf.loc[:, "X"], expect)
|
||||
|
||||
exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"])
|
||||
expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index)
|
||||
tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect)
|
||||
|
||||
exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"])
|
||||
expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns)
|
||||
tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
|
||||
|
||||
@pytest.mark.parametrize(
    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
def test_ix_categorical_index_non_unique(self, infer_string):
    """``.loc`` on non-unique categorical axes agrees with plain axes.

    Runs with the ``future.infer_string`` option both off and on.
    """
    with option_context("future.infer_string", infer_string):
        plain = DataFrame(
            np.random.default_rng(2).standard_normal((3, 3)),
            index=list("ABA"),
            columns=list("XYX"),
        )
        cat = plain.copy()
        cat.index = CategoricalIndex(plain.index)
        cat.columns = CategoricalIndex(plain.columns)

        # A repeated scalar label selects several rows / columns.
        rows_index = CategoricalIndex(list("AA"), categories=["A", "B"])
        rows_a = DataFrame(plain.loc["A", :], columns=cat.columns, index=rows_index)
        tm.assert_frame_equal(cat.loc["A", :], rows_a)

        cols_index = CategoricalIndex(list("XX"), categories=["X", "Y"])
        cols_x = DataFrame(plain.loc[:, "X"], index=cat.index, columns=cols_index)
        tm.assert_frame_equal(cat.loc[:, "X"], cols_x)

        # List keys group the matches label by label.
        rows_ab = DataFrame(
            plain.loc[["A", "B"], :],
            columns=cat.columns,
            index=CategoricalIndex(list("AAB")),
        )
        tm.assert_frame_equal(cat.loc[["A", "B"], :], rows_ab)

        cols_xy = DataFrame(
            plain.loc[:, ["X", "Y"]],
            index=cat.index,
            columns=CategoricalIndex(list("XXY")),
        )
        tm.assert_frame_equal(cat.loc[:, ["X", "Y"]], cols_xy)
|
||||
|
||||
def test_loc_slice(self, df):
    """GH9748: label slices work on a CategoricalIndex, integer slices do not."""
    pattern = (
        "cannot do slice indexing on CategoricalIndex with these "
        r"indexers \[1\] of type int"
    )
    with pytest.raises(TypeError, match=pattern):
        df.loc[1:5]

    # A label slice covers the rows from "b" through "c".
    by_position = df.iloc[[2, 3, 4]]
    tm.assert_frame_equal(df.loc["b":"c"], by_position)
|
||||
|
||||
def test_loc_and_at_with_categorical_index(self):
|
||||
# GH 20629
|
||||
df = DataFrame(
|
||||
[[1, 2], [3, 4], [5, 6]], index=CategoricalIndex(["A", "B", "C"])
|
||||
)
|
||||
|
||||
s = df[0]
|
||||
assert s.loc["A"] == 1
|
||||
assert s.at["A"] == 1
|
||||
|
||||
assert df.loc["B", 1] == 4
|
||||
assert df.at["B", 1] == 4
|
||||
|
||||
@pytest.mark.parametrize(
    "idx_values",
    [
        # python types
        [1, 2, 3],
        [-1, -2, -3],
        [1.5, 2.5, 3.5],
        [-1.5, -2.5, -3.5],
        # numpy int/uint
        *(np.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_NUMPY_DTYPES),
        # numpy floats
        *(np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in tm.FLOAT_NUMPY_DTYPES),
        # numpy object
        np.array([1, "b", 3.5], dtype=object),
        # pandas scalars
        [Interval(1, 4), Interval(4, 6), Interval(6, 9)],
        [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)],
        [Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")],
        # pandas Integer arrays
        *(pd.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES),
        # other pandas arrays
        pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array,
        pd.date_range("2019-01-01", periods=3).array,
        pd.timedelta_range(start="1d", periods=3).array,
    ],
)
def test_loc_getitem_with_non_string_categories(self, idx_values, ordered):
    """GH-17569: ``.loc`` get and set on a CategoricalIndex of non-string labels.

    Covers scalar, list and slice keys for both selection and assignment.
    """
    cat_idx = CategoricalIndex(idx_values, ordered=ordered)
    df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx)

    first = idx_values[0]
    first_two = idx_values[:2]
    sl = slice(first, idx_values[1])

    # scalar selection
    picked_row = Series(["foo"], index=["A"], name=idx_values[0])
    tm.assert_series_equal(df.loc[first], picked_row)

    # list selection and slice selection both give the first two rows
    for key in (first_two, sl):
        first_rows = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"])
        tm.assert_frame_equal(df.loc[key], first_rows)

    # scalar assignment
    target = df.copy()
    target.loc[first] = "qux"
    tm.assert_frame_equal(
        target, DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx)
    )

    # list assignment
    target = df.copy()
    target.loc[first_two, "A"] = ["qux", "qux2"]
    tm.assert_frame_equal(
        target, DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)
    )

    # slice assignment
    target = df.copy()
    target.loc[sl, "A"] = ["qux", "qux2"]
    tm.assert_frame_equal(
        target, DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)
    )
|
||||
|
||||
def test_getitem_categorical_with_nan(self):
|
||||
# GH#41933
|
||||
ci = CategoricalIndex(["A", "B", np.nan])
|
||||
|
||||
ser = Series(range(3), index=ci)
|
||||
|
||||
assert ser[np.nan] == 2
|
||||
assert ser.loc[np.nan] == 2
|
||||
|
||||
df = DataFrame(ser)
|
||||
assert df.loc[np.nan, 0] == 2
|
||||
assert df.loc[np.nan][0] == 2
|
@ -0,0 +1,647 @@
|
||||
from string import ascii_letters
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import (
|
||||
SettingWithCopyError,
|
||||
SettingWithCopyWarning,
|
||||
)
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# Message pattern passed as ``match=`` to ``pytest.raises(SettingWithCopyError, ...)``
# by the chained-assignment tests in this module.
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
|
||||
|
||||
|
||||
def random_text(nobs=100):
    """Return a one-column frame of pseudo-random slices of ``ascii_letters``.

    Parameters
    ----------
    nobs : int, default 100
        Number of rows to generate.

    Returns
    -------
    DataFrame
        A single column named "letters"; each value is
        ``ascii_letters[lo:hi]`` for a sorted pair of random bounds.  The
        generator is seeded with 2, so the output is reproducible.
    """
    rng = np.random.default_rng(2)
    bounds = rng.integers(len(ascii_letters), size=(nobs, 2))
    # Sort each pair in place so that the lower bound comes first.
    bounds.sort(axis=1)
    words = [ascii_letters[lo:hi] for lo, hi in bounds]
    return DataFrame(words, columns=["letters"])
|
||||
|
||||
|
||||
class TestCaching:
|
||||
def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write):
    """GH 3970: chained assignment after the item cache has been populated."""
    # this is chained assignment, but will 'work'
    with option_context("chained_assignment", None):
        df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})

        # Creates a second float block
        df["cc"] = 0.0

        # caches a reference to the 'bb' series
        df["bb"]

        # Assignment to wrong series
        with tm.raises_chained_assignment_error():
            df["bb"].iloc[0] = 0.17
        df._clear_item_cache()

        # With copy-on-write the parent keeps its original value; otherwise
        # the chained assignment is visible in the parent.
        first_bb = 2.2 if using_copy_on_write else 0.17
        tm.assert_almost_equal(df["bb"][0], first_bb)
|
||||
|
||||
@pytest.mark.parametrize("do_ref", [True, False])
def test_setitem_cache_updating(self, do_ref):
    """GH 5424: setting with enlargement, with and without a prior lookup."""
    words = ["one", "two", "three", "four", "five", "six", "seven"]
    rotated = words[3:] + words[:3]
    df = DataFrame({"a": words, "b": rotated, "c": np.arange(7)})

    # ref the cache
    if do_ref:
        df.loc[0, "c"]

    # set it: label 7 does not exist yet, so this adds a row
    df.loc[7, "c"] = 1

    assert df.loc[0, "c"] == 0.0
    assert df.loc[7, "c"] == 1.0
|
||||
|
||||
def test_setitem_cache_updating_slices(
    self, using_copy_on_write, warn_copy_on_write
):
    """GH 7084: the cache must be updated when setting a Series with slices."""

    def zero_frame():
        # Fresh accumulator: column "A" of zeros over the three test days.
        return DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))

    expected = DataFrame(
        {"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014")
    )
    out = zero_frame()
    df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]})

    # loop through df to update out
    six = Timestamp("5/7/2014")
    eix = Timestamp("5/9/2014")
    for _, row in df.iterrows():
        out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"]

    tm.assert_frame_equal(out, expected)
    tm.assert_series_equal(out["A"], expected["A"])

    # try via a chain indexing
    # this actually works
    out = zero_frame()
    out_original = out.copy()
    for ix, row in df.iterrows():
        v = out[row["C"]][six:eix] + row["D"]
        with tm.raises_chained_assignment_error(
            (ix == 0) or warn_copy_on_write or using_copy_on_write
        ):
            out[row["C"]][six:eix] = v

    # With copy-on-write the chained writes never reach the parent.
    reference = out_original if using_copy_on_write else expected
    tm.assert_frame_equal(out, reference)
    tm.assert_series_equal(out["A"], reference["A"])

    # Augmented assignment through .loc.
    out = zero_frame()
    for _, row in df.iterrows():
        out.loc[six:eix, row["C"]] += row["D"]

    tm.assert_frame_equal(out, expected)
    tm.assert_series_equal(out["A"], expected["A"])
|
||||
|
||||
def test_altering_series_clears_parent_cache(
    self, using_copy_on_write, warn_copy_on_write
):
    """GH 33675: enlarging a column Series drops it from the parent's cache."""
    df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
    ser = df["A"]

    # The column is only cached when neither copy-on-write mode is active.
    cow_mode = using_copy_on_write or warn_copy_on_write
    if not cow_mode:
        assert "A" in df._item_cache
    else:
        assert "A" not in df._item_cache

    # Adding a new entry to ser swaps in a new array, so "A" needs to
    # be removed from df._item_cache
    ser["c"] = 5
    assert len(ser) == 3
    assert "A" not in df._item_cache
    assert df["A"] is not ser
    assert len(df["A"]) == 2
|
||||
|
||||
|
||||
class TestChaining:
|
||||
def test_setitem_chained_setfault(self, using_copy_on_write):
    """GH6026 / GH 6056: chained setitem on a column.

    With copy-on-write the parent frame is left unchanged; otherwise the
    chained write is expected to be visible in the parent.
    """
    # GH6026
    data = ["right", "left", "left", "left", "right", "left", "timeout"]
    mdata = ["right", "left", "left", "left", "right", "left", "none"]

    # Built from an ndarray.
    df = DataFrame({"response": np.array(data)})
    mask = df.response == "timeout"
    with tm.raises_chained_assignment_error():
        df.response[mask] = "none"
    if using_copy_on_write:
        tm.assert_frame_equal(df, DataFrame({"response": data}))
    else:
        tm.assert_frame_equal(df, DataFrame({"response": mdata}))

    # Built from a record array.
    recarray = np.rec.fromarrays([data], names=["response"])
    df = DataFrame(recarray)
    mask = df.response == "timeout"
    with tm.raises_chained_assignment_error():
        df.response[mask] = "none"
    if using_copy_on_write:
        tm.assert_frame_equal(df, DataFrame({"response": data}))
    else:
        tm.assert_frame_equal(df, DataFrame({"response": mdata}))

    # Two columns sharing the same source list.
    df = DataFrame({"response": data, "response1": data})
    df_original = df.copy()
    mask = df.response == "timeout"
    with tm.raises_chained_assignment_error():
        df.response[mask] = "none"
    if using_copy_on_write:
        tm.assert_frame_equal(df, df_original)
    else:
        tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data}))

    # GH 6056
    df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
    with tm.raises_chained_assignment_error():
        df["A"].iloc[0] = np.nan
    # ``expected`` is built only here: a copy made before the assignment
    # was always overwritten by one of these two branches.
    if using_copy_on_write:
        expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]})
    else:
        expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
    result = df.head()
    tm.assert_frame_equal(result, expected)

    # Same check through attribute access; reuses ``expected`` from above.
    df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
    with tm.raises_chained_assignment_error():
        df.A.iloc[0] = np.nan
    result = df.head()
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment(self, using_copy_on_write):
    """Chained setitem on a single-dtype frame with the option set to "raise"."""
    with option_context("chained_assignment", "raise"):
        # work with the chain
        expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
        df = DataFrame(
            np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64"
        )
        df_original = df.copy()
        assert df._is_copy is None

        with tm.raises_chained_assignment_error():
            df["A"][0] = -5
        with tm.raises_chained_assignment_error():
            df["A"][1] = -6

        # With copy-on-write the frame is untouched; otherwise both writes
        # are expected to have gone through.
        reference = df_original if using_copy_on_write else expected
        tm.assert_frame_equal(df, reference)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_raises(
    self, using_array_manager, using_copy_on_write, warn_copy_on_write
):
    """Chained setitem on a mixed-dtype frame, per data-manager / CoW mode."""
    # test with the chaining
    df = DataFrame(
        {
            "A": Series(range(2), dtype="int64"),
            "B": np.array(np.arange(2, 4), dtype=np.float64),
        }
    )
    df_original = df.copy()
    assert df._is_copy is None

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["A"][0] = -5
        with tm.raises_chained_assignment_error():
            df["A"][1] = -6
        tm.assert_frame_equal(df, df_original)
        return

    if warn_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["A"][0] = -5
        with tm.raises_chained_assignment_error():
            df["A"][1] = np.nan
        return

    if using_array_manager:
        # INFO(ArrayManager) for ArrayManager it doesn't matter that it's
        # a mixed dataframe
        df["A"][0] = -5
        df["A"][1] = -6
        expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB"))
        expected["B"] = expected["B"].astype("float64")
        tm.assert_frame_equal(df, expected)
        return

    # Remaining case: no copy-on-write mode and no ArrayManager.
    with pytest.raises(SettingWithCopyError, match=msg):
        with tm.raises_chained_assignment_error():
            df["A"][0] = -5

    with pytest.raises(SettingWithCopyError, match=msg):
        with tm.raises_chained_assignment_error():
            df["A"][1] = np.nan

    assert df["A"]._is_copy is None
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_fails(
    self, using_copy_on_write, warn_copy_on_write
):
    """Setting through ``df.loc[0]["A"]`` (the chain) is always flagged."""
    # Using a copy (the chain), fails
    df = DataFrame(
        {
            "A": Series(range(2), dtype="int64"),
            "B": np.array(np.arange(2, 4), dtype=np.float64),
        }
    )

    # The copy-on-write modes use the chained-assignment checker; the
    # other mode expects SettingWithCopyError.
    if using_copy_on_write or warn_copy_on_write:
        guard = tm.raises_chained_assignment_error()
    else:
        guard = pytest.raises(SettingWithCopyError, match=msg)

    with guard:
        df.loc[0]["A"] = -5
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_doc_example(
    self, using_copy_on_write, warn_copy_on_write
):
    """Doc example: setting a column on a boolean-masked frame is flagged."""
    df = DataFrame(
        {
            "a": ["one", "one", "two", "three", "two", "one", "six"],
            "c": Series(range(7), dtype="int64"),
        }
    )
    assert df._is_copy is None

    indexer = df.a.str.startswith("o")

    # The copy-on-write modes use the chained-assignment checker; the
    # other mode expects SettingWithCopyError.
    if using_copy_on_write or warn_copy_on_write:
        guard = tm.raises_chained_assignment_error()
    else:
        guard = pytest.raises(SettingWithCopyError, match=msg)

    with guard:
        df[indexer]["c"] = 42
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_object_dtype(
    self, using_array_manager, using_copy_on_write, warn_copy_on_write
):
    """Chained setitem on a frame with an object column, per mode."""
    expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
    df = DataFrame(
        {"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]}
    )
    df_original = df.copy()

    # Row-then-column chain: only checked outside the copy-on-write modes.
    if not (using_copy_on_write or warn_copy_on_write):
        with pytest.raises(SettingWithCopyError, match=msg):
            df.loc[0]["A"] = 111

    # Column-then-row chain.
    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["A"][0] = 111
        tm.assert_frame_equal(df, df_original)
        return

    if warn_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["A"][0] = 111
        tm.assert_frame_equal(df, expected)
        return

    if using_array_manager:
        # INFO(ArrayManager) for ArrayManager it doesn't matter that it's
        # a mixed dataframe
        df["A"][0] = 111
        tm.assert_frame_equal(df, expected)
        return

    with pytest.raises(SettingWithCopyError, match=msg):
        with tm.raises_chained_assignment_error():
            df["A"][0] = 111

    # The non-chained spelling performs the write.
    df.loc[0, "A"] = 111
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_is_copy_pickle(self):
    """gh-5475: column assignment works on a frame restored from a pickle."""
    df = DataFrame({"A": [1, 2]})
    assert df._is_copy is None

    with tm.ensure_clean("__tmp__pickle") as path:
        df.to_pickle(path)
        df2 = pd.read_pickle(path)
        # Assign the new column twice, as a first and a repeated setitem.
        for _ in range(2):
            df2["B"] = df2["A"]
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_setting_entire_column(self):
    """gh-5597: setting an entire column must not raise spuriously."""
    df = random_text(100000)

    # Always a copy
    for positions in ([0, 1, 2], [0, 1, 2, 4]):
        x = df.iloc[positions]
        assert x._is_copy is not None

    # Explicitly copy
    indexer = df.letters.apply(lambda text: len(text) > 10)
    df = df.loc[indexer].copy()

    assert df._is_copy is None
    df["letters"] = df["letters"].apply(str.lower)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_implicit_take(self):
    """Replace a column on a frame produced by an implicit take.

    ``df`` is deliberately rebound to the boolean selection of itself, and
    the resulting frame has ``_is_copy`` set before the column is replaced.
    """
    # Implicitly take
    df = random_text(100000)
    long_rows = df.letters.apply(lambda text: len(text) > 10)
    df = df.loc[long_rows]

    assert df._is_copy is not None
    df["letters"] = df["letters"].apply(str.lower)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_implicit_take2(
    self, using_copy_on_write, warn_copy_on_write
):
    """Track ``_is_copy`` across ``.loc`` and ``[]`` column replacement."""
    cow_mode = using_copy_on_write or warn_copy_on_write
    if cow_mode:
        pytest.skip("_is_copy is not always set for CoW")

    # Implicitly take 2
    df = random_text(100000)
    long_rows = df.letters.apply(lambda text: len(text) > 10)

    df = df.loc[long_rows]
    assert df._is_copy is not None
    df.loc[:, "letters"] = df["letters"].apply(str.lower)

    # with the enforcement of #45333 in 2.0, the .loc[:, letters] setting
    # is inplace, so df._is_copy remains non-None.
    assert df._is_copy is not None

    # Replacing the column through ``[]`` clears the marker.
    df["letters"] = df["letters"].apply(str.lower)
    assert df._is_copy is None
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_str(self):
    """Lower-case the masked rows of ``letters`` through a single ``.loc`` set."""
    df = random_text(100000)
    long_letters = df.letters.apply(lambda text: len(text) > 10)

    lowered = df.loc[long_letters, "letters"].apply(str.lower)
    df.loc[long_letters, "letters"] = lowered
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_is_copy(self):
    """``dropna`` that drops nothing is an identical take, so no copy marker."""
    df = DataFrame({"a": [1]}).dropna()

    assert df._is_copy is None

    # In-place arithmetic on the column must be accepted.
    df["a"] += 1
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_sorting(self):
    """Sorting the first column gives the same result via ``iloc`` and ``[]``."""
    rng = np.random.default_rng(2)
    df = DataFrame(rng.standard_normal((10, 4)))
    ser = df.iloc[:, 0].sort_values()

    for column in (df.iloc[:, 0], df[0]):
        tm.assert_series_equal(ser, column.sort_values())
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_false_positives(self):
    """Column sets interleaved with ``str(df)`` must not be flagged (gh-6025)."""
    df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]})
    str(df)

    df["column1"] = df["column1"] + "b"
    str(df)

    # Filter rows and rebind ``df`` to the filtered frame.
    keep = df["column2"] != 8
    df = df[keep]
    str(df)

    df["column1"] = df["column1"] + "c"
    str(df)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_undefined_column(
    self, using_copy_on_write, warn_copy_on_write
):
    """Chained ``iloc[...][col] = value`` is detected in every mode.

    from SO:
    https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
    """
    df = DataFrame(np.arange(0, 9), columns=["count"])
    df["group"] = "b"
    df_original = df.copy()

    if not (using_copy_on_write or warn_copy_on_write):
        # Legacy mode: the chained set raises SettingWithCopyError.
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.raises_chained_assignment_error():
                df.iloc[0:5]["group"] = "a"
        return

    with tm.raises_chained_assignment_error():
        df.iloc[0:5]["group"] = "a"
    if using_copy_on_write:
        # Under CoW the parent frame must be left untouched.
        tm.assert_frame_equal(df, df_original)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_changing_dtype(
    self, using_array_manager, using_copy_on_write, warn_copy_on_write
):
    """Chained sets on a mixed-dtype frame, with and without a dtype change."""
    frame_data = {
        "A": date_range("20130101", periods=5),
        "B": np.random.default_rng(2).standard_normal(5),
        "C": np.arange(5, dtype="int64"),
        "D": ["a", "b", "c", "d", "e"],
    }
    df = DataFrame(frame_data)
    df_original = df.copy()

    if using_copy_on_write or warn_copy_on_write:
        with tm.raises_chained_assignment_error():
            df.loc[2]["D"] = "foo"
        with tm.raises_chained_assignment_error():
            df.loc[2]["C"] = "foo"
        # Neither row-wise chained set may have modified the parent.
        tm.assert_frame_equal(df, df_original)

        with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)):
            df["C"][2] = "foo"
        if using_copy_on_write:
            tm.assert_frame_equal(df, df_original)
        else:
            assert df.loc[2, "C"] == "foo"
        return

    # Legacy mode: both row-wise chained sets raise.
    for column in ("D", "C"):
        with pytest.raises(SettingWithCopyError, match=msg):
            df.loc[2][column] = "foo"

    if using_array_manager:
        # INFO(ArrayManager) for ArrayManager it doesn't matter if it's
        # changing the dtype or not
        df["C"][2] = "foo"
        assert df.loc[2, "C"] == "foo"
    else:
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.raises_chained_assignment_error():
                df["C"][2] = "foo"
|
||||
|
||||
def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write):
    """A masked set on a column subset operates on a copy and must be flagged."""
    df = DataFrame(
        {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
    )
    df_original = df.copy()
    mask = pd.isna(df.c)

    if not (using_copy_on_write or warn_copy_on_write):
        # Legacy mode raises instead of warning.
        with pytest.raises(SettingWithCopyError, match=msg):
            df[["c"]][mask] = df[["b"]][mask]
        return

    with tm.raises_chained_assignment_error():
        df[["c"]][mask] = df[["b"]][mask]
    if using_copy_on_write:
        # Under CoW the original frame stays as it was.
        tm.assert_frame_equal(df, df_original)
|
||||
|
||||
def test_setting_with_copy_bug_no_warning(self):
|
||||
# invalid warning as we are returning a new object
|
||||
# GH 8730
|
||||
df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])})
|
||||
df2 = df1[["x"]]
|
||||
|
||||
# this should not raise
|
||||
df2["y"] = ["g", "h", "i"]
|
||||
|
||||
def test_detect_chained_assignment_warnings_errors(
    self, using_copy_on_write, warn_copy_on_write
):
    """The ``chained_assignment`` option selects between warning and raising."""
    df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})

    if using_copy_on_write or warn_copy_on_write:
        with tm.raises_chained_assignment_error():
            df.loc[0]["A"] = 111
        return

    # "warn" -> SettingWithCopyWarning
    with option_context("chained_assignment", "warn"):
        with tm.assert_produces_warning(SettingWithCopyWarning):
            df.loc[0]["A"] = 111

    # "raise" -> SettingWithCopyError
    with option_context("chained_assignment", "raise"):
        with pytest.raises(SettingWithCopyError, match=msg):
            df.loc[0]["A"] = 111
|
||||
|
||||
@pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})])
def test_detect_chained_assignment_warning_stacklevel(
    self, rhs, using_copy_on_write, warn_copy_on_write
):
    """The SettingWithCopyWarning must be attributed to this file (GH#42570)."""
    df = DataFrame(np.arange(25).reshape(5, 5))
    df_original = df.copy()
    chained = df.loc[:3]

    with option_context("chained_assignment", "warn"):
        if using_copy_on_write or warn_copy_on_write:
            # INFO(CoW) no warning, and original dataframe not changed
            chained[2] = rhs
            tm.assert_frame_equal(df, df_original)
        else:
            with tm.assert_produces_warning(SettingWithCopyWarning) as t:
                chained[2] = rhs
                assert t[0].filename == __file__
|
||||
|
||||
# TODO(ArrayManager) fast_xs with array-like scalars is not yet working
@td.skip_array_manager_not_yet_implemented
def test_chained_getitem_with_lists(self):
    """All getitem routes to an array-valued cell return the same array.

    GH6394: regression in chained getitem indexing with embedded list-like
    from 0.12.
    """
    df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]})
    expected = df["A"].iloc[2]

    lookups = (
        df.loc[2, "A"],
        df.iloc[2]["A"],
        df["A"].loc[2],
        df["A"].iloc[2],
    )
    for found in lookups:
        tm.assert_numpy_array_equal(found, expected)
|
||||
|
||||
def test_cache_updating(self):
|
||||
# GH 4939, make sure to update the cache on setitem
|
||||
|
||||
df = DataFrame(
|
||||
np.zeros((10, 4)),
|
||||
columns=Index(list("ABCD"), dtype=object),
|
||||
)
|
||||
df["A"] # cache series
|
||||
df.loc["Hello Friend"] = df.iloc[0]
|
||||
assert "Hello Friend" in df["A"].index
|
||||
assert "Hello Friend" in df["B"].index
|
||||
|
||||
def test_cache_updating2(self, using_copy_on_write):
    """Writes through ``df.f.values`` either fail (CoW) or reach the frame (10264)."""
    base_columns = ["a", "b", "c", "d", "e"]
    df = DataFrame(
        np.zeros((5, 5), dtype="int64"),
        columns=base_columns,
        index=range(5),
    )
    df["f"] = 0
    df_orig = df.copy()

    if using_copy_on_write:
        # The values array is read-only, so the write is rejected.
        with pytest.raises(ValueError, match="read-only"):
            df.f.values[3] = 1
        tm.assert_frame_equal(df, df_orig)
        return

    df.f.values[3] = 1
    df.f.values[3] = 2

    expected_frame = DataFrame(
        np.zeros((5, 6), dtype="int64"),
        columns=["a", "b", "c", "d", "e", "f"],
        index=range(5),
    )
    expected_frame.at[3, "f"] = 2
    tm.assert_frame_equal(df, expected_frame)

    expected_column = Series([0, 0, 0, 2, 0], name="f")
    tm.assert_series_equal(df.f, expected_column)
|
||||
|
||||
def test_iloc_setitem_chained_assignment(self, using_copy_on_write):
|
||||
# GH#3970
|
||||
with option_context("chained_assignment", None):
|
||||
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
|
||||
df["cc"] = 0.0
|
||||
|
||||
ck = [True] * len(df)
|
||||
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["bb"].iloc[0] = 0.13
|
||||
|
||||
# GH#3970 this lookup used to break the chained setting to 0.15
|
||||
df.iloc[ck]
|
||||
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["bb"].iloc[0] = 0.15
|
||||
|
||||
if not using_copy_on_write:
|
||||
assert df["bb"].iloc[0] == 0.15
|
||||
else:
|
||||
assert df["bb"].iloc[0] == 2.2
|
||||
|
||||
def test_getitem_loc_assignment_slice_state(self):
|
||||
# GH 13569
|
||||
df = DataFrame({"a": [10, 20, 30]})
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["a"].loc[4] = 40
|
||||
tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]}))
|
||||
tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a"))
|
@ -0,0 +1,105 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.indexers import check_array_indexer
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, expected",
    [
        # integer
        ([1, 2], np.array([1, 2], dtype=np.intp)),
        (np.array([1, 2], dtype="int64"), np.array([1, 2], dtype=np.intp)),
        (pd.array([1, 2], dtype="Int32"), np.array([1, 2], dtype=np.intp)),
        (pd.Index([1, 2]), np.array([1, 2], dtype=np.intp)),
        # boolean
        ([True, False, True], np.array([True, False, True], dtype=np.bool_)),
        (np.array([True, False, True]), np.array([True, False, True], dtype=np.bool_)),
        (
            pd.array([True, False, True], dtype="boolean"),
            np.array([True, False, True], dtype=np.bool_),
        ),
        # other
        ([], np.array([], dtype=np.intp)),
    ],
)
def test_valid_input(indexer, expected):
    """Valid integer, boolean and empty indexers are returned as NumPy arrays."""
    target = np.array([1, 2, 3])
    tm.assert_numpy_array_equal(check_array_indexer(target, indexer), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")]
)
def test_boolean_na_returns_indexer(indexer):
    """A missing entry in a boolean indexer comes back as ``False``."""
    # https://github.com/pandas-dev/pandas/issues/31503
    target = np.array([1, 2, 3])
    expected = np.array([True, False, False], dtype=bool)

    result = check_array_indexer(target, indexer)

    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer",
    [
        [True, False],
        pd.array([True, False], dtype="boolean"),
        np.array([True, False], dtype=np.bool_),
    ],
)
def test_bool_raise_length(indexer):
    """A boolean indexer shorter than the array raises ``IndexError``."""
    target = np.array([1, 2, 3])

    with pytest.raises(IndexError, match="Boolean index has wrong length"):
        check_array_indexer(target, indexer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")]
)
def test_int_raise_missing_values(indexer):
    """An integer indexer that contains a missing value raises ``ValueError``."""
    target = np.array([1, 2, 3])
    expected_message = "Cannot index with an integer indexer containing NA values"

    with pytest.raises(ValueError, match=expected_message):
        check_array_indexer(target, indexer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer",
    [
        [0.0, 1.0],
        np.array([1.0, 2.0], dtype="float64"),
        np.array([True, False], dtype=object),
        pd.Index([True, False], dtype=object),
    ],
)
def test_raise_invalid_array_dtypes(indexer):
    """Float and object-dtype indexers are rejected with ``IndexError``."""
    target = np.array([1, 2, 3])
    expected_message = "arrays used as indices must be of integer or boolean type"

    with pytest.raises(IndexError, match=expected_message):
        check_array_indexer(target, indexer)
|
||||
|
||||
|
||||
def test_raise_nullable_string_dtype(nullable_string_dtype):
    """A nullable-string array is rejected as an indexer with ``IndexError``."""
    target = np.array([1, 2, 3])
    indexer = pd.array(["a", "b"], dtype=nullable_string_dtype)
    expected_message = "arrays used as indices must be of integer or boolean type"

    with pytest.raises(IndexError, match=expected_message):
        check_array_indexer(target, indexer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)])
def test_pass_through_non_array_likes(indexer):
    """Indexers that are not array-like are handed back unchanged."""
    target = np.array([1, 2, 3])

    returned = check_array_indexer(target, indexer)

    assert returned == indexer
|
@ -0,0 +1,940 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas.compat import (
|
||||
IS64,
|
||||
is_platform_windows,
|
||||
)
|
||||
from pandas.compat.numpy import np_version_gt2
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
###############################################################
|
||||
# Index / Series common tests which may trigger dtype coercions
|
||||
###############################################################
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="class")
def check_comprehensiveness(request):
    """Fail a test class that lacks a test for any (klass, dtype, method) combo.

    A combination counts as covered when some collected test item's name
    contains all three strings.  The check is skipped when pytest runs with
    "last-failed" or a ``-k`` keyword, since only a subset is expected then.
    """
    cls = request.cls
    combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])

    def has_test(combo):
        klass, dtype, method = combo
        for item in request.node.session.items:
            name = item.name
            if klass in name and dtype in name and method in name:
                return True
        return False

    opts = request.config.option
    if not (opts.lf or opts.keyword):
        for combo in combos:
            if has_test(combo):
                continue
            raise AssertionError(
                f"test method is not defined: {cls.__name__}, {combo}"
            )

    yield
|
||||
|
||||
|
||||
class CoercionBase:
    """Shared declaration of the container kinds and dtypes a subclass covers."""

    # Container kinds whose names are looked for in test names.
    klasses = ["index", "series"]
    # Dtype names whose presence is looked for in test names.
    dtypes = [
        "object",
        "int64",
        "float64",
        "complex128",
        "bool",
        "datetime64",
        "datetime64tz",
        "timedelta64",
        "period",
    ]

    @property
    def method(self):
        # Subclasses in this file replace this with a plain string attribute.
        raise NotImplementedError(self)
|
||||
|
||||
|
||||
class TestSetitemCoercion(CoercionBase):
    """Index coercion triggered by assigning to a key that is not yet present."""

    method = "setitem"

    # disable comprehensiveness tests, as most of these have been moved to
    # tests.series.indexing.test_setitem in SetitemCastingEquivalents subclasses.
    klasses: list[str] = []

    def test_setitem_series_no_coercion_from_values_list(self):
        # GH35865 - int casted to str when internally calling np.array(ser.values)
        ser = pd.Series(["a", 1])
        own_values = list(ser.values)
        ser[:] = own_values

        tm.assert_series_equal(ser, pd.Series(["a", 1]))

    def _assert_setitem_index_conversion(
        self, original_series, loc_key, expected_index, expected_dtype
    ):
        """Set ``loc_key`` to 5 via ``[]`` and via ``.loc``; check the new index."""

        def _set_plain(target):
            # GH#33469 pre-2.0 with int loc_key and temp.index.dtype == np.float64
            # `temp[loc_key] = 5` treated loc_key as positional
            target[loc_key] = 5

        def _set_loc(target):
            target.loc[loc_key] = 5

        for assign in (_set_plain, _set_loc):
            candidate = original_series.copy()
            assign(candidate)
            wanted = pd.Series([1, 2, 3, 4, 5], index=expected_index)
            tm.assert_series_equal(candidate, wanted)
            # check dtype explicitly for sure
            assert candidate.index.dtype == expected_dtype

    @pytest.mark.parametrize(
        "val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)]
    )
    def test_setitem_index_object(self, val, exp_dtype):
        """New keys on an object index; the integer key case raises IndexError."""
        obj = pd.Series([1, 2, 3, 4], index=pd.Index(list("abcd"), dtype=object))
        assert obj.index.dtype == object

        if exp_dtype is not IndexError:
            exp_index = pd.Index(list("abcd") + [val], dtype=object)
            self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
            return

        temp = obj.copy()
        warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
        msg = "index 5 is out of bounds for axis 0 with size 4"
        with pytest.raises(exp_dtype, match=msg):
            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                temp[5] = 5

    @pytest.mark.parametrize(
        "val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", object)]
    )
    def test_setitem_index_int64(self, val, exp_dtype):
        """New keys on the default integer index."""
        obj = pd.Series([1, 2, 3, 4])
        assert obj.index.dtype == np.int64

        grown_index = pd.Index([0, 1, 2, 3, val])
        self._assert_setitem_index_conversion(obj, val, grown_index, exp_dtype)

    @pytest.mark.parametrize(
        "val,exp_dtype", [(5, np.float64), (5.1, np.float64), ("x", object)]
    )
    def test_setitem_index_float64(self, val, exp_dtype, request):
        """New keys on a float64 index."""
        float_labels = [1.1, 2.1, 3.1, 4.1]
        obj = pd.Series([1, 2, 3, 4], index=float_labels)
        assert obj.index.dtype == np.float64

        grown_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
        self._assert_setitem_index_conversion(obj, val, grown_index, exp_dtype)

    # The remaining methods are placeholders for combinations with no test yet.

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_series_period(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_complex128(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_bool(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_datetime64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_datetime64tz(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_timedelta64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_period(self):
        raise NotImplementedError
|
||||
|
||||
|
||||
class TestInsertIndexCoercion(CoercionBase):
    """Dtype coercion triggered by ``Index.insert``."""

    klasses = ["index"]
    method = "insert"

    def _assert_insert_conversion(self, original, value, expected, expected_dtype):
        """Insert ``value`` at position 1 of a copy and compare with ``expected``."""
        inserted = original.copy().insert(1, value)
        tm.assert_index_equal(inserted, expected)
        assert inserted.dtype == expected_dtype

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1, object),
            (1.1, 1.1, object),
            (False, False, object),
            ("x", "x", object),
        ],
    )
    def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
        """Inserting into an object index keeps object dtype."""
        obj = pd.Index(list("abcd"), dtype=object)
        assert obj.dtype == object

        wanted = pd.Index(["a", coerced_val, "b", "c", "d"], dtype=object)
        self._assert_insert_conversion(obj, insert, wanted, coerced_dtype)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1, None),
            (1.1, 1.1, np.float64),
            (False, False, object),  # GH#36319
            ("x", "x", object),
        ],
    )
    def test_insert_int_index(
        self, any_int_numpy_dtype, insert, coerced_val, coerced_dtype
    ):
        """Inserting into an integer index; ``None`` means "dtype unchanged"."""
        dtype = any_int_numpy_dtype
        obj = pd.Index([1, 2, 3, 4], dtype=dtype)
        if coerced_dtype is None:
            coerced_dtype = dtype

        wanted = pd.Index([1, coerced_val, 2, 3, 4], dtype=coerced_dtype)
        self._assert_insert_conversion(obj, insert, wanted, coerced_dtype)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1.0, None),
            # When float_numpy_dtype=float32, this is not the case
            # see the correction below
            (1.1, 1.1, np.float64),
            (False, False, object),  # GH#36319
            ("x", "x", object),
        ],
    )
    def test_insert_float_index(
        self, float_numpy_dtype, insert, coerced_val, coerced_dtype
    ):
        """Inserting into a float index; ``None`` means "dtype unchanged"."""
        dtype = float_numpy_dtype
        obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype)
        if coerced_dtype is None:
            coerced_dtype = dtype

        if np_version_gt2 and dtype == "float32" and coerced_val == 1.1:
            # Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32
            # the expected dtype will be float32 if the original dtype was float32
            coerced_dtype = np.float32

        wanted = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
        self._assert_insert_conversion(obj, insert, wanted, coerced_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [
            (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
            (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
        ],
        ids=["datetime64", "datetime64tz"],
    )
    @pytest.mark.parametrize(
        "insert_value",
        [pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1],
    )
    def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value):
        """Inserting matching, mismatched-tz and non-datetime values.

        NOTE(review): ``insert_value`` is never read in the body; it only
        multiplies the number of collected test cases.
        """
        tz = fill_val.tz
        obj = pd.DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=tz
        ).as_unit("ns")
        assert obj.dtype == exp_dtype

        wanted = pd.DatetimeIndex(
            ["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"],
            tz=tz,
        ).as_unit("ns")
        self._assert_insert_conversion(obj, fill_val, wanted, exp_dtype)

        naive_ts = pd.Timestamp("2012-01-01")
        tokyo_ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")

        # mismatched tzawareness -> the result is expected to be object dtype
        mismatched = naive_ts if fill_val.tz else tokyo_ts
        result = obj.insert(1, mismatched)
        expected = obj.astype(object).insert(1, mismatched)
        assert expected.dtype == object
        tm.assert_index_equal(result, expected)

        if fill_val.tz:
            # Aware value in a different zone: expected to be converted to the
            # index's own zone.
            result = obj.insert(1, tokyo_ts)
            # once deprecation is enforced:
            expected = obj.insert(1, tokyo_ts.tz_convert(obj.dtype.tz))
            assert expected.dtype == obj.dtype
            tm.assert_index_equal(result, expected)

        item = 1
        result = obj.insert(1, item)
        expected = obj.astype(object).insert(1, item)
        assert expected[1] == item
        assert expected.dtype == object
        tm.assert_index_equal(result, expected)

    def test_insert_index_timedelta64(self):
        """Timedelta inserts keep the dtype; other values fall back to object."""
        obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"])
        assert obj.dtype == "timedelta64[ns]"

        # timedelta64 + timedelta64 => timedelta64
        wanted = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"])
        self._assert_insert_conversion(
            obj, pd.Timedelta("10 day"), wanted, "timedelta64[ns]"
        )

        foreign_items = [pd.Timestamp("2012-01-01"), 1]
        for item in foreign_items:
            result = obj.insert(1, item)
            expected = obj.astype(object).insert(1, item)
            assert expected.dtype == object
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (pd.Period("2012-01", freq="M"), "2012-01", "period[M]"),
            (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object),
            (1, 1, object),
            ("x", "x", object),
        ],
    )
    def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
        """Period inserts keep ``period[M]``; other values match the object path."""
        obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M")
        assert obj.dtype == "period[M]"

        if isinstance(insert, pd.Period):
            data = [
                pd.Period("2011-01", freq="M"),
                coerced_val,
                pd.Period("2011-02", freq="M"),
                pd.Period("2011-03", freq="M"),
                pd.Period("2011-04", freq="M"),
            ]
            wanted = pd.PeriodIndex(data, freq="M")
            self._assert_insert_conversion(obj, insert, wanted, coerced_dtype)

            # string that can be parsed to appropriate PeriodDtype
            self._assert_insert_conversion(obj, str(insert), wanted, coerced_dtype)
            return

        result = obj.insert(0, insert)
        expected = obj.astype(object).insert(0, insert)
        tm.assert_index_equal(result, expected)

        # TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M"
        #  casts that string to Period[M], not clear that is desirable
        if not isinstance(insert, pd.Timestamp):
            # non-castable string
            as_text = str(insert)
            result = obj.insert(0, as_text)
            expected = obj.astype(object).insert(0, as_text)
            tm.assert_index_equal(result, expected)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_insert_index_complex128(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_insert_index_bool(self):
        raise NotImplementedError
|
||||
|
||||
|
||||
class TestWhereCoercion(CoercionBase):
    """Dtype coercion triggered by ``where`` on Index and Series."""

    method = "where"
    # Mask shared by the ``_run_test`` based tests: keep positions 0 and 2.
    _cond = np.array([True, False, True, False])

    def _assert_where_conversion(
        self, original, cond, values, expected, expected_dtype
    ):
        """Apply ``where`` to a copy and compare the result and its dtype."""
        outcome = original.copy().where(cond, values)
        tm.assert_equal(outcome, expected)
        assert outcome.dtype == expected_dtype

    def _construct_exp(self, obj, klass, fill_val, exp_dtype):
        """Build a 4-element replacement container and the matching expected result."""
        if fill_val is True:
            replacements = klass([True, False, True, True])
        elif isinstance(fill_val, (datetime, np.datetime64)):
            replacements = pd.date_range(fill_val, periods=4)
        else:
            replacements = klass(factor * fill_val for factor in [5, 6, 7, 8])

        # Positions 0 and 2 come from ``obj``; 1 and 3 from the replacements.
        merged = [obj[0], replacements[1], obj[2], replacements[3]]
        return replacements, klass(merged, dtype=exp_dtype)

    def _run_test(self, obj, fill_val, klass, exp_dtype):
        """Check ``where`` with a scalar replacement, then with an array-like one."""
        cond = klass(self._cond)

        scalar_expected = klass([obj[0], fill_val, obj[2], fill_val], dtype=exp_dtype)
        self._assert_where_conversion(obj, cond, fill_val, scalar_expected, exp_dtype)

        values, array_expected = self._construct_exp(obj, klass, fill_val, exp_dtype)
        self._assert_where_conversion(obj, cond, values, array_expected, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [(1, object), (1.1, object), (1 + 1j, object), (True, object)],
    )
    def test_where_object(self, index_or_series, fill_val, exp_dtype):
        klass = index_or_series
        obj = klass(list("abcd"), dtype=object)
        assert obj.dtype == object

        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
    )
    def test_where_int64(self, index_or_series, fill_val, exp_dtype, request):
        klass = index_or_series
        obj = klass([1, 2, 3, 4])
        assert obj.dtype == np.int64

        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val, exp_dtype",
        [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
    )
    def test_where_float64(self, index_or_series, fill_val, exp_dtype, request):
        klass = index_or_series
        obj = klass([1.1, 2.2, 3.3, 4.4])
        assert obj.dtype == np.float64

        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [
            (1, np.complex128),
            (1.1, np.complex128),
            (1 + 1j, np.complex128),
            (True, object),
        ],
    )
    def test_where_complex128(self, index_or_series, fill_val, exp_dtype):
        klass = index_or_series
        obj = klass([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j], dtype=np.complex128)
        assert obj.dtype == np.complex128

        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [(1, object), (1.1, object), (1 + 1j, object), (True, np.bool_)],
    )
    def test_where_series_bool(self, index_or_series, fill_val, exp_dtype):
        klass = index_or_series
        obj = klass([True, False, True, False])
        assert obj.dtype == np.bool_

        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [
            (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
            (pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
        ],
        ids=["datetime64", "datetime64tz"],
    )
    def test_where_datetime64(self, index_or_series, fill_val, exp_dtype):
        """``where`` on naive datetimes with naive and tz-aware replacements."""
        klass = index_or_series

        daily = pd.date_range("2011-01-01", periods=4, freq="D")._with_freq(None)
        obj = klass(daily)
        assert obj.dtype == "datetime64[ns]"

        fv = fill_val
        if exp_dtype != "datetime64[ns]":
            # NOTE(review): the loop variable is not used here; every pass
            # runs with ``fill_val`` itself. Kept as in the original.
            for _scalar in [fv, fv.to_pydatetime()]:
                self._run_test(obj, fill_val, klass, exp_dtype)
            return

        # do the check with each of the available datetime scalars
        for scalar in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
            self._run_test(obj, scalar, klass, exp_dtype)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_where_index_complex128(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_where_index_bool(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_where_series_timedelta64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_where_series_period(self):
        raise NotImplementedError

    @pytest.mark.parametrize(
        "value", [pd.Timedelta(days=9), timedelta(days=9), np.timedelta64(9, "D")]
    )
    def test_where_index_timedelta64(self, value):
        """Timedelta replacements keep the dtype; a datetime NaT gives object."""
        tdi = pd.timedelta_range("1 Day", periods=4)
        cond = np.array([True, False, False, True])

        expected = pd.TimedeltaIndex(["1 Day", value, value, "4 Days"])
        tm.assert_index_equal(tdi.where(cond, value), expected)

        # wrong-dtyped NaT
        dtnat = np.datetime64("NaT", "ns")
        expected = pd.Index([tdi[0], dtnat, dtnat, tdi[3]], dtype=object)
        assert expected[1] is dtnat

        tm.assert_index_equal(tdi.where(cond, dtnat), expected)

    def test_where_index_period(self):
        """Period replacements keep the dtype; mismatched scalars give object."""
        dti = pd.date_range("2016-01-01", periods=3, freq="QS")
        pi = dti.to_period("Q")

        cond = np.array([False, True, False])

        # Passing a valid scalar
        value = pi[-1] + pi.freq * 10
        expected = pd.PeriodIndex([value, pi[1], value])
        tm.assert_index_equal(pi.where(cond, value), expected)

        # Case passing ndarray[object] of Periods
        other = np.asarray(pi + pi.freq * 10, dtype=object)
        result = pi.where(cond, other)
        expected = pd.PeriodIndex([other[0], pi[1], other[2]])
        tm.assert_index_equal(result, expected)

        # Passing a mismatched scalar -> casts to object
        td = pd.Timedelta(days=4)
        expected = pd.Index([td, pi[1], td], dtype=object)
        tm.assert_index_equal(pi.where(cond, td), expected)

        # A Period constructed with "D" where the index was built with "Q".
        per = pd.Period("2020-04-21", "D")
        expected = pd.Index([per, pi[1], per], dtype=object)
        tm.assert_index_equal(pi.where(cond, per), expected)
|
||||
|
||||
|
||||
class TestFillnaSeriesCoercion(CoercionBase):
    """Result values and dtypes produced by ``fillna`` on Index/Series objects."""

    # not indexing, but kept here for consistency

    method = "fillna"

    @pytest.mark.xfail(reason="Test not implemented")
    def test_has_comprehensive_tests(self):
        raise NotImplementedError

    def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
        """Fill a copy of ``original`` with ``value``; compare values and dtype."""
        filled = original.copy().fillna(value)
        tm.assert_equal(filled, expected)
        assert filled.dtype == expected_dtype

    @pytest.mark.parametrize(
        "fill_val, fill_dtype",
        [
            (1, object),
            (1.1, object),
            (1 + 1j, object),
            (True, object),
        ],
    )
    def test_fillna_object(self, index_or_series, fill_val, fill_dtype):
        """Object-dtype input: every parametrized fill expects object dtype."""
        obj = index_or_series(["a", np.nan, "c", "d"], dtype=object)
        assert obj.dtype == object

        exp = index_or_series(["a", fill_val, "c", "d"], dtype=object)
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val,fill_dtype",
        [
            (1, np.float64),
            (1.1, np.float64),
            (1 + 1j, np.complex128),
            (True, object),
        ],
    )
    def test_fillna_float64(self, index_or_series, fill_val, fill_dtype):
        """float64 input filled with int, float, complex and bool scalars."""
        obj = index_or_series([1.1, np.nan, 3.3, 4.4])
        assert obj.dtype == np.float64

        exp = index_or_series([1.1, fill_val, 3.3, 4.4])
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val,fill_dtype",
        [
            (1, np.complex128),
            (1.1, np.complex128),
            (1 + 1j, np.complex128),
            (True, object),
        ],
    )
    def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype):
        """complex128 input filled with int, float, complex and bool scalars."""
        obj = index_or_series([1 + 1j, np.nan, 3 + 3j, 4 + 4j], dtype=np.complex128)
        assert obj.dtype == np.complex128

        exp = index_or_series([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val,fill_dtype",
        [
            (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
            (pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
            (1, object),
            ("x", object),
        ],
        ids=["datetime64", "datetime64tz", "object", "object"],
    )
    def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype):
        """Naive datetime64 input filled with naive, aware and non-datetime values."""
        first, third, fourth = (
            pd.Timestamp(day) for day in ("2011-01-01", "2011-01-03", "2011-01-04")
        )
        obj = index_or_series([first, pd.NaT, third, fourth])
        assert obj.dtype == "datetime64[ns]"

        exp = index_or_series([first, fill_val, third, fourth])
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val,fill_dtype",
        [
            (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
            (pd.Timestamp("2012-01-01"), object),
            # pre-2.0 with a mismatched tz we would get object result
            (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[ns, US/Eastern]"),
            (1, object),
            ("x", object),
        ],
    )
    def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype):
        """tz-aware input; an aware fill value is converted to the input's tz."""
        tz = "US/Eastern"
        first, third, fourth = (
            pd.Timestamp(day, tz=tz)
            for day in ("2011-01-01", "2011-01-03", "2011-01-04")
        )
        obj = index_or_series([first, pd.NaT, third, fourth])
        assert obj.dtype == "datetime64[ns, US/Eastern]"

        # values without a tz (naive Timestamp, int, str) are used unchanged
        has_tz = getattr(fill_val, "tz", None) is not None
        fv = fill_val.tz_convert(tz) if has_tz else fill_val

        exp = index_or_series([first, fv, third, fourth])
        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val",
        [
            1,
            1.1,
            1 + 1j,
            True,
            pd.Interval(1, 2, closed="left"),
            pd.Timestamp("2012-01-01", tz="US/Eastern"),
            pd.Timestamp("2012-01-01"),
            pd.Timedelta(days=1),
            pd.Period("2016-01-01", "D"),
        ],
    )
    def test_fillna_interval(self, index_or_series, fill_val):
        """Interval input: each parametrized fill expects an object result."""
        ii = pd.interval_range(1.0, 5.0, closed="right").insert(1, np.nan)
        assert isinstance(ii.dtype, pd.IntervalDtype)
        obj = index_or_series(ii)

        # position 1 holds the missing entry that gets replaced
        values = [fill_val if pos == 1 else ii[pos] for pos in range(5)]
        exp = index_or_series(values, dtype=object)

        self._assert_fillna_conversion(obj, fill_val, exp, object)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_series_int64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_index_int64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_series_bool(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_index_bool(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_series_timedelta64(self):
        raise NotImplementedError

    @pytest.mark.parametrize(
        "fill_val",
        [
            1,
            1.1,
            1 + 1j,
            True,
            pd.Interval(1, 2, closed="left"),
            pd.Timestamp("2012-01-01", tz="US/Eastern"),
            pd.Timestamp("2012-01-01"),
            pd.Timedelta(days=1),
            pd.Period("2016-01-01", "W"),
        ],
    )
    def test_fillna_series_period(self, index_or_series, fill_val):
        """Daily-period input: each parametrized fill expects an object result."""
        pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT)
        assert isinstance(pi.dtype, pd.PeriodDtype)
        obj = index_or_series(pi)

        # position 1 holds the missing entry that gets replaced
        values = [fill_val if pos == 1 else pi[pos] for pos in range(5)]
        exp = index_or_series(values, dtype=object)

        self._assert_fillna_conversion(obj, fill_val, exp, object)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_index_timedelta64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_index_period(self):
        raise NotImplementedError
|
||||
|
||||
|
||||
class TestReplaceSeriesCoercion(CoercionBase):
    """Result dtypes of ``Series.replace`` when values move between dtypes."""

    klasses = ["series"]
    method = "replace"

    # dtype name -> two sample values of that dtype; a from/to pair of keys
    # is zipped into the mapping handed to ``Series.replace``.
    rep: dict[str, list] = {}
    rep["object"] = ["a", "b"]
    rep["int64"] = [4, 5]
    rep["float64"] = [1.1, 2.2]
    rep["complex128"] = [1 + 1j, 2 + 2j]
    rep["bool"] = [True, False]
    rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")]

    for tz in ["UTC", "US/Eastern"]:
        # to test tz => different tz replacement
        key = f"datetime64[ns, {tz}]"
        rep[key] = [
            pd.Timestamp("2011-01-01", tz=tz),
            pd.Timestamp("2011-01-03", tz=tz),
        ]

    rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]

    @pytest.fixture(params=["dict", "series"])
    def how(self, request):
        """Container type used to express the replacement mapping."""
        return request.param

    @pytest.fixture(
        params=[
            "object",
            "int64",
            "float64",
            "complex128",
            "bool",
            "datetime64[ns]",
            "datetime64[ns, UTC]",
            "datetime64[ns, US/Eastern]",
            "timedelta64[ns]",
        ]
    )
    def from_key(self, request):
        """Key into ``rep`` for the values being replaced."""
        return request.param

    @pytest.fixture(
        params=[
            "object",
            "int64",
            "float64",
            "complex128",
            "bool",
            "datetime64[ns]",
            "datetime64[ns, UTC]",
            "datetime64[ns, US/Eastern]",
            "timedelta64[ns]",
        ],
        ids=[
            "object",
            "int64",
            "float64",
            "complex128",
            "bool",
            "datetime64",
            "datetime64tz",
            "datetime64tz",
            "timedelta64",
        ],
    )
    def to_key(self, request):
        """Key into ``rep`` for the replacement values."""
        return request.param

    @pytest.fixture
    def replacer(self, how, from_key, to_key):
        """
        Object we will pass to `Series.replace`
        """
        if how == "dict":
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == "series":
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError(f"unsupported replacer kind: {how!r}")
        return replacer

    # Expected needs adjustment for the infer string option, seems to work as expected
    @pytest.mark.skipif(using_pyarrow_string_dtype(), reason="TODO: test is to complex")
    def test_replace_series(self, how, to_key, from_key, replacer):
        """Replace every value of a ``from_key`` Series with ``to_key`` values."""
        index = pd.Index([3, 4], name="xxx")
        obj = pd.Series(self.rep[from_key], index=index, name="yyy")
        assert obj.dtype == from_key

        if from_key.startswith("datetime") and to_key.startswith("datetime"):
            # tested below
            return
        elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]:
            # tested below
            return

        # NOTE: compare with ``==``; the previous ``to_key in ("int64")`` was
        # a substring test against a plain string, not tuple membership.
        if (from_key == "float64" and to_key == "int64") or (
            from_key == "complex128" and to_key in ("int64", "float64")
        ):
            if not IS64 or is_platform_windows():
                pytest.skip(f"32-bit platform buggy: {from_key} -> {to_key}")

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key], index=index, name="yyy", dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name="yyy")
            assert exp.dtype == to_key

        msg = "Downcasting behavior in `replace`"
        warn = FutureWarning
        if (
            exp.dtype == obj.dtype
            or exp.dtype == object
            or (exp.dtype.kind in "iufc" and obj.dtype.kind in "iufc")
        ):
            # same dtype, object result, or numeric -> numeric: no warning
            warn = None
        with tm.assert_produces_warning(warn, match=msg):
            result = obj.replace(replacer)

        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "to_key",
        ["timedelta64[ns]", "bool", "object", "complex128", "float64", "int64"],
        indirect=True,
    )
    @pytest.mark.parametrize(
        "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
    )
    def test_replace_series_datetime_tz(
        self, how, to_key, from_key, replacer, using_infer_string
    ):
        """Replace tz-aware datetimes with values of a non-datetime dtype."""
        index = pd.Index([3, 4], name="xyz")
        obj = pd.Series(self.rep[from_key], index=index, name="yyy")
        assert obj.dtype == from_key

        exp = pd.Series(self.rep[to_key], index=index, name="yyy")
        if using_infer_string and to_key == "object":
            assert exp.dtype == "string"
        else:
            assert exp.dtype == to_key

        msg = "Downcasting behavior in `replace`"
        # only an object-dtype expectation is exempt from the warning
        warn = FutureWarning if exp.dtype != object else None
        with tm.assert_produces_warning(warn, match=msg):
            result = obj.replace(replacer)

        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "to_key",
        ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
        indirect=True,
    )
    @pytest.mark.parametrize(
        "from_key",
        ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
        indirect=True,
    )
    def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer):
        """Replace datetimes with datetimes across naive and tz-aware dtypes."""
        index = pd.Index([3, 4], name="xyz")
        obj = pd.Series(self.rep[from_key], index=index, name="yyy")
        assert obj.dtype == from_key

        exp = pd.Series(self.rep[to_key], index=index, name="yyy")
        warn = FutureWarning
        if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance(
            exp.dtype, pd.DatetimeTZDtype
        ):
            # with mismatched tzs, we retain the original dtype as of 2.0
            exp = exp.astype(obj.dtype)
            warn = None
        else:
            assert exp.dtype == to_key
            if to_key == from_key:
                warn = None

        msg = "Downcasting behavior in `replace`"
        with tm.assert_produces_warning(warn, match=msg):
            result = obj.replace(replacer)

        tm.assert_series_equal(result, exp)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_replace_series_period(self):
        raise NotImplementedError
|
@ -0,0 +1,191 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndex:
    """Indexing tests on datetime data, most of it timezone-aware."""

    def test_get_loc_naive_dti_aware_str_deprecated(self):
        """A tz-aware string key on a naive index raises ``KeyError``."""
        # GH#46903
        start = Timestamp("20130101")._value + 50
        dti = pd.DatetimeIndex([start + offset for offset in range(100)])
        ser = Series(range(100), index=dti)

        key = "2013-01-01 00:00:00.000000050+0000"
        pattern = re.escape(repr(key))

        with pytest.raises(KeyError, match=pattern):
            ser[key]

        with pytest.raises(KeyError, match=pattern):
            dti.get_loc(key)

    def test_indexing_with_datetime_tz(self):
        """Row lookup on a frame mixing tz-aware and naive datetime columns."""
        # GH#8260
        # support datetime64 with tz
        aware = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
        naive = date_range("20130110", periods=3)
        df = DataFrame({"A": aware, "B": naive})
        df["C"] = aware
        for col in (1, 2):
            df.iloc[1, col] = pd.NaT

        expected = Series(
            [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT],
            index=list("ABC"),
            dtype="object",
            name=1,
        )

        # indexing
        tm.assert_series_equal(df.iloc[1], expected)
        tm.assert_series_equal(df.loc[1], expected)

    def test_indexing_fast_xs(self):
        """Single-row and boolean selection on a one-column tz-aware frame."""
        # indexing - fast_xs
        df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")})
        expected_row = Series(
            [Timestamp("2014-01-06 00:00:00+0000", tz="UTC")],
            index=["a"],
            name=5,
            dtype="M8[ns, UTC]",
        )
        tm.assert_series_equal(df.iloc[5], expected_row)
        tm.assert_series_equal(df.loc[5], expected_row)

        # indexing - boolean
        later = df[df.a > df.a[3]]
        tm.assert_frame_equal(later, df.iloc[4:])

    def test_consistency_with_tz_aware_scalar(self):
        """Every scalar accessor returns the same tz-aware Timestamp."""
        # xref gh-12938
        # various ways of indexing the same tz-aware scalar
        df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame()

        df = pd.concat([df, df]).reset_index(drop=True)
        expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels")

        lookups = [
            df[0][0],
            df.iloc[0, 0],
            df.loc[0, 0],
            df.iat[0, 0],
            df.at[0, 0],
            df[0].loc[0],
            df[0].at[0],
        ]
        for result in lookups:
            assert result == expected

    def test_indexing_with_datetimeindex_tz(self, indexer_sl):
        """List-like and scalar get/set on a Series with a tz-aware index."""
        # GH 12050
        # indexing on a series with a datetimeindex with tz
        index = date_range("2015-01-01", periods=2, tz="utc")
        ser = Series(range(2), index=index, dtype="int64")

        # list-like indexing
        for sel in (index, list(index)):
            # getitem
            fetched = indexer_sl(ser)[sel]
            expected = ser.copy()
            if sel is not index:
                # the plain-list selector is compared against a freq-less index
                expected.index = expected.index._with_freq(None)
            tm.assert_series_equal(fetched, expected)

            # setitem
            target = ser.copy()
            indexer_sl(target)[sel] = 1
            tm.assert_series_equal(target, Series(1, index=index))

        # single element indexing

        # getitem
        assert indexer_sl(ser)[index[1]] == 1

        # setitem
        target = ser.copy()
        indexer_sl(target)[index[1]] = 5
        tm.assert_series_equal(target, Series([0, 5], index=index))

    def test_nanosecond_getitem_setitem_with_tz(self):
        """``loc`` get/set with a nanosecond-precision tz-aware label."""
        # GH 11679
        index = pd.DatetimeIndex(
            ["2016-06-28 08:30:00.123456789"],
            dtype="datetime64[ns, America/Chicago]",
        )
        df = DataFrame({"a": [10]}, index=index)
        label = df.index[0]

        row = df.loc[label]
        tm.assert_series_equal(row, Series(10, index=["a"], name=label))

        updated = df.copy()
        updated.loc[label, "a"] = -1
        tm.assert_frame_equal(updated, DataFrame(-1, index=index, columns=["a"]))

    def test_getitem_str_slice_millisecond_resolution(self, frame_or_series):
        """String slice bounds with millisecond precision select the inner rows."""
        # GH#33589
        keys = [
            "2017-10-25T16:25:04.151",
            "2017-10-25T16:25:04.252",
            "2017-10-25T16:50:05.237",
            "2017-10-25T16:50:05.238",
        ]
        stamps = [Timestamp(key) for key in keys]
        obj = frame_or_series([1, 2, 3, 4], index=stamps)

        result = obj[keys[1] : keys[2]]
        expected = frame_or_series(
            [2, 3],
            index=[Timestamp(keys[1]), Timestamp(keys[2])],
        )
        tm.assert_equal(result, expected)

    def test_getitem_pyarrow_index(self, frame_or_series):
        """``loc`` with a pyarrow-timestamp index; skipped without pyarrow."""
        # GH 53644
        pytest.importorskip("pyarrow")
        arrow_dtype = "timestamp[us][pyarrow]"

        full_index = date_range("2020", freq="D", periods=5).astype(arrow_dtype)
        obj = frame_or_series(range(5), index=full_index)
        result = obj.loc[obj.index[:-3]]

        short_index = date_range("2020", freq="D", periods=2).astype(arrow_dtype)
        expected = frame_or_series(range(2), index=short_index)
        tm.assert_equal(result, expected)
|
@ -0,0 +1,689 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
RangeIndex,
|
||||
Series,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def gen_obj(klass, index):
    """Build a test object of type ``klass`` labelled by ``index``.

    For ``Series`` the values are ``0..len(index)-1``.  For anything else a
    square ``DataFrame`` of seeded (seed 2) standard-normal values is
    returned, using ``index`` for both rows and columns.
    """
    size = len(index)
    if klass is not Series:
        values = np.random.default_rng(2).standard_normal((size, size))
        return DataFrame(values, index=index, columns=index)
    return Series(np.arange(size), index=index)
|
||||
|
||||
|
||||
class TestFloatIndexers:
|
||||
def check(self, result, original, indexer, getitem):
    """
    Compare ``result`` with the positional selection of ``original``.

    A Series is always selected with ``iloc[indexer]``.  Any other object is
    selected along the columns (``iloc[:, indexer]``) when ``getitem`` is
    true and along the rows otherwise.
    """
    by_column = getitem and not isinstance(original, Series)
    if by_column:
        expected = original.iloc[:, indexer]
    else:
        expected = original.iloc[indexer]

    tm.assert_almost_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        Index(list("abcde")),
        Index(list("abcde"), dtype="category"),
        date_range("2020-01-01", periods=5),
        timedelta_range("1 day", periods=5),
        period_range("2020-01-01", periods=5),
    ],
)
def test_scalar_non_numeric(self, index, frame_or_series, indexer_sl):
    """Float scalar keys on non-numeric indexes: lookup raises, setting varies."""
    # GH 4892
    # float_indexers should raise exceptions
    # on appropriate Index types & accessors

    s = gen_obj(frame_or_series, index)

    # getting
    # the dot is escaped so the pattern matches the literal text "3.0" only
    with pytest.raises(KeyError, match=r"^3\.0$"):
        indexer_sl(s)[3.0]

    # contains
    assert 3.0 not in s

    s2 = s.copy()
    indexer_sl(s2)[3.0] = 10

    if indexer_sl is tm.setitem:
        assert 3.0 in s2.axes[-1]
    elif indexer_sl is tm.loc:
        assert 3.0 in s2.axes[0]
    else:
        assert 3.0 not in s2.axes[0]
        assert 3.0 not in s2.axes[-1]
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        Index(list("abcde")),
        Index(list("abcde"), dtype="category"),
        date_range("2020-01-01", periods=5),
        timedelta_range("1 day", periods=5),
        period_range("2020-01-01", periods=5),
    ],
)
def test_scalar_non_numeric_series_fallback(self, index):
    """Integer key warns about positional fallback; float key raises KeyError."""
    # falls back to position selection, series only
    s = Series(np.arange(len(index)), index=index)

    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        s[3]
    # the dot is escaped so the pattern matches the literal text "3.0" only
    with pytest.raises(KeyError, match=r"^3\.0$"):
        s[3.0]
|
||||
|
||||
def test_scalar_with_mixed(self, indexer_sl):
    """Scalar lookups on a string index and on a mixed string/float index."""
    s2 = Series([1, 2, 3], index=["a", "b", "c"])
    s3 = Series([1, 2, 3], index=["a", "b", 1.5])

    # lookup in a pure string index with an invalid indexer
    # (checked once; the regex dot is escaped to match "1.0" literally)
    with pytest.raises(KeyError, match=r"^1\.0$"):
        indexer_sl(s2)[1.0]

    result = indexer_sl(s2)["b"]
    expected = 2
    assert result == expected

    # mixed index so we have label
    # indexing
    with pytest.raises(KeyError, match=r"^1\.0$"):
        indexer_sl(s3)[1.0]

    if indexer_sl is not tm.loc:
        # __getitem__ falls back to positional
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = s3[1]
        expected = 2
        assert result == expected

    # the float key must still be missing after the lookup above
    with pytest.raises(KeyError, match=r"^1\.0$"):
        indexer_sl(s3)[1.0]

    result = indexer_sl(s3)[1.5]
    expected = 3
    assert result == expected
|
||||
|
||||
@pytest.mark.parametrize(
    "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
)
def test_scalar_integer(self, index, frame_or_series, indexer_sl):
    """Float scalar keys on an integer index: get and set via ``3.0``."""
    is_getitem = indexer_sl is not tm.loc

    # test how scalar float indexers work on int indexes

    # integer index
    obj = gen_obj(frame_or_series, index)

    # coerce to equal int
    self.check(indexer_sl(obj)[3.0], obj, 3, is_getitem)

    if not isinstance(obj, Series):
        compare = tm.assert_series_equal
        # plain getitem expects int 100, ``loc`` expects float 100.0
        fill = 100 if is_getitem else 100.0
        expected = Series(fill, index=range(len(obj)), name=3)
    else:
        expected = 100

        def compare(left, right):
            assert left == right

    modified = obj.copy()
    indexer_sl(modified)[3.0] = 100

    # the float key and the equal int key must read back the same thing
    for key in (3.0, 3):
        compare(indexer_sl(modified)[key], expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
)
def test_scalar_integer_contains_float(self, index, frame_or_series):
    """``3.0 in obj`` is true for an object built on an integer index."""
    # contains
    # integer index
    container = gen_obj(frame_or_series, index)

    # coerce to equal int
    assert 3.0 in container
|
||||
|
||||
def test_scalar_float(self, frame_or_series):
    """Float scalar keys on a float index via ``loc``, ``[]`` and ``iloc``."""
    # scalar float indexers work on a float index
    index = Index(np.arange(5.0))
    obj = gen_obj(frame_or_series, index)

    # assert all operations except for iloc are ok
    label = index[3]
    for idxr, getitem in ((tm.loc, False), (tm.setitem, True)):
        # getting
        self.check(idxr(obj)[label], obj, 3, getitem)

        # setting (only reads back from a copy; nothing is assigned here)
        duplicate = obj.copy()
        self.check(idxr(duplicate)[label], obj, 3, getitem)

        # random float is a KeyError
        with pytest.raises(KeyError, match=r"^3\.5$"):
            idxr(obj)[3.5]

    # contains
    assert 3.0 in obj

    # iloc succeeds with an integer
    at_three = obj.iloc[3]
    duplicate = obj.copy()

    duplicate.iloc[3] = at_three
    self.check(duplicate.iloc[3], obj, 3, False)
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        Index(list("abcde"), dtype=object),
        date_range("2020-01-01", periods=5),
        timedelta_range("1 day", periods=5),
        period_range("2020-01-01", periods=5),
    ],
)
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
def test_slice_non_numeric(self, index, idx, frame_or_series, indexer_sli):
    """Float slice bounds on non-numeric indexes raise ``TypeError``."""
    # GH 4892
    # float_indexers should raise exceptions
    # on appropriate Index types & accessors

    obj = gen_obj(frame_or_series, index)
    positional = indexer_sli is tm.iloc

    # getitem
    if not positional:
        getitem_msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers "
            r"\[(3|4)(\.0)?\] "
            r"of type (float|int)"
        )
    else:
        getitem_msg = (
            "cannot do positional indexing "
            rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
            "type float"
        )
    with pytest.raises(TypeError, match=getitem_msg):
        indexer_sli(obj)[idx]

    # setitem
    # only iloc gets a different message; otherwise the one above is reused
    setitem_msg = (
        "slice indices must be integers or None or have an __index__ method"
        if positional
        else getitem_msg
    )
    with pytest.raises(TypeError, match=setitem_msg):
        indexer_sli(obj)[idx] = 0
|
||||
|
||||
def test_slice_integer(self):
    """Float slices on integer-indexed Series: ``loc`` works, ``[]`` raises."""
    # same as above, but for Integer based indexes
    # these coerce to a like integer
    # oob indicates if we are out of bounds
    # of positional indexing
    empty = slice(0, 0)
    cases = [
        (Index(np.arange(5, dtype=np.int64)), False),
        (RangeIndex(5), False),
        (Index(np.arange(5, dtype=np.int64) + 10), True),
    ]
    for index, oob in cases:
        # ser is an in-range index
        ser = Series(range(5), index=index)
        index_name = type(index).__name__

        # getitem
        # these are all label indexing
        # except getitem which is positional
        # empty
        for idx in (slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)):
            picked = ser.loc[idx]
            self.check(picked, ser, empty if oob else slice(3, 5), False)

        # getitem out-of-bounds
        for idx in (slice(-6, 6), slice(-6.0, 6.0)):
            picked = ser.loc[idx]
            self.check(picked, ser, empty if oob else slice(-6, 6), False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            rf"on {index_name} with these indexers \[-6\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            ser[slice(-6.0, 6.0)]

        # getitem odd floats
        odd_cases = [
            (slice(2.5, 4), slice(3, 5)),
            (slice(2, 3.5), slice(2, 4)),
            (slice(2.5, 3.5), slice(3, 4)),
        ]
        for idx, in_range in odd_cases:
            picked = ser.loc[idx]
            self.check(picked, ser, empty if oob else in_range, False)

            # positional indexing
            msg = (
                "cannot do slice indexing "
                rf"on {index_name} with these indexers \[(2|3)\.5\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                ser[idx]
|
||||
|
||||
@pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)])
def test_integer_positional_indexing(self, idx):
    """make sure that we are raising on positional indexing
    w.r.t. an integer index
    """
    ser = Series(range(2, 6), index=range(2, 6))

    # an all-integer slice through ``[]`` equals the ``iloc`` slice
    tm.assert_series_equal(ser[2:4], ser.iloc[2:4])

    index_name = RangeIndex.__name__
    msg = (
        "cannot do (slice|positional) indexing "
        rf"on {index_name} with these indexers \[(2|4)\.0\] of "
        "type float"
    )
    with pytest.raises(TypeError, match=msg):
        ser[idx]
    with pytest.raises(TypeError, match=msg):
        ser.iloc[idx]
|
||||
|
||||
@pytest.mark.parametrize(
    "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
)
def test_slice_integer_frame_getitem(self, index):
    """Float slices on an integer-indexed frame: ``loc`` works, ``[]`` raises."""
    # similar to above, but on the getitem dim (of a DataFrame)
    frame = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index)
    index_name = type(index).__name__

    # getitem
    for idx in (slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)):
        self.check(frame.loc[idx], frame, slice(0, 2), False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            rf"on {index_name} with these indexers \[(0|1)\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            frame[idx]

    # getitem out-of-bounds
    for idx in (slice(-10, 10), slice(-10.0, 10.0)):
        self.check(frame.loc[idx], frame, slice(-10, 10), True)

    # positional indexing
    msg = (
        "cannot do slice indexing "
        rf"on {index_name} with these indexers \[-10\.0\] of "
        "type float"
    )
    with pytest.raises(TypeError, match=msg):
        frame[slice(-10.0, 10.0)]

    # getitem odd floats
    odd_cases = [
        (slice(0.5, 1), slice(1, 2)),
        (slice(0, 0.5), slice(0, 1)),
        (slice(0.5, 1.5), slice(1, 2)),
    ]
    for idx, positions in odd_cases:
        self.check(frame.loc[idx], frame, positions, False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            rf"on {index_name} with these indexers \[0\.5\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            frame[idx]
|
||||
|
||||
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
@pytest.mark.parametrize(
    "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
)
def test_float_slice_getitem_with_integer_index_raises(self, idx, index):
    """``loc`` accepts a float slice on an integer index; plain ``[]`` raises."""
    # similar to above, but on the getitem dim (of a DataFrame)
    frame = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index)

    # setitem
    scratch = frame.copy()
    scratch.loc[idx] = 0
    written = scratch.loc[idx].values.ravel()
    assert (written == 0).all()

    # positional indexing
    msg = (
        "cannot do slice indexing "
        rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
        "type float"
    )
    with pytest.raises(TypeError, match=msg):
        frame[idx] = 0

    with pytest.raises(TypeError, match=msg):
        frame[idx]
|
||||
|
||||
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
def test_slice_float(self, idx, frame_or_series, indexer_sl):
    """Float and int slice bounds on a float index, for get and set."""
    # same as above, but for floats
    float_index = Index(np.arange(5.0)) + 0.1
    obj = gen_obj(frame_or_series, float_index)

    # getitem: the slice must pick exactly the row at position 3
    picked = indexer_sl(obj)[idx]
    assert isinstance(picked, type(obj))
    tm.assert_equal(picked, obj.iloc[3:4])

    # setitem
    scratch = obj.copy()
    indexer_sl(scratch)[idx] = 0
    written = indexer_sl(scratch)[idx].values.ravel()
    assert (written == 0).all()
|
||||
|
||||
def test_floating_index_doc_example(self):
|
||||
index = Index([1.5, 2, 3, 4.5, 5])
|
||||
s = Series(range(5), index=index)
|
||||
assert s[3] == 2
|
||||
assert s.loc[3] == 2
|
||||
assert s.iloc[3] == 3
|
||||
|
||||
def test_floating_misc(self, indexer_sl):
    """Exercise scalar, slice and list indexing on a float index.

    ``indexer_sl`` is a callable mapping the Series to the indexer under
    test; it is compared against ``tm.setitem`` below to decide whether a
    ``FutureWarning`` is expected for integer slices.
    """
    # related 236
    # scalar/slicing of a float index
    s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)

    # label based slicing
    result = indexer_sl(s)[1.0:3.0]
    expected = Series(1, index=[2.5])
    tm.assert_series_equal(result, expected)

    # exact indexing when found

    result = indexer_sl(s)[5.0]
    assert result == 2

    result = indexer_sl(s)[5]
    assert result == 2

    # value not found (and no fallbacking at all)

    # scalar integers
    with pytest.raises(KeyError, match=r"^4$"):
        indexer_sl(s)[4]

    # fancy floats/integers create the correct entry (as nan)
    # fancy tests
    expected = Series([2, 0], index=Index([5.0, 0.0], dtype=np.float64))
    for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float
        tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

    expected = Series([2, 0], index=Index([5, 0], dtype="float64"))
    for fancy_idx in [[5, 0], np.array([5, 0])]:
        tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

    warn = FutureWarning if indexer_sl is tm.setitem else None
    msg = r"The behavior of obj\[i:j\] with a float-dtype index"

    # all should return the same as we are slicing 'the same'
    with tm.assert_produces_warning(warn, match=msg):
        result1 = indexer_sl(s)[2:5]
    result2 = indexer_sl(s)[2.0:5.0]
    result3 = indexer_sl(s)[2.0:5]
    result4 = indexer_sl(s)[2.1:5]
    tm.assert_series_equal(result1, result2)
    tm.assert_series_equal(result1, result3)
    tm.assert_series_equal(result1, result4)

    expected = Series([1, 2], index=[2.5, 5.0])
    with tm.assert_produces_warning(warn, match=msg):
        result = indexer_sl(s)[2:5]

    tm.assert_series_equal(result, expected)

    # list selection
    result1 = indexer_sl(s)[[0.0, 5, 10]]
    result2 = s.iloc[[0, 2, 4]]
    tm.assert_series_equal(result1, result2)

    with pytest.raises(KeyError, match="not in index"):
        indexer_sl(s)[[1.6, 5, 10]]

    with pytest.raises(KeyError, match="not in index"):
        indexer_sl(s)[[0, 1, 2]]

    result = indexer_sl(s)[[2.5, 5]]
    tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0]))

    result = indexer_sl(s)[[2.5]]
    tm.assert_series_equal(result, Series([1], index=[2.5]))
|
||||
|
||||
def test_floatindex_slicing_bug(self, float_numpy_dtype):
|
||||
# GH 5557, related to slicing a float index
|
||||
dtype = float_numpy_dtype
|
||||
ser = {
|
||||
256: 2321.0,
|
||||
1: 78.0,
|
||||
2: 2716.0,
|
||||
3: 0.0,
|
||||
4: 369.0,
|
||||
5: 0.0,
|
||||
6: 269.0,
|
||||
7: 0.0,
|
||||
8: 0.0,
|
||||
9: 0.0,
|
||||
10: 3536.0,
|
||||
11: 0.0,
|
||||
12: 24.0,
|
||||
13: 0.0,
|
||||
14: 931.0,
|
||||
15: 0.0,
|
||||
16: 101.0,
|
||||
17: 78.0,
|
||||
18: 9643.0,
|
||||
19: 0.0,
|
||||
20: 0.0,
|
||||
21: 0.0,
|
||||
22: 63761.0,
|
||||
23: 0.0,
|
||||
24: 446.0,
|
||||
25: 0.0,
|
||||
26: 34773.0,
|
||||
27: 0.0,
|
||||
28: 729.0,
|
||||
29: 78.0,
|
||||
30: 0.0,
|
||||
31: 0.0,
|
||||
32: 3374.0,
|
||||
33: 0.0,
|
||||
34: 1391.0,
|
||||
35: 0.0,
|
||||
36: 361.0,
|
||||
37: 0.0,
|
||||
38: 61808.0,
|
||||
39: 0.0,
|
||||
40: 0.0,
|
||||
41: 0.0,
|
||||
42: 6677.0,
|
||||
43: 0.0,
|
||||
44: 802.0,
|
||||
45: 0.0,
|
||||
46: 2691.0,
|
||||
47: 0.0,
|
||||
48: 3582.0,
|
||||
49: 0.0,
|
||||
50: 734.0,
|
||||
51: 0.0,
|
||||
52: 627.0,
|
||||
53: 70.0,
|
||||
54: 2584.0,
|
||||
55: 0.0,
|
||||
56: 324.0,
|
||||
57: 0.0,
|
||||
58: 605.0,
|
||||
59: 0.0,
|
||||
60: 0.0,
|
||||
61: 0.0,
|
||||
62: 3989.0,
|
||||
63: 10.0,
|
||||
64: 42.0,
|
||||
65: 0.0,
|
||||
66: 904.0,
|
||||
67: 0.0,
|
||||
68: 88.0,
|
||||
69: 70.0,
|
||||
70: 8172.0,
|
||||
71: 0.0,
|
||||
72: 0.0,
|
||||
73: 0.0,
|
||||
74: 64902.0,
|
||||
75: 0.0,
|
||||
76: 347.0,
|
||||
77: 0.0,
|
||||
78: 36605.0,
|
||||
79: 0.0,
|
||||
80: 379.0,
|
||||
81: 70.0,
|
||||
82: 0.0,
|
||||
83: 0.0,
|
||||
84: 3001.0,
|
||||
85: 0.0,
|
||||
86: 1630.0,
|
||||
87: 7.0,
|
||||
88: 364.0,
|
||||
89: 0.0,
|
||||
90: 67404.0,
|
||||
91: 9.0,
|
||||
92: 0.0,
|
||||
93: 0.0,
|
||||
94: 7685.0,
|
||||
95: 0.0,
|
||||
96: 1017.0,
|
||||
97: 0.0,
|
||||
98: 2831.0,
|
||||
99: 0.0,
|
||||
100: 2963.0,
|
||||
101: 0.0,
|
||||
102: 854.0,
|
||||
103: 0.0,
|
||||
104: 0.0,
|
||||
105: 0.0,
|
||||
106: 0.0,
|
||||
107: 0.0,
|
||||
108: 0.0,
|
||||
109: 0.0,
|
||||
110: 0.0,
|
||||
111: 0.0,
|
||||
112: 0.0,
|
||||
113: 0.0,
|
||||
114: 0.0,
|
||||
115: 0.0,
|
||||
116: 0.0,
|
||||
117: 0.0,
|
||||
118: 0.0,
|
||||
119: 0.0,
|
||||
120: 0.0,
|
||||
121: 0.0,
|
||||
122: 0.0,
|
||||
123: 0.0,
|
||||
124: 0.0,
|
||||
125: 0.0,
|
||||
126: 67744.0,
|
||||
127: 22.0,
|
||||
128: 264.0,
|
||||
129: 0.0,
|
||||
260: 197.0,
|
||||
268: 0.0,
|
||||
265: 0.0,
|
||||
269: 0.0,
|
||||
261: 0.0,
|
||||
266: 1198.0,
|
||||
267: 0.0,
|
||||
262: 2629.0,
|
||||
258: 775.0,
|
||||
257: 0.0,
|
||||
263: 0.0,
|
||||
259: 0.0,
|
||||
264: 163.0,
|
||||
250: 10326.0,
|
||||
251: 0.0,
|
||||
252: 1228.0,
|
||||
253: 0.0,
|
||||
254: 2769.0,
|
||||
255: 0.0,
|
||||
}
|
||||
|
||||
# smoke test for the repr
|
||||
s = Series(ser, dtype=dtype)
|
||||
result = s.value_counts()
|
||||
assert result.index.dtype == dtype
|
||||
str(result)
|
@ -0,0 +1,53 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_iat(float_frame):
|
||||
for i, row in enumerate(float_frame.index):
|
||||
for j, col in enumerate(float_frame.columns):
|
||||
result = float_frame.iat[i, j]
|
||||
expected = float_frame.at[row, col]
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_iat_duplicate_columns():
    """Check that ``iat`` reads the first cell when column labels repeat."""
    # https://github.com/pandas-dev/pandas/issues/11754
    df = DataFrame([[1, 2]], columns=["x", "x"])
    assert df.iat[0, 0] == 1
|
||||
|
||||
|
||||
def test_iat_getitem_series_with_period_index():
    """Check ``iat[0]`` equals label lookup of the first period."""
    # GH#4390, iat incorrectly indexing
    index = period_range("1/1/2001", periods=10)
    ser = Series(np.random.default_rng(2).standard_normal(10), index=index)
    expected = ser[index[0]]
    result = ser.iat[0]
    assert expected == result
|
||||
|
||||
|
||||
def test_iat_setitem_item_cache_cleared(
    indexer_ial, using_copy_on_write, warn_copy_on_write
):
    """Check a positional set stays visible after an earlier set on another column.

    ``indexer_ial`` maps the frame to the positional indexer under test.
    When ``using_copy_on_write`` is false, the previously extracted column
    ``ser`` must also reflect the second write.
    """
    # GH#45684
    data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)}
    df = DataFrame(data).copy()
    ser = df["y"]

    # previously this iat setting would split the block and fail to clear
    # the item_cache.
    with tm.assert_cow_warning(warn_copy_on_write):
        indexer_ial(df)[7, 0] = 9999

    with tm.assert_cow_warning(warn_copy_on_write):
        indexer_ial(df)[7, 1] = 1234

    assert df.iat[7, 1] == 1234
    if not using_copy_on_write:
        assert ser.iloc[-1] == 1234
    assert df.iloc[-1, -1] == 1234
|
1478
lib/python3.13/site-packages/pandas/tests/indexing/test_iloc.py
Normal file
1478
lib/python3.13/site-packages/pandas/tests/indexing/test_iloc.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,61 @@
|
||||
# Tests aimed at pandas.core.indexers
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.indexers import (
|
||||
is_scalar_indexer,
|
||||
length_of_indexer,
|
||||
validate_indices,
|
||||
)
|
||||
|
||||
|
||||
def test_length_of_indexer():
    """Check ``length_of_indexer`` is 1 for a boolean mask with one True."""
    arr = np.zeros(4, dtype=bool)
    arr[0] = 1
    result = length_of_indexer(arr)
    assert result == 1
|
||||
|
||||
|
||||
def test_is_scalar_indexer():
    """Check ``is_scalar_indexer`` over scalar, tuple, array and slice inputs."""
    indexer = (0, 1)
    assert is_scalar_indexer(indexer, 2)
    assert not is_scalar_indexer(indexer[0], 2)

    indexer = (np.array([2]), 1)
    assert not is_scalar_indexer(indexer, 2)

    indexer = (np.array([2]), np.array([3]))
    assert not is_scalar_indexer(indexer, 2)

    indexer = (np.array([2]), np.array([3, 4]))
    assert not is_scalar_indexer(indexer, 2)

    assert not is_scalar_indexer(slice(None), 1)

    indexer = 0
    assert is_scalar_indexer(indexer, 1)

    indexer = (0,)
    assert is_scalar_indexer(indexer, 1)
|
||||
|
||||
|
||||
class TestValidateIndices:
|
||||
def test_validate_indices_ok(self):
|
||||
indices = np.asarray([0, 1])
|
||||
validate_indices(indices, 2)
|
||||
validate_indices(indices[:0], 0)
|
||||
validate_indices(np.array([-1, -1]), 0)
|
||||
|
||||
def test_validate_indices_low(self):
|
||||
indices = np.asarray([0, -2])
|
||||
with pytest.raises(ValueError, match="'indices' contains"):
|
||||
validate_indices(indices, 2)
|
||||
|
||||
def test_validate_indices_high(self):
|
||||
indices = np.asarray([0, 1, 2])
|
||||
with pytest.raises(IndexError, match="indices are out"):
|
||||
validate_indices(indices, 2)
|
||||
|
||||
def test_validate_indices_empty(self):
|
||||
with pytest.raises(IndexError, match="indices are out"):
|
||||
validate_indices(np.array([0, 1]), 0)
|
1157
lib/python3.13/site-packages/pandas/tests/indexing/test_indexing.py
Normal file
1157
lib/python3.13/site-packages/pandas/tests/indexing/test_indexing.py
Normal file
File diff suppressed because it is too large
Load Diff
3366
lib/python3.13/site-packages/pandas/tests/indexing/test_loc.py
Normal file
3366
lib/python3.13/site-packages/pandas/tests/indexing/test_loc.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,75 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, dtype",
    [
        ([], "object"),
        ([1, 2, 3], "int64"),
        ([1.0, 2.0, 3.0], "float64"),
        (["a", "b", "c"], "object"),
        (["a", "b", "c"], "string"),
        ([1, 2, 3], "datetime64[ns]"),
        ([1, 2, 3], "datetime64[ns, CET]"),
        ([1, 2, 3], "timedelta64[ns]"),
        (["2000", "2001", "2002"], "Period[D]"),
        ([1, 0, 3], "Sparse"),
        ([pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(3, 4)], "interval"),
    ],
)
@pytest.mark.parametrize(
    "mask", [[True, False, False], [True, True, True], [False, False, False]]
)
@pytest.mark.parametrize("indexer_class", [list, pd.array, pd.Index, pd.Series])
@pytest.mark.parametrize("frame", [True, False])
def test_series_mask_boolean(values, dtype, mask, indexer_class, frame):
    """Check boolean-mask selection via ``[]``, ``.iloc`` and ``.loc``.

    The mask is wrapped in ``indexer_class``; a ``pd.Series`` mask is
    expected to be rejected by ``.iloc`` with ``ValueError``.
    """
    # In case len(values) < 3
    index = ["a", "b", "c"][: len(values)]
    mask = mask[: len(values)]

    obj = pd.Series(values, dtype=dtype, index=index)
    if frame:
        if len(values) == 0:
            # Otherwise obj is an empty DataFrame with shape (0, 1)
            obj = pd.DataFrame(dtype=dtype, index=index)
        else:
            obj = obj.to_frame()

    if indexer_class is pd.array:
        mask = pd.array(mask, dtype="boolean")
    elif indexer_class is pd.Series:
        mask = pd.Series(mask, index=obj.index, dtype="boolean")
    else:
        mask = indexer_class(mask)

    expected = obj[mask]

    result = obj[mask]
    tm.assert_equal(result, expected)

    if indexer_class is pd.Series:
        msg = "iLocation based boolean indexing cannot use an indexable as a mask"
        # Only the raising statement lives in the context manager: anything
        # placed after it inside the block could never execute.
        with pytest.raises(ValueError, match=msg):
            obj.iloc[mask]
    else:
        result = obj.iloc[mask]
        tm.assert_equal(result, expected)

    result = obj.loc[mask]
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_na_treated_as_false(frame_or_series, indexer_sli):
|
||||
# https://github.com/pandas-dev/pandas/issues/31503
|
||||
obj = frame_or_series([1, 2, 3])
|
||||
|
||||
mask = pd.array([True, False, None], dtype="boolean")
|
||||
|
||||
result = indexer_sli(obj)[mask]
|
||||
expected = indexer_sli(obj)[mask.fillna(False)]
|
||||
|
||||
tm.assert_equal(result, expected)
|
@ -0,0 +1,702 @@
|
||||
"""
|
||||
test setting *parts* of objects both positionally and label based
|
||||
|
||||
TODO: these should be split among the indexer tests
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Period,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestEmptyFrameSetitemExpansion:
|
||||
def test_empty_frame_setitem_index_name_retained(self):
|
||||
# GH#31368 empty frame has non-None index.name -> retained
|
||||
df = DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
|
||||
series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
|
||||
|
||||
df["series"] = series
|
||||
expected = DataFrame(
|
||||
{"series": [1.23] * 4},
|
||||
index=pd.RangeIndex(4, name="df_index"),
|
||||
columns=Index(["series"], dtype=object),
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_empty_frame_setitem_index_name_inherited(self):
|
||||
# GH#36527 empty frame has None index.name -> not retained
|
||||
df = DataFrame()
|
||||
series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
|
||||
df["series"] = series
|
||||
expected = DataFrame(
|
||||
{"series": [1.23] * 4},
|
||||
index=pd.RangeIndex(4, name="series_index"),
|
||||
columns=Index(["series"], dtype=object),
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_loc_setitem_zerolen_series_columns_align(self):
|
||||
# columns will align
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[0] = Series(1, index=range(4))
|
||||
expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# columns will align
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[0] = Series(1, index=["B"])
|
||||
|
||||
exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
def test_loc_setitem_zerolen_list_length_must_match_columns(self):
|
||||
# list-like must conform
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
|
||||
msg = "cannot set a row with mismatched columns"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[0] = [1, 2, 3]
|
||||
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[3] = [6, 7] # length matches len(df.columns) --> OK!
|
||||
|
||||
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64)
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
def test_partial_set_empty_frame(self):
|
||||
# partially set with an empty object
|
||||
# frame
|
||||
df = DataFrame()
|
||||
|
||||
msg = "cannot set a frame with no defined columns"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[1] = 1
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[1] = Series([1], index=["foo"])
|
||||
|
||||
msg = "cannot set a frame with no defined index and a scalar"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[:, 1] = 1
|
||||
|
||||
def test_partial_set_empty_frame2(self):
|
||||
# these work as they don't really change
|
||||
# anything but the index
|
||||
# GH#5632
|
||||
expected = DataFrame(
|
||||
columns=Index(["foo"], dtype=object), index=Index([], dtype="object")
|
||||
)
|
||||
|
||||
df = DataFrame(index=Index([], dtype="object"))
|
||||
df["foo"] = Series([], dtype="object")
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(index=Index([]))
|
||||
df["foo"] = Series(df.index)
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(index=Index([]))
|
||||
df["foo"] = df.index
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame3(self):
|
||||
expected = DataFrame(
|
||||
columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
|
||||
)
|
||||
expected["foo"] = expected["foo"].astype("float64")
|
||||
|
||||
df = DataFrame(index=Index([], dtype="int64"))
|
||||
df["foo"] = []
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(index=Index([], dtype="int64"))
|
||||
df["foo"] = Series(np.arange(len(df)), dtype="float64")
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame4(self):
|
||||
df = DataFrame(index=Index([], dtype="int64"))
|
||||
df["foo"] = range(len(df))
|
||||
|
||||
expected = DataFrame(
|
||||
columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
|
||||
)
|
||||
# range is int-dtype-like, so we get int64 dtype
|
||||
expected["foo"] = expected["foo"].astype("int64")
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame5(self):
|
||||
df = DataFrame()
|
||||
tm.assert_index_equal(df.columns, pd.RangeIndex(0))
|
||||
df2 = DataFrame()
|
||||
df2[1] = Series([1], index=["foo"])
|
||||
df.loc[:, 1] = Series([1], index=["foo"])
|
||||
tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
|
||||
tm.assert_frame_equal(df, df2)
|
||||
|
||||
def test_partial_set_empty_frame_no_index(self):
|
||||
# no index to start
|
||||
expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])
|
||||
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df[0] = Series(1, index=range(4))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[:, 0] = Series(1, index=range(4))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_row(self):
|
||||
# GH#5720, GH#5744
|
||||
# don't create rows when empty
|
||||
expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64"))
|
||||
expected["A"] = expected["A"].astype("int64")
|
||||
expected["B"] = expected["B"].astype("float64")
|
||||
expected["New"] = expected["New"].astype("float64")
|
||||
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
|
||||
y = df[df.A > 5]
|
||||
y["New"] = np.nan
|
||||
tm.assert_frame_equal(y, expected)
|
||||
|
||||
expected = DataFrame(columns=["a", "b", "c c", "d"])
|
||||
expected["d"] = expected["d"].astype("int64")
|
||||
df = DataFrame(columns=["a", "b", "c c"])
|
||||
df["d"] = 3
|
||||
tm.assert_frame_equal(df, expected)
|
||||
tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))
|
||||
|
||||
# reindex columns is ok
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
|
||||
y = df[df.A > 5]
|
||||
result = y.reindex(columns=["A", "B", "C"])
|
||||
expected = DataFrame(columns=["A", "B", "C"])
|
||||
expected["A"] = expected["A"].astype("int64")
|
||||
expected["B"] = expected["B"].astype("float64")
|
||||
expected["C"] = expected["C"].astype("float64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_partial_set_empty_frame_set_series(self):
|
||||
# GH#5756
|
||||
# setting with empty Series
|
||||
df = DataFrame(Series(dtype=object))
|
||||
expected = DataFrame({0: Series(dtype=object)})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(Series(name="foo", dtype=object))
|
||||
expected = DataFrame({"foo": Series(dtype=object)})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_empty_copy_assignment(self):
|
||||
# GH#5932
|
||||
# copy on empty with assignment fails
|
||||
df = DataFrame(index=[0])
|
||||
df = df.copy()
|
||||
df["a"] = 0
|
||||
expected = DataFrame(0, index=[0], columns=Index(["a"], dtype=object))
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string):
|
||||
# GH#6171
|
||||
# consistency on empty frames
|
||||
df = DataFrame(columns=["x", "y"])
|
||||
df["x"] = [1, 2]
|
||||
expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]})
|
||||
tm.assert_frame_equal(df, expected, check_dtype=False)
|
||||
|
||||
df = DataFrame(columns=["x", "y"])
|
||||
df["x"] = ["1", "2"]
|
||||
expected = DataFrame(
|
||||
{
|
||||
"x": Series(
|
||||
["1", "2"],
|
||||
dtype=object if not using_infer_string else "string[pyarrow_numpy]",
|
||||
),
|
||||
"y": Series([np.nan, np.nan], dtype=object),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(columns=["x", "y"])
|
||||
df.loc[0, "x"] = 1
|
||||
expected = DataFrame({"x": [1], "y": [np.nan]})
|
||||
tm.assert_frame_equal(df, expected, check_dtype=False)
|
||||
|
||||
|
||||
class TestPartialSetting:
|
||||
def test_partial_setting(self):
    """Check Series enlargement via ``[]``/``.loc`` and its refusal via ``.iloc``/``.iat``."""
    # GH2578, allow ix and friends to partially set

    # series
    s_orig = Series([1, 2, 3])

    s = s_orig.copy()
    s[5] = 5
    expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    s = s_orig.copy()
    s.loc[5] = 5
    expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    s = s_orig.copy()
    s[5] = 5.0
    expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    s = s_orig.copy()
    s.loc[5] = 5.0
    expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5])
    tm.assert_series_equal(s, expected)

    # iloc/iat raise
    s = s_orig.copy()

    msg = "iloc cannot enlarge its target object"
    with pytest.raises(IndexError, match=msg):
        s.iloc[3] = 5.0

    msg = "index 3 is out of bounds for axis 0 with size 3"
    with pytest.raises(IndexError, match=msg):
        s.iat[3] = 5.0
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_partial_setting_frame(self, using_array_manager):
    """Check row/column setting and enlargement on a small int64 frame.

    ``.iloc``/``.iat`` writes outside the frame must raise ``IndexError``;
    ``.loc`` may overwrite existing rows/columns or append new ones.
    """
    df_orig = DataFrame(
        np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
    )

    # iloc/iat raise
    df = df_orig.copy()

    msg = "iloc cannot enlarge its target object"
    with pytest.raises(IndexError, match=msg):
        df.iloc[4, 2] = 5.0

    msg = "index 2 is out of bounds for axis 0 with size 2"
    if using_array_manager:
        msg = "list index out of range"
    with pytest.raises(IndexError, match=msg):
        df.iat[4, 2] = 5.0

    # row setting where it exists
    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.iloc[1] = df.iloc[2]
    tm.assert_frame_equal(df, expected)

    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.loc[1] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # like 2578, partial setting with dtype preservation
    expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})
    df = df_orig.copy()
    df.loc[3] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]})
    df = df_orig.copy()
    df.loc[:, "B"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])})
    df = df_orig.copy()
    df["B"] = df["B"].astype(np.float64)
    # as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at
    #  setting inplace
    df.loc[:, "B"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, partial setting
    expected = df_orig.copy()
    expected["C"] = df["A"]
    df = df_orig.copy()
    df.loc[:, "C"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed frame, partial setting
    expected = df_orig.copy()
    expected["C"] = df["A"]
    df = df_orig.copy()
    df.loc[:, "C"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_setting2(self):
|
||||
# GH 8473
|
||||
dates = date_range("1/1/2000", periods=8)
|
||||
df_orig = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((8, 4)),
|
||||
index=dates,
|
||||
columns=["A", "B", "C", "D"],
|
||||
)
|
||||
|
||||
expected = pd.concat(
|
||||
[df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True
|
||||
)
|
||||
df = df_orig.copy()
|
||||
df.loc[dates[-1] + dates.freq, "A"] = 7
|
||||
tm.assert_frame_equal(df, expected)
|
||||
df = df_orig.copy()
|
||||
df.at[dates[-1] + dates.freq, "A"] = 7
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq)
|
||||
expected = pd.concat([df_orig, exp_other], axis=1)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[dates[-1] + dates.freq, 0] = 7
|
||||
tm.assert_frame_equal(df, expected)
|
||||
df = df_orig.copy()
|
||||
df.at[dates[-1] + dates.freq, 0] = 7
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_setting_mixed_dtype(self):
|
||||
# in a mixed dtype environment, try to preserve dtypes
|
||||
# by appending
|
||||
df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])
|
||||
|
||||
s = df.loc[1].copy()
|
||||
s.name = 2
|
||||
expected = pd.concat([df, DataFrame(s).T.infer_objects()])
|
||||
|
||||
df.loc[2] = df.loc[1]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_series_partial_set(self):
    """Check list-like ``.loc`` with missing labels raises while ``reindex`` fills NaN."""
    # partial set with new index
    # Regression from GH4825
    ser = Series([0.1, 0.2], index=[1, 2])

    # loc equiv to .reindex
    expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
    with pytest.raises(KeyError, match=r"not in index"):
        ser.loc[[3, 2, 3]]

    result = ser.reindex([3, 2, 3])
    tm.assert_series_equal(result, expected, check_index_type=True)

    expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"])
    with pytest.raises(KeyError, match="not in index"):
        ser.loc[[3, 2, 3, "x"]]

    result = ser.reindex([3, 2, 3, "x"])
    tm.assert_series_equal(result, expected, check_index_type=True)

    expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1])
    with pytest.raises(KeyError, match="not in index"):
        ser.loc[[2, 2, "x", 1]]

    result = ser.reindex([2, 2, "x", 1])
    tm.assert_series_equal(result, expected, check_index_type=True)

    # raises as nothing is in the index
    msg = (
        rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}'\)\] "
        r"are in the \[index\]\""
    )
    with pytest.raises(KeyError, match=msg):
        ser.loc[[3, 3, 3]]

    expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
    with pytest.raises(KeyError, match="not in index"):
        ser.loc[[2, 2, 3]]

    result = ser.reindex([2, 2, 3])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
    expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[3, 4, 4]]

    result = s.reindex([3, 4, 4])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[5, 3, 3]]

    result = s.reindex([5, 3, 3])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[5, 4, 4]]

    result = s.reindex([5, 4, 4])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7])
    expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[7, 2, 2]]

    result = s.reindex([7, 2, 2])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[4, 5, 5]]

    result = s.reindex([4, 5, 5])
    tm.assert_series_equal(result, expected, check_index_type=True)

    # iloc
    expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||||
|
||||
def test_series_partial_set_with_name(self):
    """Same list-like ``.loc`` checks as above for a named Series with a named index."""
    # GH 11497

    idx = Index([1, 2], dtype="int64", name="idx")
    ser = Series([0.1, 0.2], index=idx, name="s")

    # loc
    with pytest.raises(KeyError, match=r"\[3\] not in index"):
        ser.loc[[3, 2, 3]]

    with pytest.raises(KeyError, match=r"not in index"):
        ser.loc[[3, 2, 3, "x"]]

    exp_idx = Index([2, 2, 1], dtype="int64", name="idx")
    expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s")
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    with pytest.raises(KeyError, match=r"\['x'\] not in index"):
        ser.loc[[2, 2, "x", 1]]

    # raises as nothing is in the index
    msg = (
        rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}', "
        r"name='idx'\)\] are in the \[index\]\""
    )
    with pytest.raises(KeyError, match=msg):
        ser.loc[[3, 3, 3]]

    with pytest.raises(KeyError, match="not in index"):
        ser.loc[[2, 2, 3]]

    idx = Index([1, 2, 3], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]]

    idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]]

    idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]]

    idx = Index([4, 5, 6, 7], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]]

    idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]]

    # iloc
    exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx")
    expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s")
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||||
|
||||
@pytest.mark.parametrize("key", [100, 100.0])
def test_setitem_with_expansion_numeric_into_datetimeindex(self, key):
    """Check a numeric row label appended to a datetime index gives an object index."""
    # GH#4940 inserting non-strings
    orig = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    df = orig.copy()

    df.loc[key, :] = df.iloc[0]
    ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name)
    ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0)
    expected = DataFrame(ex_data, index=ex_index, columns=orig.columns)

    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_invalid(self):
    # GH 4940
    # allow only setting of 'valid' values
    values = np.random.default_rng(2).standard_normal((10, 4))
    orig = DataFrame(
        values,
        index=date_range("2000-01-01", periods=10, freq="B"),
        columns=Index(list("ABCD"), dtype=object),
    )

    # allow object conversion here: a string label is set on a frame
    # that was built with a datetime index
    df = orig.copy()
    df.loc["a", :] = df.iloc[0]

    # expected frame: the original plus its first row re-labelled "a"
    appended = DataFrame(Series(df.iloc[0], name="a")).T.infer_objects()
    exp = pd.concat([orig, appended])
    tm.assert_frame_equal(df, exp)

    expected_index = Index([*orig.index.tolist(), "a"])
    tm.assert_index_equal(df.index, expected_index)
    assert df.index.dtype == "object"
|
||||
|
||||
@pytest.mark.parametrize(
    "idx,labels,expected_idx",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Period(day, freq="D")
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Timestamp(day)
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["4D", "8D", "12D"],
            [pd.Timedelta(f"{n} day") for n in (4, 8, 12)],
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes(
    self, idx, labels, expected_idx, frame_or_series
):
    # GH 11278
    # A list of string labels is looked up on a period / datetime /
    # timedelta index; the values 0..19 make rows 3, 7 and 11 the hits.
    obj = frame_or_series(range(20), index=idx)
    expected = frame_or_series([3, 7, 11], expected_idx)

    result = obj.loc[labels]
    tm.assert_equal(expected, result)

    # plain __getitem__ is only checked for the Series case
    if frame_or_series is Series:
        tm.assert_series_equal(expected, obj[labels])
|
||||
|
||||
@pytest.mark.parametrize(
    "idx,labels",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_missing_value(
    self, idx, labels
):
    # GH 11278
    # Each label list holds an entry beyond the 20 periods of the index,
    # so every list-like lookup below must raise.
    ser = Series(range(20), index=idx)
    df = DataFrame(range(20), index=idx)
    msg = r"not in index"

    for lookup in (ser.loc, ser, df.loc):
        with pytest.raises(KeyError, match=msg):
            lookup[labels]
|
||||
|
||||
@pytest.mark.parametrize(
    "idx,labels,msg",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            Index(["4D", "8D"], dtype=object),
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            Index(["4D", "8D"], dtype=object),
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            Index(["2000-01-04", "2000-01-08"], dtype=object),
            (
                r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
                r"dtype='object'\)\] are in the \[index\]"
            ),
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
    self, idx, labels, msg
):
    # GH 11278
    # Timedelta-style strings are looked up on period/datetime indexes and
    # date-style strings on a timedelta index; every lookup must raise
    # KeyError with the parametrized message.
    ser = Series(range(20), index=idx)
    df = DataFrame(range(20), index=idx)

    for lookup in (ser.loc, ser, df.loc):
        with pytest.raises(KeyError, match=msg):
            lookup[labels]
|
||||
|
||||
|
||||
class TestStringSlicing:
    def test_slice_irregular_datetime_index_with_nan(self):
        # GH36953
        index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None])
        df = DataFrame(range(len(index)), index=index)

        # the expected result keeps only the first three entries
        leading = index[:3]
        expected = DataFrame(range(len(leading)), index=leading)

        # Upper bound is not in index (which is unordered)
        # GH53983
        # GH37819
        with pytest.raises(KeyError, match="non-existing keys is not allowed"):
            df["2012-01-01":"2012-01-04"]

        # Need this precision for right bound since the right slice
        # bound is "rounded" up to the largest timepoint smaller than
        # the next "resolution"-step of the provided point.
        # e.g. 2012-01-03 is rounded up to 2012-01-04 - 1ns
        result = df["2012-01-01":"2012-01-03 00:00:00.000000000"]
        tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,303 @@
|
||||
""" test scalar indexing, including at and iat """
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def generate_indices(f, values=False):
    """
    Build every combination of per-axis indexers for ``f``.

    With ``values=False`` (the default) the entries of each axis in
    ``f.axes`` are combined as-is.  With ``values=True`` each axis is
    replaced by its integer positions ``0 .. len(axis) - 1`` before
    combining.

    Returns the ``itertools.product`` iterator over the chosen axes.
    """
    if not values:
        return itertools.product(*f.axes)

    positions = [list(range(len(ax))) for ax in f.axes]
    return itertools.product(*positions)
|
||||
|
||||
|
||||
class TestScalar:
    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["ints", "uints"])
    def test_iat_set_ints(self, kind, col, request):
        # the fixture is looked up by name as "<kind>_<col>"
        obj = request.getfixturevalue(f"{kind}_{col}")
        # values=True -> integer positions, which is what iat accepts
        for pos in generate_indices(obj, True):
            obj.iat[pos] = 1
            stored = obj.values[pos]
            tm.assert_almost_equal(stored, 1)

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["labels", "ts", "floats"])
    def test_iat_set_other(self, kind, col, request):
        obj = request.getfixturevalue(f"{kind}_{col}")
        msg = "iAt based indexing can only have integer indexers"
        with pytest.raises(ValueError, match=msg):
            # values=False -> the first combination of axis labels
            label_key = next(generate_indices(obj, False))
            obj.iat[label_key] = 1

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["ints", "uints", "labels", "ts", "floats"])
    def test_at_set_ints_other(self, kind, col, request):
        obj = request.getfixturevalue(f"{kind}_{col}")
        # label-based round trip: write with at, read back with loc
        for label_key in generate_indices(obj, False):
            obj.at[label_key] = 1
            stored = obj.loc[label_key]
            tm.assert_almost_equal(stored, 1)
|
||||
|
||||
|
||||
class TestAtAndiAT:
    # at and iat tests that don't need Base class

    def test_float_index_at_iat(self):
        ser = Series([1, 2, 3], index=[0.1, 0.2, 0.3])

        # label-based access on the float index
        for label, value in ser.items():
            assert ser.at[label] == value

        # positional access: position p holds the value p + 1
        for pos, want in enumerate(range(1, len(ser) + 1)):
            assert ser.iat[pos] == want

    def test_at_iat_coercion(self):
        # as timestamp is not a tuple!
        dates = date_range("1/1/2000", periods=8)
        values = np.random.default_rng(2).standard_normal((8, 4))
        df = DataFrame(values, index=dates, columns=list("ABCD"))
        col_a = df["A"]

        via_at = col_a.at[dates[5]]
        via_values = col_a.values[5]
        assert via_at == via_values

    @pytest.mark.parametrize(
        "ser, expected",
        [
            (
                Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]"),
                Timestamp("2014-02-02"),
            ),
            (
                Series(["1 days", "2 days"], dtype="timedelta64[ns]"),
                Timedelta("2 days"),
            ),
        ],
    )
    def test_iloc_iat_coercion_datelike(self, indexer_ial, ser, expected):
        # GH 7729
        # make sure we are boxing the returns
        assert indexer_ial(ser)[1] == expected

    def test_imethods_with_dups(self):
        # GH6493
        # iat/iloc with dups
        s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64")

        assert s.iloc[2] == 2
        assert s.iat[2] == 2

        # positions outside the five elements raise for either sign
        for bad_pos in (10, -10):
            msg = f"index {bad_pos} is out of bounds for axis 0 with size 5"
            with pytest.raises(IndexError, match=msg):
                s.iat[bad_pos]

        picked = s.iloc[[2, 3]]
        tm.assert_series_equal(picked, Series([2, 3], [2, 2], dtype="int64"))

        df = s.to_frame()
        row = df.iloc[2]
        tm.assert_series_equal(row, Series(2, index=[0], name=2))

        assert df.iat[2, 0] == 2

    def test_frame_at_with_duplicate_axes(self):
        # GH#33041
        arr = np.random.default_rng(2).standard_normal(6).reshape(3, 2)
        df = DataFrame(arr, columns=["A", "A"])

        # getter: a duplicated column label gives back a Series
        from_columns = df.at[0, "A"]
        expected = df.iloc[0].copy()
        tm.assert_series_equal(from_columns, expected)

        from_transposed = df.T.at["A", 0]
        tm.assert_series_equal(from_transposed, expected)

        # setter
        df.at[1, "A"] = 2
        after_set = Series([2.0, 2.0], index=["A", "A"], name=1)
        tm.assert_series_equal(df.iloc[1], after_set)

    def test_at_getitem_dt64tz_values(self):
        # gh-15822
        stamps = [
            Timestamp(2017, 3, 13, 13, 32, 56),
            Timestamp(2017, 2, 16, 12, 10, 3),
        ]
        df = DataFrame({"name": ["John", "Anderson"], "date": stamps})
        df["date"] = df["date"].dt.tz_localize("Asia/Shanghai")

        expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai")

        # loc and at must agree on the tz-aware scalar
        for accessor in (df.loc, df.at):
            assert accessor[0, "date"] == expected

    def test_mixed_index_at_iat_loc_iloc_series(self):
        # GH 19860
        s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])

        for label, value in s.items():
            assert s.at[label] == s.loc[label] == value

        for pos, want in enumerate(range(1, len(s) + 1)):
            assert s.iat[pos] == s.iloc[pos] == want

        # 4 is not a label of the mixed index
        for accessor in (s.at, s.loc):
            with pytest.raises(KeyError, match="^4$"):
                accessor[4]

    def test_mixed_index_at_iat_loc_iloc_dataframe(self):
        # GH 19860
        df = DataFrame(
            [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2]
        )

        for row_label, row in df.iterrows():
            for col_label, value in row.items():
                via_at = df.at[row_label, col_label]
                via_loc = df.loc[row_label, col_label]
                assert via_at == via_loc == value

        # the data was laid out so that cell (r, c) holds r * 5 + c
        for r in range(2):
            for c in range(5):
                assert df.iat[r, c] == df.iloc[r, c] == r * 5 + c

        # 3 is not a column label of the mixed columns
        for accessor in (df.at, df.loc):
            with pytest.raises(KeyError, match="^3$"):
                accessor[0, 3]

    def test_iat_setter_incompatible_assignment(self):
        # GH 23236
        df = DataFrame({"a": [0.0, 1.0], "b": [4, 5]})
        df.iat[0, 0] = None

        expected = DataFrame({"a": [None, 1], "b": [4, 5]})
        tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_iat_dont_wrap_object_datetimelike():
    # GH#32809 .iat calls go through DataFrame._get_value, should not
    #  call maybe_box_datetimelike
    dti = date_range("2016-01-01", periods=3)
    tdi = dti - dti

    # object-dtype columns holding stdlib datetime / timedelta objects
    ser = Series(dti.to_pydatetime(), dtype=object)
    ser2 = Series(tdi.to_pytimedelta(), dtype=object)
    df = DataFrame({"A": ser, "B": ser2})
    assert (df.dtypes == object).all()

    # every scalar accessor must hand back the stored object itself
    datetime_hits = (df.at[0, "A"], df.iat[0, 0], df.loc[0, "A"], df.iloc[0, 0])
    for got in datetime_hits:
        assert got is ser[0]
        assert isinstance(got, datetime)
        assert not isinstance(got, Timestamp)

    timedelta_hits = (df.at[1, "B"], df.iat[1, 1], df.loc[1, "B"], df.iloc[1, 1])
    for got in timedelta_hits:
        assert got is ser2[1]
        assert isinstance(got, timedelta)
        assert not isinstance(got, Timedelta)
|
||||
|
||||
|
||||
def test_at_with_tuple_index_get():
    # GH 26989
    key = (1, 2)

    # DataFrame.at getter works with Index of tuples
    df = DataFrame({"a": [1, 2]}, index=[key, (3, 4)])
    assert df.index.nlevels == 1
    assert df.at[key, "a"] == 1

    # Series.at getter works with Index of tuples
    series = df["a"]
    assert series.index.nlevels == 1
    assert series.at[key] == 1
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_at_with_tuple_index_set():
    # GH 26989
    key = (1, 2)

    # DataFrame.at setter works with Index of tuples
    df = DataFrame({"a": [1, 2]}, index=[key, (3, 4)])
    assert df.index.nlevels == 1
    df.at[key, "a"] = 2
    assert df.at[key, "a"] == 2

    # Series.at setter works with Index of tuples
    series = df["a"]
    assert series.index.nlevels == 1
    series.at[key] = 3
    assert series.at[key] == 3
|
||||
|
||||
|
||||
class TestMultiIndexScalar:
    def test_multiindex_at_get(self):
        # GH 26989
        key = (1, 3)

        # DataFrame.at and DataFrame.loc getter works with MultiIndex
        df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]])
        assert df.index.nlevels == 2
        assert df.at[key, "a"] == 1
        assert df.loc[key, "a"] == 1

        # Series.at and Series.loc getter works with MultiIndex
        series = df["a"]
        assert series.index.nlevels == 2
        assert series.at[key] == 1
        assert series.loc[key] == 1

    @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
    def test_multiindex_at_set(self):
        # GH 26989
        key = (1, 3)

        # DataFrame.at and DataFrame.loc setter works with MultiIndex
        df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]])
        assert df.index.nlevels == 2
        df.at[key, "a"] = 3
        assert df.at[key, "a"] == 3
        df.loc[key, "a"] = 4
        assert df.loc[key, "a"] == 4

        # Series.at and Series.loc setter works with MultiIndex
        series = df["a"]
        assert series.index.nlevels == 2
        series.at[key] = 5
        assert series.at[key] == 5
        series.loc[key] = 6
        assert series.loc[key] == 6

    def test_multiindex_at_get_one_level(self):
        # GH#38053
        # the index is passed as a list holding one list of labels
        ser = Series((0, 1), index=[[False, True]])
        assert ser.at[False] == 0
|
Reference in New Issue
Block a user