Updated script so that it can be controlled by a Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@ -0,0 +1,40 @@
""" common utilities """
from __future__ import annotations
from typing import (
Any,
Literal,
)
def _mklbl(prefix: str, n: int):
    """Return the list of ``n`` labels ``f"{prefix}{i}"`` for ``i`` in ``range(n)``."""
    return [f"{prefix}{idx}" for idx in range(n)]
def check_indexing_smoketest_or_raises(
    obj,
    method: Literal["iloc", "loc"],
    key: Any,
    axes: Literal[0, 1] | None = None,
    fails=None,
) -> None:
    """
    Smoke-test ``getattr(obj, method)[...]`` with ``key`` placed on each axis.

    Parameters
    ----------
    obj : object exposing ``ndim`` and the indexer attribute named by ``method``
    method : name of the indexer attribute to exercise
    key : value placed at the tested axis position; all other positions
        are filled with ``slice(None)``
    axes : a single axis to test, or None to test both 0 and 1
    fails : exception class (or tuple of classes) that is an accepted failure

    Raises
    ------
    IndexError, TypeError, KeyError
        Re-raised when the lookup fails with an exception that is not an
        instance of ``fails`` (or when ``fails`` is None).
    """
    if axes is None:
        axes_list = [0, 1]
    else:
        assert axes in [0, 1]
        axes_list = [axes]

    for ax in axes_list:
        # axes beyond the object's dimensionality are silently skipped
        if ax >= obj.ndim:
            continue

        # create a tuple accessor
        accessor = [slice(None)] * obj.ndim
        accessor[ax] = key

        try:
            getattr(obj, method).__getitem__(tuple(accessor))
        except (IndexError, TypeError, KeyError) as detail:
            # if we are in fails, then ok, otherwise raise it;
            # note an accepted failure ends the whole check immediately
            if fails is not None and isinstance(detail, fails):
                return
            raise

View File

@ -0,0 +1,127 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
date_range,
)
@pytest.fixture
def series_ints():
    """Series of 4 seeded random values indexed by ``np.arange(0, 8, 2)``."""
    values = np.random.default_rng(2).random(4)
    return Series(values, index=np.arange(0, 8, 2))
@pytest.fixture
def frame_ints():
    """4x4 seeded standard-normal DataFrame with integer row and column labels."""
    values = np.random.default_rng(2).standard_normal((4, 4))
    return DataFrame(
        values,
        index=np.arange(0, 8, 2),
        columns=np.arange(0, 12, 3),
    )
@pytest.fixture
def series_uints():
    """Series of 4 seeded random values on an Index built from uint64 labels."""
    values = np.random.default_rng(2).random(4)
    labels = Index(np.arange(0, 8, 2, dtype=np.uint64))
    return Series(values, index=labels)
@pytest.fixture
def frame_uints():
    """4x4 seeded standard-normal DataFrame with uint64 row and column indexes."""
    values = np.random.default_rng(2).standard_normal((4, 4))
    return DataFrame(
        values,
        index=Index(range(0, 8, 2), dtype=np.uint64),
        columns=Index(range(0, 12, 3), dtype=np.uint64),
    )
@pytest.fixture
def series_labels():
    """Series of 4 seeded standard-normal values labelled "a" through "d"."""
    values = np.random.default_rng(2).standard_normal(4)
    return Series(values, index=list("abcd"))
@pytest.fixture
def frame_labels():
    """4x4 seeded standard-normal DataFrame; rows "a".."d", columns "A".."D"."""
    values = np.random.default_rng(2).standard_normal((4, 4))
    return DataFrame(
        values,
        index=list("abcd"),
        columns=list("ABCD"),
    )
@pytest.fixture
def series_ts():
    """Series of 4 seeded standard-normal values on a 4-period date_range."""
    values = np.random.default_rng(2).standard_normal(4)
    return Series(values, index=date_range("20130101", periods=4))
@pytest.fixture
def frame_ts():
    """4x4 seeded standard-normal DataFrame with a 4-period date_range row index."""
    values = np.random.default_rng(2).standard_normal((4, 4))
    return DataFrame(values, index=date_range("20130101", periods=4))
@pytest.fixture
def series_floats():
    """Series of 4 seeded random values on a float64 Index."""
    values = np.random.default_rng(2).random(4)
    labels = Index(range(0, 8, 2), dtype=np.float64)
    return Series(values, index=labels)
@pytest.fixture
def frame_floats():
    """4x4 seeded standard-normal DataFrame with float64 row and column indexes."""
    values = np.random.default_rng(2).standard_normal((4, 4))
    return DataFrame(
        values,
        index=Index(range(0, 8, 2), dtype=np.float64),
        columns=Index(range(0, 12, 3), dtype=np.float64),
    )
@pytest.fixture
def series_mixed():
    """Series of 4 seeded standard-normal values with mixed int/str labels."""
    values = np.random.default_rng(2).standard_normal(4)
    return Series(values, index=[2, 4, "null", 8])
@pytest.fixture
def frame_mixed():
    """4x4 seeded standard-normal DataFrame with mixed int/str row labels."""
    values = np.random.default_rng(2).standard_normal((4, 4))
    return DataFrame(values, index=[2, 4, "null", 8])
@pytest.fixture
def frame_empty():
    """DataFrame constructed with no arguments."""
    empty = DataFrame()
    return empty
@pytest.fixture
def series_empty():
    """Series constructed with no data and ``dtype=object``."""
    empty = Series(dtype=object)
    return empty
@pytest.fixture
def frame_multi():
    """4x4 seeded standard-normal DataFrame with MultiIndex rows and columns."""
    values = np.random.default_rng(2).standard_normal((4, 4))
    return DataFrame(
        values,
        index=MultiIndex.from_product([[1, 2], [3, 4]]),
        columns=MultiIndex.from_product([[5, 6], [7, 8]]),
    )
@pytest.fixture
def series_multi():
    """Series of 4 seeded random values on a 2x2 product MultiIndex."""
    values = np.random.default_rng(2).random(4)
    return Series(values, index=MultiIndex.from_product([[1, 2], [3, 4]]))

View File

@ -0,0 +1,225 @@
import numpy as np
import pytest
from pandas._libs import index as libindex
import pandas as pd
from pandas import (
DataFrame,
IntervalIndex,
Series,
)
import pandas._testing as tm
class TestIntervalIndex:
    """Getitem/setitem tests for objects indexed by an IntervalIndex."""

    @pytest.fixture
    def series_with_interval_index(self):
        """Series ``arange(5)`` indexed by intervals built from breaks 0..5."""
        return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))

    def test_getitem_with_scalar(self, series_with_interval_index, indexer_sl):
        """Scalar slices, scalar lists and boolean masks select matching rows."""
        ser = series_with_interval_index.copy()

        expected = ser.iloc[:3]
        tm.assert_series_equal(expected, indexer_sl(ser)[:3])
        tm.assert_series_equal(expected, indexer_sl(ser)[:2.5])
        tm.assert_series_equal(expected, indexer_sl(ser)[0.1:2.5])
        if indexer_sl is tm.loc:
            tm.assert_series_equal(expected, ser.loc[-1:3])

        expected = ser.iloc[1:4]
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]])

        expected = ser.iloc[2:5]
        tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2])

    @pytest.mark.parametrize("direction", ["increasing", "decreasing"])
    def test_getitem_nonoverlapping_monotonic(self, direction, closed, indexer_sl):
        """Endpoints match only on a closed side; midpoints always match."""
        tpls = [(0, 1), (2, 3), (4, 5)]
        if direction == "decreasing":
            tpls = tpls[::-1]

        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        ser = Series(list("abc"), idx)

        for key, expected in zip(idx.left, ser):
            if idx.closed_left:
                assert indexer_sl(ser)[key] == expected
            else:
                with pytest.raises(KeyError, match=str(key)):
                    indexer_sl(ser)[key]

        for key, expected in zip(idx.right, ser):
            if idx.closed_right:
                assert indexer_sl(ser)[key] == expected
            else:
                with pytest.raises(KeyError, match=str(key)):
                    indexer_sl(ser)[key]

        for key, expected in zip(idx.mid, ser):
            assert indexer_sl(ser)[key] == expected

    def test_getitem_non_matching(self, series_with_interval_index, indexer_sl):
        """A list key containing a label covered by no interval raises KeyError."""
        ser = series_with_interval_index.copy()

        # this is a departure from our current
        # indexing scheme, but simpler
        with pytest.raises(KeyError, match=r"\[-1\] not in index"):
            indexer_sl(ser)[[-1, 3, 4, 5]]

        with pytest.raises(KeyError, match=r"\[-1\] not in index"):
            indexer_sl(ser)[[-1, 3]]

    def test_loc_getitem_large_series(self, monkeypatch):
        """Equivalent slice spellings agree with ``_SIZE_CUTOFF`` patched to 20."""
        size_cutoff = 20
        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            ser = Series(
                np.arange(size_cutoff),
                index=IntervalIndex.from_breaks(np.arange(size_cutoff + 1)),
            )

            result1 = ser.loc[:8]
            result2 = ser.loc[0:8]
            result3 = ser.loc[0:8:1]
        tm.assert_series_equal(result1, result2)
        tm.assert_series_equal(result1, result3)

    def test_loc_getitem_frame(self):
        """``loc`` on a frame indexed by the result of ``pd.cut``."""
        # CategoricalIndex with IntervalIndex categories
        df = DataFrame({"A": range(10)})
        ser = pd.cut(df.A, 5)
        df["B"] = ser
        df = df.set_index("B")

        result = df.loc[4]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError, match="10"):
            df.loc[10]

        # single list-like
        result = df.loc[[4]]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        # non-unique
        result = df.loc[[4, 5]]
        expected = df.take([4, 5, 4, 5])
        tm.assert_frame_equal(result, expected)

        msg = (
            r"None of \[Index\(\[10\], dtype='object', name='B'\)\] "
            r"are in the \[index\]"
        )
        with pytest.raises(KeyError, match=msg):
            df.loc[[10]]

        # partial missing
        with pytest.raises(KeyError, match=r"\[10\] not in index"):
            df.loc[[10, 4]]

    def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl):
        """Indexing an all-NaN IntervalIndex with a NaN key returns the object."""
        # GH#41831

        index = IntervalIndex([np.nan, np.nan])
        key = index[:-1]

        obj = frame_or_series(range(2), index=index)
        if frame_or_series is DataFrame and indexer_sl is tm.setitem:
            obj = obj.T

        result = indexer_sl(obj)[key]
        expected = obj

        tm.assert_equal(result, expected)

    def test_setitem_interval_with_slice(self):
        """Assigning through scalar ``loc`` slices on an IntervalIndex."""
        # GH#54722
        ii = IntervalIndex.from_breaks(range(4, 15))
        ser = Series(range(10), index=ii)

        orig = ser.copy()

        # This should be a no-op (used to raise)
        ser.loc[1:3] = 20
        tm.assert_series_equal(ser, orig)

        ser.loc[6:8] = 19
        orig.iloc[1:4] = 19
        tm.assert_series_equal(ser, orig)

        ser2 = Series(range(5), index=ii[::2])
        orig2 = ser2.copy()

        # this used to raise
        ser2.loc[6:8] = 22  # <- raises on main, sets on branch
        orig2.iloc[1] = 22
        tm.assert_series_equal(ser2, orig2)

        ser2.loc[5:7] = 21
        orig2.iloc[:2] = 21
        tm.assert_series_equal(ser2, orig2)
class TestIntervalIndexInsideMultiIndex:
    """Tests where an IntervalIndex is used as a level or is reindexed."""

    def test_mi_intervalindex_slicing_with_scalar(self):
        """Float keys on an interval level resolve to the containing intervals."""
        # GH#27456
        ii = IntervalIndex.from_arrays(
            [0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12], name="MP"
        )
        idx = pd.MultiIndex.from_arrays(
            [
                pd.Index(["FC", "FC", "FC", "FC", "OWNER", "OWNER", "OWNER", "OWNER"]),
                pd.Index(
                    ["RID1", "RID1", "RID2", "RID2", "RID1", "RID1", "RID2", "RID2"]
                ),
                ii,
            ]
        )

        idx.names = ["Item", "RID", "MP"]
        df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]})
        df.index = idx

        query_df = DataFrame(
            {
                "Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"],
                "RID": ["RID1", "RID1", "RID1", "RID2", "RID2"],
                "MP": [0.2, 1.5, 1.6, 11.1, 10.9],
            }
        )

        query_df = query_df.sort_index()

        idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP])
        query_df.index = idx
        result = df.value.loc[query_df.index]

        # the IntervalIndex level is indexed with floats, which map to
        #  the intervals containing them.  Matching the behavior we would get
        #  with _only_ an IntervalIndex, we get an IntervalIndex level back.
        sliced_level = ii.take([0, 1, 1, 3, 2])
        expected_index = pd.MultiIndex.from_arrays(
            [idx.get_level_values(0), idx.get_level_values(1), sliced_level]
        )
        expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "base",
        [101, 1010],
    )
    def test_reindex_behavior_with_interval_index(self, base):
        """Reindexing with ``[nan, 1.0]`` yields ``[nan, 0]`` for both sizes."""
        # GH 51826

        ser = Series(
            range(base),
            index=IntervalIndex.from_arrays(range(base), range(1, base + 1)),
        )
        expected_result = Series([np.nan, 0], index=[np.nan, 1.0], dtype=float)
        result = ser.reindex(index=[np.nan, 1.0])
        tm.assert_series_equal(result, expected_result)

View File

@ -0,0 +1,229 @@
import re
import numpy as np
import pytest
from pandas import (
Index,
Interval,
IntervalIndex,
Series,
)
import pandas._testing as tm
class TestIntervalIndex:
    """Indexing an IntervalIndex-backed Series with Interval and scalar keys."""

    @pytest.fixture
    def series_with_interval_index(self):
        """Series ``arange(5)`` indexed by intervals built from breaks 0..5."""
        return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))

    def test_loc_with_interval(self, series_with_interval_index, indexer_sl):
        """Interval keys must match an index entry exactly."""
        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        ser = series_with_interval_index.copy()

        expected = 0
        result = indexer_sl(ser)[Interval(0, 1)]
        assert result == expected

        expected = ser.iloc[3:5]
        result = indexer_sl(ser)[[Interval(3, 4), Interval(4, 5)]]
        tm.assert_series_equal(expected, result)

        # missing or not exact
        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")):
            indexer_sl(ser)[Interval(3, 5, closed="left")]

        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
            indexer_sl(ser)[Interval(3, 5)]

        with pytest.raises(
            KeyError, match=re.escape("Interval(-2, 0, closed='right')")
        ):
            indexer_sl(ser)[Interval(-2, 0)]

        with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")):
            indexer_sl(ser)[Interval(5, 6)]

    def test_loc_with_scalar(self, series_with_interval_index, indexer_sl):
        """Scalar keys select the rows whose interval contains them."""
        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        ser = series_with_interval_index.copy()

        assert indexer_sl(ser)[1] == 0
        assert indexer_sl(ser)[1.5] == 1
        assert indexer_sl(ser)[2] == 1

        expected = ser.iloc[1:4]
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]])

        expected = ser.iloc[[1, 1, 2, 1]]
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2, 2.5, 1.5]])

        expected = ser.iloc[2:5]
        tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2])

    def test_loc_with_slices(self, series_with_interval_index, indexer_sl):
        """Slices bounded by Interval objects work only for exact matches."""
        # loc with slices:
        #   - Interval objects: only works with exact matches
        #   - scalars: only works for non-overlapping, monotonic intervals,
        #     and start/stop select location based on the interval that
        #     contains them:
        #    (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))

        ser = series_with_interval_index.copy()

        # slice of interval

        expected = ser.iloc[:3]
        result = indexer_sl(ser)[Interval(0, 1) : Interval(2, 3)]
        tm.assert_series_equal(expected, result)

        expected = ser.iloc[3:]
        result = indexer_sl(ser)[Interval(3, 4) :]
        tm.assert_series_equal(expected, result)

        msg = "Interval objects are not currently supported"
        with pytest.raises(NotImplementedError, match=msg):
            indexer_sl(ser)[Interval(3, 6) :]

        with pytest.raises(NotImplementedError, match=msg):
            indexer_sl(ser)[Interval(3, 4, closed="left") :]

    def test_slice_step_ne1(self, series_with_interval_index):
        """An integer slice with step 2 matches the positional result."""
        # GH#31658 slice of scalar with step != 1
        ser = series_with_interval_index.copy()
        expected = ser.iloc[0:4:2]

        result = ser[0:4:2]
        tm.assert_series_equal(result, expected)

        result2 = ser[0:4][::2]
        tm.assert_series_equal(result2, expected)

    def test_slice_float_start_stop(self, series_with_interval_index):
        """A float-bounded slice with step 2 raises ValueError."""
        # GH#31658 slicing with integers is positional, with floats is not
        #  supported
        ser = series_with_interval_index.copy()

        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            ser[1.5:9.5:2]

    def test_slice_interval_step(self, series_with_interval_index):
        """An Interval used as the slice step raises ValueError."""
        # GH#31658 allows for integer step!=1, not Interval step
        ser = series_with_interval_index.copy()
        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            ser[0 : 4 : Interval(0, 1)]

    def test_loc_with_overlap(self, indexer_sl):
        """Lookups on an index whose intervals overlap."""
        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        # scalar
        expected = ser
        result = indexer_sl(ser)[4]
        tm.assert_series_equal(expected, result)

        result = indexer_sl(ser)[[4]]
        tm.assert_series_equal(expected, result)

        # interval
        expected = 0
        result = indexer_sl(ser)[Interval(1, 5)]
        assert expected == result

        expected = ser
        result = indexer_sl(ser)[[Interval(1, 5), Interval(3, 7)]]
        tm.assert_series_equal(expected, result)

        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
            indexer_sl(ser)[Interval(3, 5)]

        msg = (
            r"None of \[IntervalIndex\(\[\(3, 5\]\], "
            r"dtype='interval\[int64, right\]'\)\] are in the \[index\]"
        )
        with pytest.raises(KeyError, match=msg):
            indexer_sl(ser)[[Interval(3, 5)]]

        # slices with interval (only exact matches)
        expected = ser
        result = indexer_sl(ser)[Interval(1, 5) : Interval(3, 7)]
        tm.assert_series_equal(expected, result)

        msg = (
            "'can only get slices from an IntervalIndex if bounds are "
            "non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=msg):
            indexer_sl(ser)[Interval(1, 6) : Interval(3, 8)]

        if indexer_sl is tm.loc:
            # slices with scalar raise for overlapping intervals
            # TODO KeyError is the appropriate error?
            with pytest.raises(KeyError, match=msg):
                ser.loc[1:4]

    def test_non_unique(self, indexer_sl):
        """Exact Interval lookups on the index ``[(1, 3], (3, 7]]``."""
        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        result = indexer_sl(ser)[Interval(1, 3)]
        assert result == 0

        result = indexer_sl(ser)[[Interval(1, 3)]]
        expected = ser.iloc[0:1]
        tm.assert_series_equal(expected, result)

    def test_non_unique_moar(self, indexer_sl):
        """A duplicated Interval key returns every matching row."""
        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        expected = ser.iloc[[0, 1]]
        result = indexer_sl(ser)[Interval(1, 3)]
        tm.assert_series_equal(expected, result)

        expected = ser
        result = indexer_sl(ser)[Interval(1, 3) :]
        tm.assert_series_equal(expected, result)

        expected = ser.iloc[[0, 1]]
        result = indexer_sl(ser)[[Interval(1, 3)]]
        tm.assert_series_equal(expected, result)

    def test_loc_getitem_missing_key_error_message(
        self, frame_or_series, series_with_interval_index
    ):
        """The KeyError message names only the missing label."""
        # GH#27365
        ser = series_with_interval_index.copy()
        obj = frame_or_series(ser)
        with pytest.raises(KeyError, match=r"\[6\]"):
            obj.loc[[4, 5, 6]]
@pytest.mark.parametrize(
    "intervals",
    [
        ([Interval(-np.inf, 0.0), Interval(0.0, 1.0)]),
        ([Interval(-np.inf, -2.0), Interval(-2.0, -1.0)]),
        ([Interval(-1.0, 0.0), Interval(0.0, np.inf)]),
        ([Interval(1.0, 2.0), Interval(2.0, np.inf)]),
    ],
)
def test_repeating_interval_index_with_infs(intervals):
    """``get_indexer_for`` finds every repeat of the second interval."""
    # GH 46658

    interval_index = Index(intervals * 51)

    # the pair is repeated 51 times, so the second interval sits at odd positions
    expected = np.arange(1, 102, 2, dtype=np.intp)
    result = interval_index.get_indexer_for([intervals[1]])

    tm.assert_equal(result, expected)

View File

@ -0,0 +1,87 @@
import numpy as np
import pytest
from pandas._libs import index as libindex
from pandas.errors import SettingWithCopyError
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
MultiIndex,
Series,
)
import pandas._testing as tm
def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write):
    """Chained inplace ``fillna`` on MultiIndex columns, per copy-on-write mode."""
    # Inplace ops, originally from:
    # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    a = [12, 23]
    b = [123, None]
    c = [1234, 2345]
    d = [12345, 23456]
    tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")]
    events = {
        ("eyes", "left"): a,
        ("eyes", "right"): b,
        ("ears", "left"): c,
        ("ears", "right"): d,
    }
    multiind = MultiIndex.from_tuples(tuples, names=["part", "side"])
    zed = DataFrame(events, index=["a", "b"], columns=multiind)

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            zed["eyes"]["right"].fillna(value=555, inplace=True)
    elif warn_copy_on_write:
        with tm.assert_produces_warning(None):
            zed["eyes"]["right"].fillna(value=555, inplace=True)
    else:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.assert_produces_warning(None):
                zed["eyes"]["right"].fillna(value=555, inplace=True)
@td.skip_array_manager_invalid_test  # with ArrayManager df.loc[0] is not a view
def test_cache_updating(using_copy_on_write, warn_copy_on_write):
    """Chained then direct assignment into a MultiIndex-indexed frame."""
    # 5216
    # make sure that we don't try to set a dead cache
    a = np.random.default_rng(2).random((10, 3))
    df = DataFrame(a, columns=["x", "y", "z"])
    df_original = df.copy()
    tuples = [(i, j) for i in range(5) for j in range(2)]
    df.index = MultiIndex.from_tuples(tuples)

    # setting via chained assignment
    # but actually works, since everything is a view

    with tm.raises_chained_assignment_error():
        df.loc[0]["z"].iloc[0] = 1.0

    if using_copy_on_write:
        assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
    else:
        result = df.loc[(0, 0), "z"]
        assert result == 1

    # correct setting
    df.loc[(0, 0), "z"] = 2
    result = df.loc[(0, 0), "z"]
    assert result == 2
def test_indexer_caching(monkeypatch):
    """Boolean setitem on a MultiIndex Series with ``_SIZE_CUTOFF`` patched to 20."""
    # GH5727
    # make sure that indexers are in the _internal_names_set
    size_cutoff = 20
    with monkeypatch.context():
        monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        index = MultiIndex.from_arrays([np.arange(size_cutoff), np.arange(size_cutoff)])
        s = Series(np.zeros(size_cutoff), index=index)

        # setitem
        s[s == 0] = 1
    expected = Series(np.ones(size_cutoff), index=index)
    tm.assert_series_equal(s, expected)

View File

@ -0,0 +1,50 @@
from datetime import datetime
import numpy as np
from pandas import (
DataFrame,
Index,
MultiIndex,
Period,
Series,
period_range,
to_datetime,
)
import pandas._testing as tm
def test_multiindex_period_datetime():
    """A Period level can be addressed by a Period or by a datetime."""
    # GH4861, using datetime in period of multiindex raises exception

    idx1 = Index(["a", "a", "a", "b", "b"])
    idx2 = period_range("2012-01", periods=len(idx1), freq="M")
    values = np.random.default_rng(2).standard_normal(len(idx1))
    s = Series(values, [idx1, idx2])

    # try Period as index
    expected = s.iloc[0]
    result = s.loc["a", Period("2012-01")]
    assert result == expected

    # try datetime as index
    result = s.loc["a", datetime(2012, 1, 1)]
    assert result == expected
def test_multiindex_datetime_columns():
    """An empty frame accepts a MultiIndex of datetimes as its columns."""
    # GH35015, using datetime as column indices raises exception

    mi = MultiIndex.from_tuples(
        [(to_datetime("02/29/2020"), to_datetime("03/01/2020"))], names=["a", "b"]
    )

    df = DataFrame([], columns=mi)

    expected_columns = MultiIndex.from_arrays(
        [[to_datetime("02/29/2020")], [to_datetime("03/01/2020")]], names=["a", "b"]
    )
    expected_df = DataFrame([], columns=expected_columns)

    tm.assert_frame_equal(df, expected_df)

View File

@ -0,0 +1,410 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
from pandas.core.indexing import IndexingError
# ----------------------------------------------------------------------------
# test indexing of Series with multi-level Index
# ----------------------------------------------------------------------------
@pytest.mark.parametrize(
    "access_method",
    [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)],
)
@pytest.mark.parametrize(
    "level1_value, expected",
    [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))],
)
def test_series_getitem_multiindex(access_method, level1_value, expected):
    """Selecting on the second level gives the same result for all three accessors."""
    # GH 6018
    # series regression getitem with a multi-index

    mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)], names=["A", "B"])
    ser = Series([1, 2, 3], index=mi)
    # the remaining level keeps its name, so the expectation must carry it too
    expected.index.name = "A"

    result = access_method(ser, level1_value)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("level0_value", ["D", "A"])
def test_series_getitem_duplicates_multiindex(level0_value):
    """Missing first-level keys raise KeyError; a present key returns its rows."""
    # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise
    # the appropriate error, only in PY3 of course!

    index = MultiIndex(
        levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
        codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
        names=["tag", "day"],
    )
    arr = np.random.default_rng(2).standard_normal((len(index), 1))
    df = DataFrame(arr, index=index, columns=["val"])

    # confirm indexing on missing value raises KeyError
    if level0_value != "A":
        with pytest.raises(KeyError, match=r"^'A'$"):
            df.val["A"]

    with pytest.raises(KeyError, match=r"^'X'$"):
        df.val["X"]

    result = df.val[level0_value]
    expected = Series(
        arr.ravel()[0:3], name="val", index=Index([26, 37, 57], name="day")
    )
    tm.assert_series_equal(result, expected)
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl):
    """A two-element key drops the first two levels of the result index."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    expected = s.reindex(s.index[42:65])
    expected.index = expected.index.droplevel(0).droplevel(0)

    result = indexer_sl(s)[2000, 3]
    tm.assert_series_equal(result, expected)
def test_series_getitem_returns_scalar(
    multiindex_year_month_day_dataframe_random_data, indexer_sl
):
    """A key spanning all three levels returns the value at position 49."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    expected = s.iloc[49]

    result = indexer_sl(s)[2000, 3, 10]
    assert result == expected
@pytest.mark.parametrize(
    "indexer,expected_error,expected_error_msg",
    [
        (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
        (lambda s: s.__getitem__(len(s)), KeyError, ""),  # match should include len(s)
        (lambda s: s[len(s)], KeyError, ""),  # match should include len(s)
        (
            lambda s: s.iloc[len(s)],
            IndexError,
            "single positional indexer is out-of-bounds",
        ),
    ],
)
def test_series_getitem_indexing_errors(
    multiindex_year_month_day_dataframe_random_data,
    indexer,
    expected_error,
    expected_error_msg,
):
    """Each invalid lookup raises the parametrized error with a matching message."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    with pytest.raises(expected_error, match=expected_error_msg):
        indexer(s)
def test_series_getitem_corner_generator(
    multiindex_year_month_day_dataframe_random_data,
):
    """A generator of booleans as key selects the same rows as ``s > 0``."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    expected = s[s > 0]
    result = s[(x > 0 for x in s)]
    tm.assert_series_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index
# ----------------------------------------------------------------------------
def test_getitem_simple(multiindex_dataframe_random_data):
    """``df["foo", "one"]`` on the transposed frame matches the first values column."""
    df = multiindex_dataframe_random_data.T
    expected = df.values[:, 0]
    result = df["foo", "one"].values
    tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize(
    "indexer,expected_error_msg",
    [
        (lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"),
        (lambda df: df["foobar"], r"^'foobar'$"),
    ],
)
def test_frame_getitem_simple_key_error(
    multiindex_dataframe_random_data, indexer, expected_error_msg
):
    """Column keys that are not present raise KeyError with the key as the message."""
    df = multiindex_dataframe_random_data.T
    with pytest.raises(KeyError, match=expected_error_msg):
        indexer(df)
def test_tuple_string_column_names():
    """A flat column index mixing tuple and string labels supports list selection."""
    # GH#50372
    mi = MultiIndex.from_tuples([("a", "aa"), ("a", "ab"), ("b", "ba"), ("b", "bb")])
    df = DataFrame([range(4), range(1, 5), range(2, 6)], columns=mi)
    df["single_index"] = 0

    df_flat = df.copy()
    df_flat.columns = df_flat.columns.to_flat_index()
    df_flat["new_single_index"] = 0

    wanted = [("a", "aa"), "new_single_index"]
    result = df_flat[wanted]
    expected = DataFrame(
        [[0, 0], [1, 0], [2, 0]], columns=Index([("a", "aa"), "new_single_index"])
    )
    tm.assert_frame_equal(result, expected)
def test_frame_getitem_multicolumn_empty_level():
    """Selecting a top-level key whose second-level label is the empty string."""
    df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
    df.columns = [
        ["level1 item1", "level1 item2"],
        ["", "level2 item2"],
        ["level3 item1", "level3 item2"],
    ]

    result = df["level1 item1"]
    # only the third-level label remains in the result's columns
    expected = DataFrame(
        [["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"]
    )
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "indexer,expected_slice",
    [
        (lambda df: df["foo"], slice(3)),
        (lambda df: df["bar"], slice(3, 5)),
        (lambda df: df.loc[:, "bar"], slice(3, 5)),
    ],
)
def test_frame_getitem_toplevel(
    multiindex_dataframe_random_data, indexer, expected_slice
):
    """A top-level column key returns its sub-columns with the top level dropped."""
    df = multiindex_dataframe_random_data.T
    expected = df.reindex(columns=df.columns[expected_slice])
    expected.columns = expected.columns.droplevel(0)
    result = indexer(df)
    tm.assert_frame_equal(result, expected)
def test_frame_mixed_depth_get():
    """Partial keys resolve through trailing empty-string levels."""
    arrays = [
        ["a", "top", "top", "routine1", "routine1", "routine2"],
        ["", "OD", "OD", "result1", "result2", "result1"],
        ["", "wx", "wy", "", "", ""],
    ]

    tuples = sorted(zip(*arrays))
    index = MultiIndex.from_tuples(tuples)
    values = np.random.default_rng(2).standard_normal((4, 6))
    df = DataFrame(values, columns=index)

    result = df["a"]
    expected = df["a", "", ""].rename("a")
    tm.assert_series_equal(result, expected)

    result = df["routine1", "result1"]
    expected = df["routine1", "result1", ""]
    expected = expected.rename(("routine1", "result1"))
    tm.assert_series_equal(result, expected)
def test_frame_getitem_nan_multiindex(nulls_fixture):
    """``loc`` slicing with tuple bounds that contain a null value."""
    # GH#29751
    # loc on a multiindex containing nan values
    n = nulls_fixture  # for code readability
    cols = ["a", "b", "c"]
    df = DataFrame(
        [[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]],
        columns=cols,
    ).set_index(["a", "b"])
    df["c"] = df["c"].astype("int64")

    # open-ended slice up to the bound
    idx = (21, n)
    result = df.loc[:idx]
    expected = DataFrame([[11, n, 13], [21, n, 23]], columns=cols).set_index(["a", "b"])
    expected["c"] = expected["c"].astype("int64")
    tm.assert_frame_equal(result, expected)

    # open-ended slice starting at the bound
    result = df.loc[idx:]
    expected = DataFrame(
        [[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols
    ).set_index(["a", "b"])
    expected["c"] = expected["c"].astype("int64")
    tm.assert_frame_equal(result, expected)

    # slice bounded on both sides
    idx1, idx2 = (21, n), (31, n)
    result = df.loc[idx1:idx2]
    expected = DataFrame([[21, n, 23], [31, n, 33]], columns=cols).set_index(["a", "b"])
    expected["c"] = expected["c"].astype("int64")
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "indexer,expected",
    [
        (
            (["b"], ["bar", np.nan]),
            (
                DataFrame(
                    [[2, 3], [5, 6]],
                    columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
                    dtype="int64",
                )
            ),
        ),
        (
            (["a", "b"]),
            (
                DataFrame(
                    [[1, 2, 3], [4, 5, 6]],
                    columns=MultiIndex.from_tuples(
                        [("a", "foo"), ("b", "bar"), ("b", np.nan)]
                    ),
                    dtype="int64",
                )
            ),
        ),
        (
            (["b"]),
            (
                DataFrame(
                    [[2, 3], [5, 6]],
                    columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
                    dtype="int64",
                )
            ),
        ),
        (
            (["b"], ["bar"]),
            (
                DataFrame(
                    [[2], [5]],
                    columns=MultiIndex.from_tuples([("b", "bar")]),
                    dtype="int64",
                )
            ),
        ),
        (
            (["b"], [np.nan]),
            (
                DataFrame(
                    [[3], [6]],
                    columns=MultiIndex(
                        codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]]
                    ),
                    dtype="int64",
                )
            ),
        ),
        (("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))),
    ],
)
def test_frame_getitem_nan_cols_multiindex(
    indexer,
    expected,
    nulls_fixture,
):
    """Column selection via ``loc`` when a column level contains a null label."""
    # Slicing MultiIndex including levels with nan values, for more information
    # see GH#25154
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("b", "bar"), ("b", nulls_fixture)]
    )
    df = DataFrame(
        [[1, 2, 3], [4, 5, 6]],
        columns=columns,
        dtype="int64",
    )

    result = df.loc[:, indexer]
    tm.assert_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index with duplicates
# ----------------------------------------------------------------------------
@pytest.fixture
def dataframe_with_duplicate_index():
    """Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
    data = [["a", "d", "e", "c", "f", "b"], [1, 4, 5, 3, 6, 2], [1, 4, 5, 3, 6, 2]]
    row_labels = ["h1", "h3", "h5"]
    # codes repeat the pair (0, 3), so the label ("A", "B2") occurs twice
    column_index = MultiIndex(
        levels=[["A", "B"], ["A1", "A2", "B1", "B2"]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
        names=["main", "sub"],
    )
    return DataFrame(data, index=row_labels, columns=column_index)
@pytest.mark.parametrize(
    "indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]]
)
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
    """A full tuple key on duplicated columns returns a one-column DataFrame."""
    # GH 4145
    df = dataframe_with_duplicate_index
    index = Index(["h1", "h3", "h5"])
    columns = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"])
    expected = DataFrame([["a", 1, 1]], index=columns, columns=index).T

    result = indexer(df)
    tm.assert_frame_equal(result, expected)
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
    """Chained ``df["A"]["A1"]`` returns a Series named ``"A1"``."""
    # GH 4146, not returning a block manager when selecting a unique index
    # from a duplicate index
    # as of 4879, this returns a Series (which is similar to what happens
    # with a non-unique)
    df = dataframe_with_duplicate_index
    expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1")
    result = df["A"]["A1"]
    tm.assert_series_equal(result, expected)
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
    """Chained ``df["A"]["B2"]`` returns a DataFrame with both ``"B2"`` columns."""
    # selecting a non_unique from the 2nd level
    df = dataframe_with_duplicate_index
    expected = DataFrame(
        [["d", 4, 4], ["e", 5, 5]],
        index=Index(["B2", "B2"], name="sub"),
        columns=["h1", "h3", "h5"],
    ).T
    result = df["A"]["B2"]
    tm.assert_frame_equal(result, expected)
def test_frame_mi_empty_slice():
    """Selecting an empty column list keeps the rows and the column levels."""
    # GH 15454
    df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]]))
    result = df[[]]

    empty_columns = MultiIndex(levels=[[1], [2]], codes=[[], []])
    expected = DataFrame(index=[0, 1], columns=empty_columns)
    tm.assert_frame_equal(result, expected)
def test_loc_empty_multiindex():
    """``loc`` with an empty MultiIndex selects nothing and assigns nothing."""
    # GH#36936
    arrays = [["a", "a", "b", "a"], ["a", "a", "b", "b"]]
    index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
    df = DataFrame([1, 2, 3, 4], index=index, columns=["value"])

    # loc on empty multiindex == loc with False mask
    empty_multiindex = df.loc[df.loc[:, "value"] == 0, :].index
    result = df.loc[empty_multiindex, :]
    expected = df.loc[[False] * len(df.index), :]
    tm.assert_frame_equal(result, expected)

    # replacing value with loc on empty multiindex
    df.loc[df.loc[df.loc[:, "value"] == 0].index, "value"] = 5
    result = df
    expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
    tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,171 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.fixture
def simple_multiindex_dataframe():
    """
    3 x 3 DataFrame of seeded standard-normal values whose rows and
    columns are both built from two-level lists of labels.
    """
    values = np.random.default_rng(2).standard_normal((3, 3))
    column_labels = [[2, 2, 4], [6, 8, 10]]
    row_labels = [[4, 4, 8], [8, 10, 12]]
    return DataFrame(values, columns=column_labels, index=row_labels)
@pytest.mark.parametrize(
    "indexer, expected",
    [
        (
            lambda df: df.iloc[0],
            lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)),
        ),
        (
            lambda df: df.iloc[2],
            lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)),
        ),
        (
            lambda df: df.iloc[:, 2],
            lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)),
        ),
    ],
)
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
    """A single positional row or column comes back as a tuple-named Series."""
    df = simple_multiindex_dataframe
    arr = df.values
    result = indexer(df)
    # ``expected`` is a builder that needs the frame's underlying values
    expected = expected(arr)
    tm.assert_series_equal(result, expected)
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
    """The first two positional rows equal the ``xs(4, drop_level=False)`` slice."""
    df = simple_multiindex_dataframe
    expected = df.xs(4, drop_level=False)
    result = df.iloc[[0, 1]]
    tm.assert_frame_equal(result, expected)
def test_iloc_returns_scalar(simple_multiindex_dataframe):
    """``iloc`` with two integer positions returns the underlying cell value."""
    frame = simple_multiindex_dataframe
    raw = frame.values
    cell = frame.iloc[2, 2]
    assert cell == raw[2, 2]
def test_iloc_getitem_multiple_items():
    """Positional selection of the last two rows equals ``xs("b")`` (GH 5528)."""
    pairs = zip(["a", "a", "b", "b"], ["x", "y", "x", "y"])
    mi = MultiIndex.from_tuples(pairs)
    frame = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), index=mi)

    by_position = frame.iloc[[2, 3]]
    by_label = frame.xs("b", drop_level=False)
    tm.assert_frame_equal(by_position, by_label)
def test_iloc_getitem_labels():
    """``iloc`` on a frame with repeated string labels still indexes by position."""
    # this is basically regular indexing
    values = np.random.default_rng(2).standard_normal((4, 3))
    frame = DataFrame(
        values,
        columns=[["i", "i", "j"], ["A", "A", "B"]],
        index=[["i", "i", "j", "k"], ["X", "X", "Y", "Y"]],
    )
    cell = frame.iloc[2, 2]
    assert cell == values[2, 2]
def test_frame_getitem_slice(multiindex_dataframe_random_data):
    """``iloc[:4]`` and plain ``[:4]`` slicing return the same rows."""
    frame = multiindex_dataframe_random_data
    via_iloc = frame.iloc[:4]
    via_getitem = frame[:4]
    tm.assert_frame_equal(via_iloc, via_getitem)
def test_frame_setitem_slice(multiindex_dataframe_random_data):
    """Assigning through a positional row slice changes only the sliced rows."""
    frame = multiindex_dataframe_random_data
    frame.iloc[:4] = 0

    first_four = frame.values[:4]
    remainder = frame.values[4:]
    assert (first_four == 0).all()
    assert (remainder != 0).all()
def test_indexing_ambiguity_bug_1678():
    """Positional column 1 is the ("Ohio", "Red") column (GH 1678)."""
    col_index = MultiIndex.from_tuples(
        [("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")]
    )
    row_index = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])
    frame = DataFrame(
        np.arange(12).reshape((4, 3)), index=row_index, columns=col_index
    )

    by_position = frame.iloc[:, 1]
    by_label = frame.loc[:, ("Ohio", "Red")]
    tm.assert_series_equal(by_position, by_label)
def test_iloc_integer_locations():
    """Cell-by-cell ``iloc`` reads reproduce the data with a repeated index entry (GH 13797)."""
    rows = [
        ["str00", "str01"],
        ["str10", "str11"],
        ["str20", "srt21"],
        ["str30", "str31"],
        ["str40", "str41"],
    ]
    # ("CC", "B") appears twice on purpose
    mi = MultiIndex.from_tuples(
        [("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
    )
    plain = DataFrame(rows)
    frame = DataFrame(rows, index=mi)

    rebuilt = DataFrame(
        [[frame.iloc[row, col] for col in range(2)] for row in range(5)]
    )
    tm.assert_frame_equal(rebuilt, plain)
@pytest.mark.parametrize(
    "data, indexes, values, expected_k",
    [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]),
    ],
)
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
    """
    In-place ``+=`` through ``Series.iloc`` on an integer MultiIndex (GH17148).

    Each position in ``indexes`` is incremented by the matching entry of
    ``values``; the resulting "k" column must equal ``expected_k``.
    """
    frame = DataFrame(data=data, columns=["i", "j", "k"])
    frame = frame.set_index(["i", "j"])

    ser = frame.k.copy()
    for position, increment in zip(indexes, values):
        ser.iloc[position] += increment

    frame["k"] = expected_k
    tm.assert_series_equal(ser, frame.k)
def test_getitem_iloc(multiindex_dataframe_random_data):
    """``iloc[2]`` matches a cross-section taken with the third index label."""
    frame = multiindex_dataframe_random_data
    by_position = frame.iloc[2]
    by_label = frame.xs(frame.index[2])
    tm.assert_series_equal(by_position, by_label)

View File

@ -0,0 +1,118 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
@pytest.fixture
def m():
    """Return the constant 5 (used as the number of random keys to draw)."""
    return 5
@pytest.fixture
def n():
    """Return the constant 100 (used as the number of generated rows)."""
    return 100
@pytest.fixture
def cols():
    """Return the five column names used by the generated frame."""
    names = ["jim", "joe", "jolie", "joline", "jolia"]
    return names
@pytest.fixture
def vals(n):
    """
    Return ``n`` row tuples of seeded random data: an integer, a lowercase
    letter, a date, an uppercase letter and a standard-normal float.
    """
    dates = pd.date_range("20141009", periods=10).tolist()
    columns = [
        np.random.default_rng(2).integers(0, 10, n),
        np.random.default_rng(2).choice(list("abcdefghij"), n),
        np.random.default_rng(2).choice(dates, n),
        np.random.default_rng(2).choice(list("ZYXWVUTSRQ"), n),
        np.random.default_rng(2).standard_normal(n),
    ]
    # transpose the per-column arrays into per-row tuples
    return [tuple(row) for row in zip(*columns)]
@pytest.fixture
def keys(n, m, vals):
    """
    Return a bunch of 4-element lookup keys for testing.

    The first ``m`` keys are drawn at random from ranges one element wider
    than those used by ``vals``; the rest are every ``n // m``-th row of
    ``vals`` with its last element removed.
    """
    dates = pd.date_range("20141009", periods=11).tolist()
    parts = [
        np.random.default_rng(2).integers(0, 11, m),
        np.random.default_rng(2).choice(list("abcdefghijk"), m),
        np.random.default_rng(2).choice(dates, m),
        np.random.default_rng(2).choice(list("ZYXWVUTSRQP"), m),
    ]
    lookup_keys = [tuple(key) for key in zip(*parts)]
    lookup_keys.extend(row[:-1] for row in vals[:: n // m])
    return lookup_keys
# covers both unique index and non-unique index
@pytest.fixture
def df(vals, cols):
    """Return a DataFrame built from the ``vals`` rows, labelled with ``cols``."""
    frame = DataFrame(vals, columns=cols)
    return frame
@pytest.fixture
def a(df):
    """Return ``df`` concatenated with itself, so every row appears twice."""
    doubled = pd.concat([df, df])
    return doubled
@pytest.fixture
def b(df, cols):
    """Return ``df`` with duplicates dropped over all columns except the last."""
    key_columns = cols[:-1]
    return df.drop_duplicates(subset=key_columns)
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("frame_fixture", ["a", "b"])
def test_multiindex_get_loc(request, lexsort_depth, keys, frame_fixture, cols):
    """
    Compare ``loc`` lookups on a MultiIndex against boolean-mask selection.

    For every key in ``keys`` and every leading prefix of that key, the rows
    found by masking the flat frame column by column must match what
    ``mi.loc[prefix]`` returns (or the prefix must be absent from the index).
    """
    # GH7724, GH2646
    # frame_fixture names one of the fixtures "a" / "b"; resolve it lazily
    frame = request.getfixturevalue(frame_fixture)
    if lexsort_depth == 0:
        df = frame.copy(deep=False)
    else:
        # sort by the first ``lexsort_depth`` columns
        df = frame.sort_values(by=cols[:lexsort_depth])
    # every column except the last becomes an index level
    mi = df.set_index(cols[:-1])
    assert not mi.index._lexsort_depth < lexsort_depth
    for key in keys:
        mask = np.ones(len(df), dtype=bool)
        # test for all partials of this key
        for i, k in enumerate(key):
            # narrow the mask in place with each additional key component
            mask &= df.iloc[:, i] == k
            if not mask.any():
                assert key[: i + 1] not in mi.index
                continue
            assert key[: i + 1] in mi.index
            right = df[mask].copy(deep=False)
            if i + 1 != len(key):  # partial key
                # drop the matched columns; the remaining key columns form the index
                return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
                assert return_value is None
                return_value = right.set_index(cols[i + 1 : -1], inplace=True)
                assert return_value is None
                tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
            else:  # full key
                return_value = right.set_index(cols[:-1], inplace=True)
                assert return_value is None
                if len(right) == 1:  # single hit
                    # the single matching row is compared as a Series
                    right = Series(
                        right["jolia"].values, name=right.index[0], index=["jolia"]
                    )
                    tm.assert_series_equal(mi.loc[key[: i + 1]], right)
                else:  # multi hit
                    tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

View File

@ -0,0 +1,992 @@
import numpy as np
import pytest
from pandas.errors import (
IndexingError,
PerformanceWarning,
)
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.fixture
def single_level_multiindex():
    """Return a MultiIndex that has a single level, named "first"."""
    labels = ["foo", "bar", "baz", "qux"]
    return MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])
@pytest.fixture
def frame_random_data_integer_multi_index():
    """Return a 6 x 2 seeded random frame indexed by a two-level integer MultiIndex."""
    mi = MultiIndex(
        levels=[[0, 1], [0, 1, 2]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
    )
    values = np.random.default_rng(2).standard_normal((6, 2))
    return DataFrame(values, index=mi)
class TestMultiIndexLoc:
    """Tests for label-based (``.loc``) indexing on MultiIndex-ed objects."""

    def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
        """A scalar set through ``loc`` with a tuple row key can be read back."""
        frame = multiindex_dataframe_random_data
        frame.loc[("bar", "two"), "B"] = 5
        assert frame.loc[("bar", "two"), "B"] == 5

        # with integer labels
        df = frame.copy()
        df.columns = list(range(3))
        df.loc[("bar", "two"), 1] = 7
        assert df.loc[("bar", "two"), 1] == 7

    def test_loc_getitem_general(self, any_real_numpy_dtype):
        """Tuple lookup works on an unsorted index (with a warning) and a sorted one."""
        # GH#2817
        dtype = any_real_numpy_dtype
        data = {
            "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
            "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
            "num": {0: 12, 1: 11, 2: 12, 3: 12, 4: 12},
        }
        df = DataFrame(data)
        df = df.astype({"col": dtype, "num": dtype})
        df = df.set_index(keys=["col", "num"])
        key = 4.0, 12

        # emits a PerformanceWarning, ok
        with tm.assert_produces_warning(PerformanceWarning):
            tm.assert_frame_equal(df.loc[key], df.iloc[2:])

        # this is ok
        return_value = df.sort_index(inplace=True)
        assert return_value is None
        res = df.loc[key]

        # col has float dtype, result should be float64 Index
        col_arr = np.array([4.0] * 3, dtype=dtype)
        year_arr = np.array([12] * 3, dtype=dtype)
        index = MultiIndex.from_arrays([col_arr, year_arr], names=["col", "num"])
        expected = DataFrame({"amount": [222, 333, 444]}, index=index)
        tm.assert_frame_equal(res, expected)

    def test_loc_getitem_multiindex_missing_label_raises(self):
        """A scalar label absent from the first level raises KeyError."""
        # GH#21593
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 3)),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )

        with pytest.raises(KeyError, match=r"^2$"):
            df.loc[2]

    def test_loc_getitem_list_of_tuples_with_multiindex(
        self, multiindex_year_month_day_dataframe_random_data
    ):
        """A list of full-key tuples selects the same rows as ``reindex``."""
        ser = multiindex_year_month_day_dataframe_random_data["A"]
        expected = ser.reindex(ser.index[49:51])
        result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]]
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_series(self):
        """A Series used as a key is treated as a list of first-level labels."""
        # GH14730
        # passing a series as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = Series([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
            dtype=np.float64,
        )
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        result = x.loc[[1, 3]]
        tm.assert_series_equal(result, expected)

        # GH15424
        y1 = Series([1, 3], index=[1, 2])
        result = x.loc[y1]
        tm.assert_series_equal(result, expected)

        empty = Series(data=[], dtype=np.float64)
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
            dtype=np.float64,
        )
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_array(self):
        """NumPy arrays (filled, empty and scalar) are accepted as keys."""
        # GH15434
        # passing an array as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = np.array([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
            dtype=np.float64,
        )
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        # empty array:
        empty = np.array([])
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
            dtype="float64",
        )
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

        # 0-dim array (scalar):
        scalar = np.int64(1)
        expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64)
        result = x.loc[scalar]
        tm.assert_series_equal(result, expected)

    def test_loc_multiindex_labels(self):
        """String labels select on rows and columns; repeated labels are allowed."""
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 3)),
            columns=[["i", "i", "j"], ["A", "A", "B"]],
            index=[["i", "i", "j"], ["X", "X", "Y"]],
        )

        # the first 2 rows
        expected = df.iloc[[0, 1]].droplevel(0)
        result = df.loc["i"]
        tm.assert_frame_equal(result, expected)

        # 2nd (last) column
        expected = df.iloc[:, [2]].droplevel(0, axis=1)
        result = df.loc[:, "j"]
        tm.assert_frame_equal(result, expected)

        # bottom right corner
        expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1)
        result = df.loc["j"].loc[:, "j"]
        tm.assert_frame_equal(result, expected)

        # with a tuple
        expected = df.iloc[[0, 1]]
        result = df.loc[("i", "X")]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_ints(self):
        """An integer label selects on the first level and that level is dropped."""
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 3)),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )
        expected = df.iloc[[0, 1]].droplevel(0)
        result = df.loc[4]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_missing_label_raises(self):
        """An integer label missing from the first level raises KeyError."""
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 3)),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )

        with pytest.raises(KeyError, match=r"^2$"):
            df.loc[2]

    @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
    def test_loc_multiindex_list_missing_label(self, key, pos):
        """A list key containing a missing label raises (``pos`` is not used)."""
        # GH 27148 - lists with missing labels _do_ raise
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 3)),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )

        with pytest.raises(KeyError, match="not in index"):
            df.loc[key]

    def test_loc_multiindex_too_many_dims_raises(self):
        """Missing keys raise KeyError; more keys than levels raise IndexingError."""
        # GH 14885
        s = Series(
            range(8),
            index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
        )

        with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"):
            s.loc["a", "b"]
        with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"):
            s.loc["a", "d", "g"]
        with pytest.raises(IndexingError, match="Too many indexers"):
            s.loc["a", "d", "g", "j"]

    def test_loc_multiindex_indexer_none(self):
        """List keys on the first level select every matching row or column."""
        # GH6788
        # multi-index indexer is None (meaning take all)
        attributes = ["Attribute" + str(i) for i in range(1)]
        attribute_values = ["Value" + str(i) for i in range(5)]

        index = MultiIndex.from_product([attributes, attribute_values])
        df = 0.1 * np.random.default_rng(2).standard_normal((10, 1 * 5)) + 0.5
        df = DataFrame(df, columns=index)
        result = df[attributes]
        tm.assert_frame_equal(result, df)

        # GH 7349
        # loc with a multi-index seems to be doing fallback
        df = DataFrame(
            np.arange(12).reshape(-1, 1),
            index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]),
        )

        expected = df.loc[([1, 2],), :]
        result = df.loc[[1, 2]]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_incomplete(self):
        """Open-ended and stepped slices on the first level behave like labels."""
        # GH 7399
        # incomplete indexers
        s = Series(
            np.arange(15, dtype="int64"),
            MultiIndex.from_product([range(5), ["a", "b", "c"]]),
        )
        expected = s.loc[:, "a":"c"]

        result = s.loc[0:4, "a":"c"]
        tm.assert_series_equal(result, expected)

        result = s.loc[:4, "a":"c"]
        tm.assert_series_equal(result, expected)

        result = s.loc[0:, "a":"c"]
        tm.assert_series_equal(result, expected)

        # GH 7400
        # multiindexer getitem with list of indexers skips wrong element
        s = Series(
            np.arange(15, dtype="int64"),
            MultiIndex.from_product([range(5), ["a", "b", "c"]]),
        )
        expected = s.iloc[[6, 7, 8, 12, 13, 14]]
        result = s.loc[2:4:2, "a":"c"]
        tm.assert_series_equal(result, expected)

    def test_get_loc_single_level(self, single_level_multiindex):
        """Smoke test: every value of a one-level MultiIndex can be looked up."""
        single_level = single_level_multiindex
        s = Series(
            np.random.default_rng(2).standard_normal(len(single_level)),
            index=single_level,
        )
        for k in single_level.values:
            # no assertion: the lookup only has to succeed
            s[k]

    def test_loc_getitem_int_slice(self):
        """Integer slices given to ``loc`` are label slices, end point included."""
        # GH 3053
        # loc should treat integer slices like label slices
        index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]])
        df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
        result = df.loc[6:8, :]
        expected = df
        tm.assert_frame_equal(result, expected)

        index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]])
        df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
        result = df.loc[20:30, :]
        expected = df.iloc[2:]
        tm.assert_frame_equal(result, expected)

        # doc examples
        result = df.loc[10, :]
        expected = df.iloc[0:2]
        expected.index = ["a", "b"]
        tm.assert_frame_equal(result, expected)

        result = df.loc[:, 10]
        expected = df[10]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index)
    )
    @pytest.mark.parametrize(
        "indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index)
    )
    def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
        """Nested list-likes and slices work as per-level keys; sets are rejected."""
        # GH #19686
        # .loc should work with nested indexers which can be
        # any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices

        def convert_nested_indexer(indexer_type, keys):
            # ndarray and slice cannot be built by calling the type on a list
            if indexer_type == np.ndarray:
                return np.array(keys)
            if indexer_type == slice:
                return slice(*keys)
            return indexer_type(keys)

        a = [10, 20, 30]
        b = [1, 2, 3]
        index = MultiIndex.from_product([a, b])
        df = DataFrame(
            np.arange(len(index), dtype="int64"), index=index, columns=["Data"]
        )

        keys = ([10, 20], [2, 3])
        types = (indexer_type_1, indexer_type_2)

        # check indexers with all the combinations of nested objects
        # of all the valid types
        indexer = tuple(
            convert_nested_indexer(indexer_type, k)
            for indexer_type, k in zip(types, keys)
        )
        if indexer_type_1 is set or indexer_type_2 is set:
            with pytest.raises(TypeError, match="as an indexer is not supported"):
                df.loc[indexer, "Data"]

            return
        else:
            result = df.loc[indexer, "Data"]
        expected = Series(
            [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
        )

        tm.assert_series_equal(result, expected)

    def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series):
        """Assignment through a one-element tuple key targets the first level."""
        # GH#37711
        mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
        obj = frame_or_series([1, 2], index=mi)
        obj.loc[("a",)] = 0
        expected = frame_or_series([0, 2], index=mi)
        tm.assert_equal(obj, expected)

    @pytest.mark.parametrize("indexer", [("a",), ("a")])
    def test_multiindex_one_dimensional_tuple_columns(self, indexer):
        """``("a",)`` and the plain string ``"a"`` select the same rows to assign."""
        # GH#37711
        mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
        obj = DataFrame([1, 2], index=mi)
        obj.loc[indexer, :] = 0
        expected = DataFrame([0, 2], index=mi)
        tm.assert_frame_equal(obj, expected)

    @pytest.mark.parametrize(
        "indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)]
    )
    def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value):
        """Assigning to new columns adds them; unselected rows hold ``exp_value``."""
        # GH#39147
        mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
        df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"])
        df.loc[indexer, ["c", "d"]] = 1.0
        expected = DataFrame(
            [[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]],
            index=mi,
            columns=["a", "b", "c", "d"],
        )
        tm.assert_frame_equal(df, expected)

    def test_sorted_multiindex_after_union(self):
        """Date-string slicing works on frames built by combining two Series."""
        # GH#44752
        midx = MultiIndex.from_product(
            [pd.date_range("20110101", periods=2), Index(["a", "b"])]
        )
        ser1 = Series(1, index=midx)
        ser2 = Series(1, index=midx[:2])
        df = pd.concat([ser1, ser2], axis=1)
        expected = df.copy()
        result = df.loc["2011-01-01":"2011-01-02"]
        tm.assert_frame_equal(result, expected)

        df = DataFrame({0: ser1, 1: ser2})
        result = df.loc["2011-01-01":"2011-01-02"]
        tm.assert_frame_equal(result, expected)

        df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1)
        result = df.loc["2011-01-01":"2011-01-02"]
        tm.assert_frame_equal(result, expected)

    def test_loc_no_second_level_index(self):
        """A scalar key on the middle level drops that level from the result."""
        # GH#43599
        df = DataFrame(
            index=MultiIndex.from_product([list("ab"), list("cd"), list("e")]),
            columns=["Val"],
        )
        res = df.loc[np.s_[:, "c", :]]
        expected = DataFrame(
            index=MultiIndex.from_product([list("ab"), list("e")]), columns=["Val"]
        )
        tm.assert_frame_equal(res, expected)

    def test_loc_multi_index_key_error(self):
        """A column tuple that is not present raises KeyError naming the tuple."""
        # GH 51892
        df = DataFrame(
            {
                (1, 2): ["a", "b", "c"],
                (1, 3): ["d", "e", "f"],
                (2, 2): ["g", "h", "i"],
                (2, 4): ["j", "k", "l"],
            }
        )
        # parentheses are escaped so the pattern matches the literal tuple
        # text instead of acting as a regex group around "1, 4"
        with pytest.raises(KeyError, match=r"\(1, 4\)"):
            df.loc[0, (1, 4)]
@pytest.mark.parametrize(
    "indexer, pos",
    [
        ([], []),  # empty ok
        (["A"], slice(3)),
        (["A", "D"], []),  # "D" isn't present -> raise
        (["D", "E"], []),  # no values found -> raise
        (["D"], []),  # same, with single item list: GH 27148
        (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
        (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
    ],
)
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
    """
    Multi-index slicing with missing indexers (GH 7866).

    ``pos`` gives the positions expected from ``ser.loc[indexer]``; an empty
    selection for a non-empty indexer means the lookup must raise KeyError.
    """
    mi = MultiIndex.from_product(
        [["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
    )
    ser = Series(np.arange(9, dtype="int64"), index=mi).sort_index()
    expected = ser.iloc[pos]

    if expected.size == 0 and indexer != []:
        # NOTE: str(indexer) is interpreted as a regular expression here
        with pytest.raises(KeyError, match=str(indexer)):
            ser.loc[indexer]
        return

    if indexer == (slice(None), ["foo", "bah"]):
        # "bah" is not in idx.levels[1], raising KeyError enforced in 2.0
        with pytest.raises(KeyError, match="'bah'"):
            ser.loc[indexer]
        return

    tm.assert_series_equal(ser.loc[indexer], expected)
@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
    """A column indexer with an empty list on any level selects no columns (GH 8737)."""
    # empty indexer
    columns = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"]))
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((5, 6)),
        index=range(5),
        columns=columns,
    )
    frame = frame.sort_index(level=0, axis=1)

    no_columns = DataFrame(index=range(5), columns=columns.reindex([])[0])
    selected = frame.loc[:, columns_indexer]
    tm.assert_frame_equal(selected, no_columns)
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
    """A cell that stores a function object is returned unchanged (GH 7914)."""
    # regression from < 0.14.0
    columns = MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")])
    frame = DataFrame(
        [[np.mean, np.median], ["mean", "median"]],
        columns=columns,
        index=["function", "name"],
    )
    cell = frame.loc["function", ("functs", "mean")]
    assert cell == np.mean
def test_loc_getitem_tuple_plus_slice():
    """``loc[(0, 0), :]`` returns the same row as ``loc[0, 0]`` (GH 671)."""
    flat = DataFrame(
        {
            "a": np.arange(10),
            "b": np.arange(10),
            "c": np.random.default_rng(2).standard_normal(10),
            "d": np.random.default_rng(2).standard_normal(10),
        }
    )
    frame = flat.set_index(["a", "b"])

    via_scalars = frame.loc[0, 0]
    via_tuple = frame.loc[(0, 0), :]
    tm.assert_series_equal(via_tuple, via_scalars)
def test_loc_getitem_int(frame_random_data_integer_multi_index):
    """``loc[1]`` returns the last three rows with the first level dropped."""
    frame = frame_random_data_integer_multi_index
    selected = frame.loc[1]

    wanted = frame[-3:]
    wanted.index = wanted.index.droplevel(0)
    tm.assert_frame_equal(selected, wanted)
def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index):
    """Looking up the integer label 3 on the fixture frame raises ``KeyError``."""
    frame = frame_random_data_integer_multi_index
    with pytest.raises(KeyError, match=r"^3$"):
        frame.loc[3]
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
    """A missing (row, column) key raises on read and is inserted on write."""
    frame = multiindex_dataframe_random_data
    row_key = ("bar", "three")

    # test setup - check key not in dataframe
    with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"):
        frame.loc[row_key, "B"]

    # in theory should be inserting in a sorted space????
    frame.loc[row_key, "B"] = 0

    stored = frame.sort_index().loc[row_key, "B"]
    assert stored == 0
def test_loc_setitem_single_column_slice():
    """``loc[:, label] = values`` fills a single top-level column in place."""
    # case from https://github.com/pandas-dev/pandas/issues/27841
    frame = DataFrame(
        "string",
        index=list("abcd"),
        columns=MultiIndex.from_product([["Main"], ("another", "one")]),
    )
    frame["labels"] = "a"
    frame.loc[:, "labels"] = frame.index
    tm.assert_numpy_array_equal(
        np.asarray(frame["labels"]), np.asarray(frame.index)
    )

    # test with non-object block
    frame = DataFrame(
        np.nan,
        index=range(4),
        columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]),
    )
    wanted = frame.copy()
    frame.loc[:, "B"] = np.arange(4)
    wanted.iloc[:, 2] = np.arange(4)
    tm.assert_frame_equal(frame, wanted)
def test_loc_nan_multiindex(using_infer_string):
    """Chained ``loc`` lookups work when the innermost level contains NaN (GH 5286)."""
    index_tuples = [
        ("Good Things", "C", np.nan),
        ("Good Things", "R", np.nan),
        ("Bad Things", "C", np.nan),
        ("Bad Things", "T", np.nan),
        ("Okay Things", "N", "B"),
        ("Okay Things", "N", "D"),
        ("Okay Things", "B", np.nan),
        ("Okay Things", "D", np.nan),
    ]
    frame = DataFrame(
        np.ones((8, 4)),
        columns=Index(["d1", "d2", "d3", "d4"]),
        index=MultiIndex.from_tuples(index_tuples, names=["u1", "u2", "u3"]),
    )
    selected = frame.loc["Good Things"].loc["C"]

    # the remaining level's dtype depends on the string-inference option
    level_dtype = "string[pyarrow_numpy]" if using_infer_string else "object"
    wanted = DataFrame(
        np.ones((1, 4)),
        index=Index([np.nan], dtype=level_dtype, name="u3"),
        columns=Index(["d1", "d2", "d3", "d4"]),
    )
    tm.assert_frame_equal(selected, wanted)
def test_loc_period_string_indexing():
    """A period string in a tuple key is an exact lookup, not a slice (GH 9892)."""
    quarters = pd.period_range("2013Q1", "2013Q4", freq="Q")
    cvr_numbers = (1111, 2222, 3333)
    mi = MultiIndex.from_product((quarters, cvr_numbers), names=("Period", "CVR"))
    column_names = (
        "OMS",
        "OMK",
        "RES",
        "DRIFT_IND",
        "OEVRIG_IND",
        "FIN_IND",
        "VARE_UD",
        "LOEN_UD",
        "FIN_UD",
    )
    frame = DataFrame(index=mi, columns=column_names)

    by_string = frame.loc[("2013Q1", 1111), "OMS"]

    by_period = frame.loc[(quarters[0], 1111), "OMS"]
    assert np.isnan(by_period)

    # Because the resolution of the string matches, it is an exact lookup,
    # not a slice
    assert np.isnan(by_string)

    by_string_again = frame.loc[("2013Q1", 1111), "OMS"]
    assert np.isnan(by_string_again)
def test_loc_datetime_mask_slicing():
    """A scalar timestamp and a boolean mask combine in one tuple key (GH 16699)."""
    dates = pd.to_datetime(["2017-05-04", "2017-05-05"])
    mi = MultiIndex.from_product([dates, dates], names=["Idx1", "Idx2"])
    frame = DataFrame(
        data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=mi, columns=["C1", "C2"]
    )

    later_than_first = frame.index.get_level_values(1) > "2017-05-04"
    selected = frame.loc[(dates[0], later_than_first), "C1"]

    wanted_index = MultiIndex.from_tuples(
        [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))],
        names=["Idx1", "Idx2"],
    )
    wanted = Series([3], name="C1", index=wanted_index)
    tm.assert_series_equal(selected, wanted)
def test_loc_datetime_series_tuple_slicing():
    """``ser.loc[:, [timestamp]]`` returns the matching row with both levels kept."""
    # https://github.com/pandas-dev/pandas/issues/35858
    stamp = pd.Timestamp("2000")
    mi = MultiIndex.from_tuples([("a", stamp)], names=["a", "b"])
    ser = Series(1, index=mi, name="c")

    selected = ser.loc[:, [stamp]]
    tm.assert_series_equal(selected, ser)
def test_loc_with_mi_indexer():
    """A MultiIndex used as a row key returns every row matching each entry."""
    # https://github.com/pandas-dev/pandas/issues/35351
    level_names = ["index", "date"]
    frame = DataFrame(
        data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]],
        index=MultiIndex.from_tuples(
            [(0, 1), (1, 0), (1, 1), (1, 1)], names=["index", "date"]
        ),
        columns=["author", "price"],
    )
    key = MultiIndex.from_tuples([(0, 1), (1, 1)], names=level_names)
    selected = frame.loc[key, :]

    # (1, 1) appears twice in the frame, so both rows come back
    wanted = DataFrame(
        [["a", 1], ["b", 1], ["c", 2]],
        index=MultiIndex.from_tuples([(0, 1), (1, 1), (1, 1)], names=level_names),
        columns=["author", "price"],
    )
    tm.assert_frame_equal(selected, wanted)
def test_loc_mi_with_level1_named_0():
    """Lookups work when the second index level is named ``0`` (GH#37194)."""
    stamps = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")

    frame = Series(range(3), index=stamps).to_frame()
    frame[1] = stamps

    indexed = frame.set_index(0, append=True)
    assert indexed.index.names == (None, 0)
    indexed.index.get_loc(stamps[0])  # smoke test

    got = indexed.loc[stamps[0]]
    want = indexed.iloc[[0]].droplevel(None)
    tm.assert_frame_equal(got, want)

    column = indexed[1]
    assert column.index.names == (None, 0)

    got = column.loc[stamps[0]]
    want = column.iloc[[0]].droplevel(None)
    tm.assert_series_equal(got, want)
def test_getitem_str_slice():
    """A string slice on the second level matches the chained two-step lookup."""
    # GH#15928
    quotes = [
        ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
        ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
        ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
        ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
        ["20160525 13:30:00.135", "MSFT", "51.92", "51.95"],
        ["20160525 13:30:00.135", "AAPL", "98.61", "98.62"],
    ]
    flat = DataFrame(quotes, columns="time,ticker,bid,ask".split(","))
    indexed = flat.set_index(["ticker", "time"]).sort_index()

    one_step = indexed.loc[("AAPL", slice("2016-05-25 13:30:00")), :].droplevel(0)
    two_step = indexed.loc["AAPL"].loc[slice("2016-05-25 13:30:00"), :]
    tm.assert_frame_equal(one_step, two_step)
def test_3levels_leading_period_index():
    """A full three-part key whose first part is a Period returns the scalar (GH#24091)."""
    periods = pd.PeriodIndex(
        ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"],
        name="datetime",
        freq="D",
    )
    second_level = ["A", "A", "Z", "W"]
    third_level = ["B", "C", "Q", "F"]
    mi = MultiIndex.from_arrays([periods, second_level, third_level])

    ser = Series(range(4), index=mi, dtype=np.float64)
    value = ser.loc[(periods[0], "A", "B")]
    assert value == 0.0
class TestKeyErrorsWithMultiIndex:
    """Missing keys on a MultiIndex must surface as ``KeyError``."""

    def test_missing_keys_raises_keyerror(self):
        """A full key with a missing second part raises KeyError (GH#27420)."""
        # GH#27420 KeyError, not TypeError
        flat = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"])
        indexed = flat.set_index(["A", "B"])

        with pytest.raises(KeyError, match="1"):
            indexed.loc[(1, 6)]

    def test_missing_key_raises_keyerror2(self):
        """A two-part key missing from a two-level Series raises KeyError (GH#21168)."""
        # GH#21168 KeyError, not "IndexingError: Too many indexers"
        mi = MultiIndex.from_product([[0, 1]] * 2)
        ser = Series(-1, index=mi)

        with pytest.raises(KeyError, match=r"\(0, 3\)"):
            ser.loc[0, 3]

    def test_missing_key_combination(self):
        """Labels that exist per level but never together raise KeyError (GH: 19556)."""
        level_arrays = [
            np.array(["a", "a", "b", "b"]),
            np.array(["1", "2", "2", "3"]),
            np.array(["c", "d", "c", "d"]),
        ]
        mi = MultiIndex.from_arrays(level_arrays, names=["one", "two", "three"])
        frame = DataFrame(np.random.default_rng(2).random((4, 3)), index=mi)

        sliced_key_msg = r"\('b', '1', slice\(None, None, None\)\)"
        with pytest.raises(KeyError, match=sliced_key_msg):
            frame.loc[("b", "1", slice(None)), :]
        with pytest.raises(KeyError, match=sliced_key_msg):
            frame.index.get_locs(("b", "1", slice(None)))
        with pytest.raises(KeyError, match=r"\('b', '1'\)"):
            frame.loc[("b", "1"), :]
def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data):
    """Column-then-key and key-then-column selection give the same Series."""
    frame = multiindex_year_month_day_dataframe_random_data
    column_first = frame["A"][2000, 5]
    rows_first = frame.loc[2000, 5]["A"]
    tm.assert_series_equal(column_first, rows_first)
def test_loc_with_nan():
    """Label lookups work when another entry of the same level is NaN (GH: 27104)."""
    flat = DataFrame(
        {"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]}
    )
    frame = flat.set_index(["ind1", "ind2"])

    # list key keeps both levels
    from_list = frame.loc[["a"]]
    wanted = DataFrame(
        {"col": [1]}, index=MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"])
    )
    tm.assert_frame_equal(from_list, wanted)

    # scalar key drops the matched level
    from_scalar = frame.loc["a"]
    wanted = DataFrame({"col": [1]}, index=Index([1], name="ind2"))
    tm.assert_frame_equal(from_scalar, wanted)
def test_getitem_non_found_tuple():
    """A full tuple key that is absent raises KeyError showing the tuple (GH: 25236)."""
    flat = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"])
    frame = flat.set_index(["a", "b", "c"])

    with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"):
        frame.loc[(2.0, 2.0, 3.0)]
def test_get_loc_datetime_index():
    """``get_loc`` with a partial date string agrees between Index and MultiIndex."""
    # GH#24263
    dates = pd.date_range("2001-01-01", periods=100)
    mi = MultiIndex.from_arrays([dates])
    month = "2001-01"

    # Check if get_loc matches for Index and MultiIndex
    assert mi.get_loc(month) == slice(0, 31, None)
    assert dates.get_loc(month) == slice(0, 31, None)

    from_mi = mi[::2].get_loc(month)
    from_index = dates[::2].get_loc(month)
    assert from_mi == from_index

    from_mi = mi.repeat(2).get_loc(month)
    from_index = dates.repeat(2).get_loc(month)
    assert from_mi == from_index

    from_mi = mi.append(mi).get_loc(month)
    from_index = dates.append(dates).get_loc(month)
    # TODO: standardize return type for MultiIndex.get_loc
    tm.assert_numpy_array_equal(from_mi.nonzero()[0], from_index)
def test_loc_setitem_indexer_differently_ordered():
    """Values are assigned in the order of the key list, not index order (GH#34603)."""
    mi = MultiIndex.from_product([["a", "b"], [0, 1]])
    frame = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi)

    key = ("a", [1, 0])
    frame.loc[key, :] = np.array([[9, 10], [11, 12]])

    wanted = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi)
    tm.assert_frame_equal(frame, wanted)
def test_loc_getitem_index_differently_ordered_slice_none():
    """Rows come back in the order of the second-level key list (GH#31330)."""
    frame = DataFrame(
        [[1, 2], [3, 4], [5, 6], [7, 8]],
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=["a", "b"],
    )
    selected = frame.loc[(slice(None), [2, 1]), :]

    wanted = DataFrame(
        [[3, 4], [7, 8], [1, 2], [5, 6]],
        index=[["a", "b", "a", "b"], [2, 2, 1, 1]],
        columns=["a", "b"],
    )
    tm.assert_frame_equal(selected, wanted)
@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]])
def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer):
    """Key-list ordering is kept when the key list repeats labels (GH#40978)."""
    frame = DataFrame(
        [1] * 8,
        index=MultiIndex.from_tuples(
            [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)]
        ),
        columns=["a"],
    )

    by_key_list = frame.loc[(slice(None), indexer), :]
    wanted = DataFrame(
        [1] * 8,
        index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]],
        columns=["a"],
    )
    tm.assert_frame_equal(by_key_list, wanted)

    # a boolean mask keeps the frame's own order
    by_mask = frame.loc[frame.index.isin(indexer, level=1), :]
    tm.assert_frame_equal(by_mask, frame)
def test_loc_getitem_drops_levels_for_one_row_dataframe():
    """Levels indexed by a scalar are dropped, even for a one-row object."""
    # GH#10521 "x" and "z" are both scalar indexing, so those levels are dropped
    mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])

    frame = DataFrame({"d": [0]}, index=mi)
    wanted_frame = frame.droplevel([0, 2])
    tm.assert_frame_equal(frame.loc["x", :, "z"], wanted_frame)

    ser = Series([0], index=mi)
    selected = ser.loc["x", :, "z"]
    wanted_ser = Series([0], index=Index(["y"], name="b"))
    tm.assert_series_equal(selected, wanted_ser)
def test_mi_columns_loc_list_label_order():
    """Columns come back in the order of the list key (GH 10710)."""
    columns = MultiIndex.from_product([["A", "B", "C"], [1, 2]])
    frame = DataFrame(np.zeros((5, 6)), columns=columns)

    selected = frame.loc[:, ["B", "A"]]

    wanted_columns = MultiIndex.from_tuples(
        [("B", 1), ("B", 2), ("A", 1), ("A", 2)]
    )
    wanted = DataFrame(np.zeros((5, 4)), columns=wanted_columns)
    tm.assert_frame_equal(selected, wanted)
def test_mi_partial_indexing_list_raises():
    """A list key is not a tuple key: ``["b", 2]`` raises for the missing 2 (GH 13501)."""
    frame = DataFrame(
        np.arange(12).reshape((4, 3)),
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]],
    )
    frame.index.names = ["key1", "key2"]
    frame.columns.names = ["state", "color"]

    expected_message = "\\[2\\] not in index"
    with pytest.raises(KeyError, match=expected_message):
        frame.loc[["b", 2], "Colorado"]
def test_mi_indexing_list_nonexistent_raises():
    """A list of labels that are all missing raises KeyError (GH 15452)."""
    mi = MultiIndex.from_product([[1, 2], ["a", "b"]])
    ser = Series(range(4), index=mi)

    expected_message = "\\['not' 'found'\\] not in index"
    with pytest.raises(KeyError, match=expected_message):
        ser.loc[["not", "found"]]
def test_mi_add_cell_missing_row_non_unique():
    """Setting cells on new row labels appends rows when the index has duplicates (GH 16018)."""
    columns = MultiIndex.from_product([[1, 2], ["A", "B"]])
    frame = DataFrame(
        [[1, 2, 5, 6], [3, 4, 7, 8]],
        index=["a", "a"],
        columns=columns,
    )
    frame.loc["c"] = -1
    frame.loc["c", (1, "A")] = 3
    frame.loc["d", (1, "A")] = 3

    wanted_rows = [
        [1.0, 2.0, 5.0, 6.0],
        [3.0, 4.0, 7.0, 8.0],
        [3.0, -1.0, -1, -1],
        [3.0, np.nan, np.nan, np.nan],
    ]
    wanted = DataFrame(
        wanted_rows,
        index=["a", "a", "c", "d"],
        columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
    )
    tm.assert_frame_equal(frame, wanted)
def test_loc_get_scalar_casting_to_float():
    """An integer cell next to a float column is returned as ``np.int64`` (GH#41369)."""
    mi = MultiIndex.from_arrays([[3], [4]], names=["c", "d"])
    frame = DataFrame({"a": 1.0, "b": 2}, index=mi)

    scalar = frame.loc[(3, 4), "b"]
    assert scalar == 2
    assert isinstance(scalar, np.int64)

    # same cell reached through a list-of-tuples key
    scalar = frame.loc[[(3, 4)], "b"].iloc[0]
    assert scalar == 2
    assert isinstance(scalar, np.int64)
def test_loc_empty_single_selector_with_names():
    """Selecting one top-level label keeps the remaining level's integer name."""
    # GH 19517
    index = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0])
    ser = Series(index=index, dtype=np.float64)

    picked = ser.loc["a"]

    wanted = Series([np.nan, np.nan], index=Index(["A", "B"], name=0))
    tm.assert_series_equal(picked, wanted)
def test_loc_keyerror_rightmost_key_missing():
    """The KeyError for a tuple lookup names only the missing right-most key."""
    # GH 20951
    data = {
        "A": [100, 100, 200, 200, 300, 300],
        "B": [10, 10, 20, 21, 31, 33],
        "C": range(6),
    }
    indexed = DataFrame(data).set_index(["A", "B"])

    with pytest.raises(KeyError, match="^1$"):
        indexed.loc[(100, 1)]
def test_multindex_series_loc_with_tuple_label():
    """``Series.loc`` resolves a key whose second-level label is itself a tuple."""
    # GH#43908
    index = MultiIndex.from_tuples([(1, 2), (3, (4, 5))])
    series = Series([1, 2], index=index)

    assert series.loc[(3, (4, 5))] == 2

View File

@ -0,0 +1,235 @@
import numpy as np
import pytest
import pandas._libs.index as libindex
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import (
CategoricalDtype,
DataFrame,
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
from pandas.core.arrays.boolean import BooleanDtype
class TestMultiIndexBasic:
def test_multiindex_perf_warn(self):
df = DataFrame(
{
"jim": [0, 0, 1, 1],
"joe": ["x", "x", "z", "y"],
"jolie": np.random.default_rng(2).random(4),
}
).set_index(["jim", "joe"])
with tm.assert_produces_warning(PerformanceWarning):
df.loc[(1, "z")]
df = df.iloc[[2, 1, 3, 0]]
with tm.assert_produces_warning(PerformanceWarning):
df.loc[(0,)]
@pytest.mark.parametrize("offset", [-5, 5])
def test_indexing_over_hashtable_size_cutoff(self, monkeypatch, offset):
size_cutoff = 20
n = size_cutoff + offset
with monkeypatch.context():
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n))))
# hai it works!
assert s[("a", 5)] == 5
assert s[("a", 6)] == 6
assert s[("a", 7)] == 7
def test_multi_nan_indexing(self):
# GH 3588
df = DataFrame(
{
"a": ["R1", "R2", np.nan, "R4"],
"b": ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20],
}
)
result = df.set_index(["a", "b"], drop=False)
expected = DataFrame(
{
"a": ["R1", "R2", np.nan, "R4"],
"b": ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20],
},
index=[
Index(["R1", "R2", np.nan, "R4"], name="a"),
Index(["C1", "C2", "C3", "C4"], name="b"),
],
)
tm.assert_frame_equal(result, expected)
def test_exclusive_nat_column_indexing(self):
# GH 38025
# test multi indexing when one column exclusively contains NaT values
df = DataFrame(
{
"a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
"b": ["C1", "C2", "C3", "C4"],
"c": [10, 15, np.nan, 20],
}
)
df = df.set_index(["a", "b"])
expected = DataFrame(
{
"c": [10, 15, np.nan, 20],
},
index=[
Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"),
Index(["C1", "C2", "C3", "C4"], name="b"),
],
)
tm.assert_frame_equal(df, expected)
def test_nested_tuples_duplicates(self):
# GH#30892
dti = pd.to_datetime(["20190101", "20190101", "20190102"])
idx = Index(["a", "a", "c"])
mi = MultiIndex.from_arrays([dti, idx], names=["index1", "index2"])
df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi)
expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi)
df2 = df.copy(deep=True)
df2.loc[(dti[0], "a"), "c2"] = 1.0
tm.assert_frame_equal(df2, expected)
df3 = df.copy(deep=True)
df3.loc[[(dti[0], "a")], "c2"] = 1.0
tm.assert_frame_equal(df3, expected)
def test_multiindex_with_datatime_level_preserves_freq(self):
# https://github.com/pandas-dev/pandas/issues/35563
idx = Index(range(2), name="A")
dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
mi = MultiIndex.from_product([idx, dti])
df = DataFrame(np.random.default_rng(2).standard_normal((14, 2)), index=mi)
result = df.loc[0].index
tm.assert_index_equal(result, dti)
assert result.freq == dti.freq
def test_multiindex_complex(self):
# GH#42145
complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
non_complex_data = [3, 4, 5]
result = DataFrame(
{
"x": complex_data,
"y": non_complex_data,
"z": non_complex_data,
}
)
result.set_index(["x", "y"], inplace=True)
expected = DataFrame(
{"z": non_complex_data},
index=MultiIndex.from_arrays(
[complex_data, non_complex_data],
names=("x", "y"),
),
)
tm.assert_frame_equal(result, expected)
def test_rename_multiindex_with_duplicates(self):
# GH 38015
mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
df = DataFrame(index=mi)
df = df.rename(index={"A": "Apple"}, level=0)
mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")])
expected = DataFrame(index=mi2)
tm.assert_frame_equal(df, expected)
def test_series_align_multiindex_with_nan_overlap_only(self):
# GH 38439
mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
mi2 = MultiIndex.from_arrays([[np.nan, 82.0], [np.nan, np.nan]])
ser1 = Series([1, 2], index=mi1)
ser2 = Series([1, 2], index=mi2)
result1, result2 = ser1.align(ser2)
mi = MultiIndex.from_arrays([[81.0, 82.0, np.nan], [np.nan, np.nan, np.nan]])
expected1 = Series([1.0, np.nan, 2.0], index=mi)
expected2 = Series([np.nan, 2.0, 1.0], index=mi)
tm.assert_series_equal(result1, expected1)
tm.assert_series_equal(result2, expected2)
def test_series_align_multiindex_with_nan(self):
# GH 38439
mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
mi2 = MultiIndex.from_arrays([[np.nan, 81.0], [np.nan, np.nan]])
ser1 = Series([1, 2], index=mi1)
ser2 = Series([1, 2], index=mi2)
result1, result2 = ser1.align(ser2)
mi = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
expected1 = Series([1, 2], index=mi)
expected2 = Series([2, 1], index=mi)
tm.assert_series_equal(result1, expected1)
tm.assert_series_equal(result2, expected2)
def test_nunique_smoke(self):
# GH 34019
n = DataFrame([[1, 2], [1, 2]]).set_index([0, 1]).index.nunique()
assert n == 1
def test_multiindex_repeated_keys(self):
# GH19414
tm.assert_series_equal(
Series([1, 2], MultiIndex.from_arrays([["a", "b"]])).loc[
["a", "a", "b", "b"]
],
Series([1, 1, 2, 2], MultiIndex.from_arrays([["a", "a", "b", "b"]])),
)
def test_multiindex_with_na_missing_key(self):
# GH46173
df = DataFrame.from_dict(
{
("foo",): [1, 2, 3],
("bar",): [5, 6, 7],
(None,): [8, 9, 0],
}
)
with pytest.raises(KeyError, match="missing_key"):
df[[("missing_key",)]]
def test_multiindex_dtype_preservation(self):
# GH51261
columns = MultiIndex.from_tuples([("A", "B")], names=["lvl1", "lvl2"])
df = DataFrame(["value"], columns=columns).astype("category")
df_no_multiindex = df["A"]
assert isinstance(df_no_multiindex["B"].dtype, CategoricalDtype)
# geopandas 1763 analogue
df = DataFrame(
[[1, 0], [0, 1]],
columns=[
["foo", "foo"],
["location", "location"],
["x", "y"],
],
).assign(bools=Series([True, False], dtype="boolean"))
assert isinstance(df["bools"].dtype, BooleanDtype)
def test_multiindex_from_tuples_with_nan(self):
# GH#23578
result = MultiIndex.from_tuples([("a", "b", "c"), np.nan, ("d", "", "")])
expected = MultiIndex.from_tuples(
[("a", "b", "c"), (np.nan, np.nan, np.nan), ("d", "", "")]
)
tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,269 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
DatetimeIndex,
MultiIndex,
date_range,
)
import pandas._testing as tm
class TestMultiIndexPartial:
def test_getitem_partial_int(self):
# GH 12416
# with single item
l1 = [10, 20]
l2 = ["a", "b"]
df = DataFrame(index=range(2), columns=MultiIndex.from_product([l1, l2]))
expected = DataFrame(index=range(2), columns=l2)
result = df[20]
tm.assert_frame_equal(result, expected)
# with list
expected = DataFrame(
index=range(2), columns=MultiIndex.from_product([l1[1:], l2])
)
result = df[[20]]
tm.assert_frame_equal(result, expected)
# missing item:
with pytest.raises(KeyError, match="1"):
df[1]
with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
df[[1]]
def test_series_slice_partial(self):
pass
def test_xs_partial(
self,
multiindex_dataframe_random_data,
multiindex_year_month_day_dataframe_random_data,
):
frame = multiindex_dataframe_random_data
ymd = multiindex_year_month_day_dataframe_random_data
result = frame.xs("foo")
result2 = frame.loc["foo"]
expected = frame.T["foo"].T
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, result2)
result = ymd.xs((2000, 4))
expected = ymd.loc[2000, 4]
tm.assert_frame_equal(result, expected)
# ex from #1796
index = MultiIndex(
levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
codes=[
[0, 0, 0, 0, 1, 1, 1, 1],
[0, 0, 1, 1, 0, 0, 1, 1],
[0, 1, 0, 1, 0, 1, 0, 1],
],
)
df = DataFrame(
np.random.default_rng(2).standard_normal((8, 4)),
index=index,
columns=list("abcd"),
)
result = df.xs(("foo", "one"))
expected = df.loc["foo", "one"]
tm.assert_frame_equal(result, expected)
def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
ymd = ymd.T
result = ymd[2000, 2]
expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
expected.columns = expected.columns.droplevel(0).droplevel(0)
tm.assert_frame_equal(result, expected)
def test_fancy_slice_partial(
self,
multiindex_dataframe_random_data,
multiindex_year_month_day_dataframe_random_data,
):
frame = multiindex_dataframe_random_data
result = frame.loc["bar":"baz"]
expected = frame[3:7]
tm.assert_frame_equal(result, expected)
ymd = multiindex_year_month_day_dataframe_random_data
result = ymd.loc[(2000, 2):(2000, 4)]
lev = ymd.index.codes[1]
expected = ymd[(lev >= 1) & (lev <= 3)]
tm.assert_frame_equal(result, expected)
def test_getitem_partial_column_select(self):
idx = MultiIndex(
codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
levels=[["a", "b"], ["x", "y"], ["p", "q"]],
)
df = DataFrame(np.random.default_rng(2).random((3, 2)), index=idx)
result = df.loc[("a", "y"), :]
expected = df.loc[("a", "y")]
tm.assert_frame_equal(result, expected)
result = df.loc[("a", "y"), [1, 0]]
expected = df.loc[("a", "y")][[1, 0]]
tm.assert_frame_equal(result, expected)
with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
df.loc[("a", "foo"), :]
# TODO(ArrayManager) rewrite test to not use .values
# exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
@td.skip_array_manager_invalid_test
def test_partial_set(
self,
multiindex_year_month_day_dataframe_random_data,
using_copy_on_write,
warn_copy_on_write,
):
# GH #397
ymd = multiindex_year_month_day_dataframe_random_data
df = ymd.copy()
exp = ymd.copy()
df.loc[2000, 4] = 0
exp.iloc[65:85] = 0
tm.assert_frame_equal(df, exp)
if using_copy_on_write:
with tm.raises_chained_assignment_error():
df["A"].loc[2000, 4] = 1
df.loc[(2000, 4), "A"] = 1
else:
with tm.raises_chained_assignment_error():
df["A"].loc[2000, 4] = 1
exp.iloc[65:85, 0] = 1
tm.assert_frame_equal(df, exp)
df.loc[2000] = 5
exp.iloc[:100] = 5
tm.assert_frame_equal(df, exp)
# this works...for now
with tm.raises_chained_assignment_error():
df["A"].iloc[14] = 5
if using_copy_on_write:
assert df["A"].iloc[14] == exp["A"].iloc[14]
else:
assert df["A"].iloc[14] == 5
@pytest.mark.parametrize("dtype", [int, float])
def test_getitem_intkey_leading_level(
self, multiindex_year_month_day_dataframe_random_data, dtype
):
# GH#33355 dont fall-back to positional when leading level is int
ymd = multiindex_year_month_day_dataframe_random_data
levels = ymd.index.levels
ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:])
ser = ymd["A"]
mi = ser.index
assert isinstance(mi, MultiIndex)
if dtype is int:
assert mi.levels[0].dtype == np.dtype(int)
else:
assert mi.levels[0].dtype == np.float64
assert 14 not in mi.levels[0]
assert not mi.levels[0]._should_fallback_to_positional
assert not mi._should_fallback_to_positional
with pytest.raises(KeyError, match="14"):
ser[14]
# ---------------------------------------------------------------------
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
expected = frame.copy()
result = frame.copy()
result.loc[["foo", "bar"]] = 0
expected.loc["foo"] = 0
expected.loc["bar"] = 0
tm.assert_frame_equal(result, expected)
expected = frame.copy()
result = frame.copy()
result.loc["foo":"bar"] = 0
expected.loc["foo"] = 0
expected.loc["bar"] = 0
tm.assert_frame_equal(result, expected)
expected = frame["A"].copy()
result = frame["A"].copy()
result.loc[["foo", "bar"]] = 0
expected.loc["foo"] = 0
expected.loc["bar"] = 0
tm.assert_series_equal(result, expected)
expected = frame["A"].copy()
result = frame["A"].copy()
result.loc["foo":"bar"] = 0
expected.loc["foo"] = 0
expected.loc["bar"] = 0
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"indexer, exp_idx, exp_values",
[
(
slice("2019-2", None),
DatetimeIndex(["2019-02-01"], dtype="M8[ns]"),
[2, 3],
),
(
slice(None, "2019-2"),
date_range("2019", periods=2, freq="MS"),
[0, 1, 2, 3],
),
],
)
def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
# GH: 25165
date_idx = date_range("2019", periods=2, freq="MS")
df = DataFrame(
list(range(4)),
index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]),
)
expected = DataFrame(
exp_values,
index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]),
)
result = df[indexer]
tm.assert_frame_equal(result, expected)
result = df.loc[indexer]
tm.assert_frame_equal(result, expected)
result = df.loc(axis=0)[indexer]
tm.assert_frame_equal(result, expected)
result = df.loc[indexer, :]
tm.assert_frame_equal(result, expected)
df2 = df.swaplevel(0, 1).sort_index()
expected = expected.swaplevel(0, 1).sort_index()
result = df2.loc[:, indexer, :]
tm.assert_frame_equal(result, expected)
def test_loc_getitem_partial_both_axis():
    """``df.loc[row, col]`` with partial keys on both MultiIndex axes."""
    # gh-12660
    iterables = [["a", "b"], [2, 1]]
    columns = MultiIndex.from_product(iterables, names=["col1", "col2"])
    rows = MultiIndex.from_product(iterables, names=["row1", "row2"])
    values = np.random.default_rng(2).standard_normal((4, 4))
    df = DataFrame(values, index=rows, columns=columns)

    # Rows under "a" are the first two; columns under "b" are the last two.
    block = df.iloc[:2, 2:]
    expected = block.droplevel("row1").droplevel("col1", axis=1)

    result = df.loc["a", "b"]
    tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,589 @@
import numpy as np
import pytest
from pandas.errors import SettingWithCopyError
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
Series,
date_range,
isna,
notna,
)
import pandas._testing as tm
def assert_equal(a, b):
assert a == b
class TestMultiIndexSetItem:
def check(self, target, indexers, value, compare_fn=assert_equal, expected=None):
target.loc[indexers] = value
result = target.loc[indexers]
if expected is None:
expected = value
compare_fn(result, expected)
def test_setitem_multiindex(self):
# GH#7190
cols = ["A", "w", "l", "a", "x", "X", "d", "profit"]
index = MultiIndex.from_product(
[np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
)
t, n = 0, 2
df = DataFrame(
np.nan,
columns=cols,
index=index,
)
self.check(target=df, indexers=((t, n), "X"), value=0)
df = DataFrame(-999, columns=cols, index=index)
self.check(target=df, indexers=((t, n), "X"), value=1)
df = DataFrame(columns=cols, index=index)
self.check(target=df, indexers=((t, n), "X"), value=2)
# gh-7218: assigning with 0-dim arrays
df = DataFrame(-999, columns=cols, index=index)
self.check(
target=df,
indexers=((t, n), "X"),
value=np.array(3),
expected=3,
)
def test_setitem_multiindex2(self):
# GH#5206
df = DataFrame(
np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float
)
df["F"] = 99
row_selection = df["A"] % 2 == 0
col_selection = ["B", "C"]
df.loc[row_selection, col_selection] = df["F"]
output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
tm.assert_frame_equal(df.loc[row_selection, col_selection], output)
self.check(
target=df,
indexers=(row_selection, col_selection),
value=df["F"],
compare_fn=tm.assert_frame_equal,
expected=output,
)
def test_setitem_multiindex3(self):
# GH#11372
idx = MultiIndex.from_product(
[["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
)
cols = MultiIndex.from_product(
[["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")]
)
df = DataFrame(
np.random.default_rng(2).random((12, 4)), index=idx, columns=cols
)
subidx = MultiIndex.from_arrays(
[["A", "A"], date_range("2015-01-01", "2015-02-01", freq="MS")]
)
subcols = MultiIndex.from_arrays(
[["foo", "foo"], date_range("2016-01-01", "2016-02-01", freq="MS")]
)
vals = DataFrame(
np.random.default_rng(2).random((2, 2)), index=subidx, columns=subcols
)
self.check(
target=df,
indexers=(subidx, subcols),
value=vals,
compare_fn=tm.assert_frame_equal,
)
# set all columns
vals = DataFrame(
np.random.default_rng(2).random((2, 4)), index=subidx, columns=cols
)
self.check(
target=df,
indexers=(subidx, slice(None, None, None)),
value=vals,
compare_fn=tm.assert_frame_equal,
)
# identity
copy = df.copy()
self.check(
target=df,
indexers=(df.index, df.columns),
value=df,
compare_fn=tm.assert_frame_equal,
expected=copy,
)
# TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
# all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
@td.skip_array_manager_not_yet_implemented
def test_multiindex_setitem(self):
# GH 3738
# setting with a multi-index right hand side
arrays = [
np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
np.array(["one", "two", "one", "one", "two", "one"]),
np.arange(0, 6, 1),
]
df_orig = DataFrame(
np.random.default_rng(2).standard_normal((6, 3)),
index=arrays,
columns=["A", "B", "C"],
).sort_index()
expected = df_orig.loc[["bar"]] * 2
df = df_orig.copy()
df.loc[["bar"]] *= 2
tm.assert_frame_equal(df.loc[["bar"]], expected)
# raise because these have differing levels
msg = "cannot align on a multi-index with out specifying the join levels"
with pytest.raises(TypeError, match=msg):
df.loc["bar"] *= 2
def test_multiindex_setitem2(self):
# from SO
# https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
df_orig = DataFrame.from_dict(
{
"price": {
("DE", "Coal", "Stock"): 2,
("DE", "Gas", "Stock"): 4,
("DE", "Elec", "Demand"): 1,
("FR", "Gas", "Stock"): 5,
("FR", "Solar", "SupIm"): 0,
("FR", "Wind", "SupIm"): 0,
}
}
)
df_orig.index = MultiIndex.from_tuples(
df_orig.index, names=["Sit", "Com", "Type"]
)
expected = df_orig.copy()
expected.iloc[[0, 1, 3]] *= 2
idx = pd.IndexSlice
df = df_orig.copy()
df.loc[idx[:, :, "Stock"], :] *= 2
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[idx[:, :, "Stock"], "price"] *= 2
tm.assert_frame_equal(df, expected)
def test_multiindex_assignment(self):
# GH3777 part 2
# mixed dtype
df = DataFrame(
np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
columns=list("abc"),
index=[[4, 4, 8], [8, 10, 12]],
)
df["d"] = np.nan
arr = np.array([0.0, 1.0])
df.loc[4, "d"] = arr
tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))
def test_multiindex_assignment_single_dtype(
self, using_copy_on_write, warn_copy_on_write
):
# GH3777 part 2b
# single dtype
arr = np.array([0.0, 1.0])
df = DataFrame(
np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
columns=list("abc"),
index=[[4, 4, 8], [8, 10, 12]],
dtype=np.int64,
)
view = df["c"].iloc[:2].values
# arr can be losslessly cast to int, so this setitem is inplace
# INFO(CoW-warn) this does not warn because we directly took .values
# above, so no reference to a pandas object is alive for `view`
df.loc[4, "c"] = arr
exp = Series(arr, index=[8, 10], name="c", dtype="int64")
result = df.loc[4, "c"]
tm.assert_series_equal(result, exp)
# extra check for inplace-ness
if not using_copy_on_write:
tm.assert_numpy_array_equal(view, exp.values)
# arr + 0.5 cannot be cast losslessly to int, so we upcast
with tm.assert_produces_warning(
FutureWarning, match="item of incompatible dtype"
):
df.loc[4, "c"] = arr + 0.5
result = df.loc[4, "c"]
exp = exp + 0.5
tm.assert_series_equal(result, exp)
# scalar ok
with tm.assert_cow_warning(warn_copy_on_write):
df.loc[4, "c"] = 10
exp = Series(10, index=[8, 10], name="c", dtype="float64")
tm.assert_series_equal(df.loc[4, "c"], exp)
# invalid assignments
msg = "Must have equal len keys and value when setting with an iterable"
with pytest.raises(ValueError, match=msg):
df.loc[4, "c"] = [0, 1, 2, 3]
with pytest.raises(ValueError, match=msg):
df.loc[4, "c"] = [0]
# But with a length-1 listlike column indexer this behaves like
# `df.loc[4, "c"] = 0
with tm.assert_cow_warning(warn_copy_on_write):
df.loc[4, ["c"]] = [0]
assert (df.loc[4, "c"] == 0).all()
def test_groupby_example(self):
# groupby example
NUM_ROWS = 100
NUM_COLS = 10
col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())]
index_cols = col_names[:5]
df = DataFrame(
np.random.default_rng(2).integers(5, size=(NUM_ROWS, NUM_COLS)),
dtype=np.int64,
columns=col_names,
)
df = df.set_index(index_cols).sort_index()
grp = df.groupby(level=index_cols[:4])
df["new_col"] = np.nan
# we are actually operating on a copy here
# but in this case, that's ok
for name, df2 in grp:
new_vals = np.arange(df2.shape[0])
df.loc[name, "new_col"] = new_vals
def test_series_setitem(
self, multiindex_year_month_day_dataframe_random_data, warn_copy_on_write
):
ymd = multiindex_year_month_day_dataframe_random_data
s = ymd["A"]
with tm.assert_cow_warning(warn_copy_on_write):
s[2000, 3] = np.nan
assert isna(s.values[42:65]).all()
assert notna(s.values[:42]).all()
assert notna(s.values[65:]).all()
with tm.assert_cow_warning(warn_copy_on_write):
s[2000, 3, 10] = np.nan
assert isna(s.iloc[49])
with pytest.raises(KeyError, match="49"):
# GH#33355 dont fall-back to positional when leading level is int
s[49]
def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T.copy()
values = df.values.copy()
result = df[df > 0]
expected = df.where(df > 0)
tm.assert_frame_equal(result, expected)
df[df > 0] = 5
values[values > 0] = 5
tm.assert_almost_equal(df.values, values)
df[df == 5] = 0
values[values == 5] = 0
tm.assert_almost_equal(df.values, values)
# a df that needs alignment first
df[df[:-1] < 0] = 2
np.putmask(values[:-1], values[:-1] < 0, 2)
tm.assert_almost_equal(df.values, values)
with pytest.raises(TypeError, match="boolean values only"):
df[df * 0] = 2
def test_frame_getitem_setitem_multislice(self):
levels = [["t1", "t2"], ["a", "b", "c"]]
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)
result = df.loc[:, "value"]
tm.assert_series_equal(df["value"], result)
result = df.loc[df.index[1:3], "value"]
tm.assert_series_equal(df["value"][1:3], result)
result = df.loc[:, :]
tm.assert_frame_equal(df, result)
result = df
df.loc[:, "value"] = 10
result["value"] = 10
tm.assert_frame_equal(df, result)
df.loc[:, :] = 10
tm.assert_frame_equal(df, result)
def test_frame_setitem_multi_column(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=[["a", "a", "b", "b"], [0, 1, 0, 1]],
)
cp = df.copy()
cp["a"] = cp["b"]
tm.assert_frame_equal(cp["a"], cp["b"])
# set with ndarray
cp = df.copy()
cp["a"] = cp["b"].values
tm.assert_frame_equal(cp["a"], cp["b"])
def test_frame_setitem_multi_column2(self):
# ---------------------------------------
# GH#1803
columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
df = DataFrame(index=[1, 3, 5], columns=columns)
# Works, but adds a column instead of updating the two existing ones
df["A"] = 0.0 # Doesn't work
assert (df["A"].values == 0).all()
# it broadcasts
df["B", "1"] = [1, 2, 3]
df["A"] = df["B", "1"]
sliced_a1 = df["A", "1"]
sliced_a2 = df["A", "2"]
sliced_b1 = df["B", "1"]
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
assert sliced_a1.name == ("A", "1")
assert sliced_a2.name == ("A", "2")
assert sliced_b1.name == ("B", "1")
def test_loc_getitem_tuple_plus_columns(
self, multiindex_year_month_day_dataframe_random_data
):
# GH #1013
ymd = multiindex_year_month_day_dataframe_random_data
df = ymd[:5]
result = df.loc[(2000, 1, 6), ["A", "B", "C"]]
expected = df.loc[2000, 1, 6][["A", "B", "C"]]
tm.assert_series_equal(result, expected)
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_loc_getitem_setitem_slice_integers(self, frame_or_series):
index = MultiIndex(
levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
)
obj = DataFrame(
np.random.default_rng(2).standard_normal((len(index), 4)),
index=index,
columns=["a", "b", "c", "d"],
)
obj = tm.get_obj(obj, frame_or_series)
res = obj.loc[1:2]
exp = obj.reindex(obj.index[2:])
tm.assert_equal(res, exp)
obj.loc[1:2] = 7
assert (obj.loc[1:2] == 7).values.all()
def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
dft = frame.T
s = dft["foo", "two"]
dft["foo", "two"] = s > s.median()
tm.assert_series_equal(dft["foo", "two"], s > s.median())
# assert isinstance(dft._data.blocks[1].items, MultiIndex)
reindexed = dft.reindex(columns=[("foo", "two")])
tm.assert_series_equal(reindexed["foo", "two"], s > s.median())
def test_set_column_scalar_with_loc(
self, multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
frame = multiindex_dataframe_random_data
subset = frame.index[[1, 4, 5]]
frame.loc[subset] = 99
assert (frame.loc[subset].values == 99).all()
frame_original = frame.copy()
col = frame["B"]
with tm.assert_cow_warning(warn_copy_on_write):
col[subset] = 97
if using_copy_on_write:
# chained setitem doesn't work with CoW
tm.assert_frame_equal(frame, frame_original)
else:
assert (frame.loc[subset, "B"] == 97).all()
def test_nonunique_assignment_1750(self):
df = DataFrame(
[[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
)
df = df.set_index(["A", "B"])
mi = MultiIndex.from_tuples([(1, 1)])
df.loc[mi, "C"] = "_"
assert (df.xs((1, 1))["C"] == "_").all()
def test_astype_assignment_with_dups(self):
# GH 4686
# assignment with dups that has a dtype change
cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
index = df.index.copy()
df["A"] = df["A"].astype(np.float64)
tm.assert_index_equal(df.index, index)
def test_setitem_nonmonotonic(self):
# https://github.com/pandas-dev/pandas/issues/31449
index = MultiIndex.from_tuples(
[("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"]
)
df = DataFrame(data=[0, 1, 2], index=index, columns=["e"])
df.loc["a", "e"] = np.arange(99, 101, dtype="int64")
expected = DataFrame({"e": [99, 1, 100]}, index=index)
tm.assert_frame_equal(df, expected)
class TestSetitemWithExpansionMultiIndex:
def test_setitem_new_column_mixed_depth(self):
arrays = [
["a", "top", "top", "routine1", "routine1", "routine2"],
["", "OD", "OD", "result1", "result2", "result1"],
["", "wx", "wy", "", "", ""],
]
tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)
df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index)
result = df.copy()
expected = df.copy()
result["b"] = [1, 2, 3, 4]
expected["b", "", ""] = [1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
def test_setitem_new_column_all_na(self):
# GH#1534
mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")])
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
s = Series({(1, 1): 1, (1, 2): 2})
df["new"] = s
assert df["new"].isna().all()
def test_setitem_enlargement_keep_index_names(self):
# GH#53053
mi = MultiIndex.from_tuples([(1, 2, 3)], names=["i1", "i2", "i3"])
df = DataFrame(data=[[10, 20, 30]], index=mi, columns=["A", "B", "C"])
df.loc[(0, 0, 0)] = df.loc[(1, 2, 3)]
mi_expected = MultiIndex.from_tuples(
[(1, 2, 3), (0, 0, 0)], names=["i1", "i2", "i3"]
)
expected = DataFrame(
data=[[10, 20, 30], [10, 20, 30]],
index=mi_expected,
columns=["A", "B", "C"],
)
tm.assert_frame_equal(df, expected)
@td.skip_array_manager_invalid_test  # df["foo"] select multiple columns -> .values
# is not a view
def test_frame_setitem_view_direct(
    multiindex_dataframe_random_data, using_copy_on_write
):
    """Write through ``df["foo"].values``; under CoW the write must be rejected."""
    # this works because we are modifying the underlying array
    # really a no-no
    df = multiindex_dataframe_random_data.T

    if not using_copy_on_write:
        df["foo"].values[:] = 0
        assert (df["foo"].values == 0).all()
        return

    # With copy-on-write the array is expected to be read-only and unchanged.
    with pytest.raises(ValueError, match="read-only"):
        df["foo"].values[:] = 0
    assert (df["foo"].values != 0).all()
def test_frame_setitem_copy_raises(
    multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
    """Chained ``df["foo"]["one"] = ...`` is flagged in every copy-on-write mode."""
    # will raise/warn as its chained assignment
    df = multiindex_dataframe_random_data.T
    cow_active = using_copy_on_write or warn_copy_on_write

    if not cow_active:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.raises_chained_assignment_error():
                df["foo"]["one"] = 2
    else:
        with tm.raises_chained_assignment_error():
            df["foo"]["one"] = 2
def test_frame_setitem_copy_no_write(
    multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
    """A rejected chained assignment leaves the frame equal to the original."""
    frame = multiindex_dataframe_random_data.T
    df = frame.copy()
    cow_active = using_copy_on_write or warn_copy_on_write

    if not cow_active:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.raises_chained_assignment_error():
                df["foo"]["one"] = 2
    else:
        with tm.raises_chained_assignment_error():
            df["foo"]["one"] = 2

    # The copy that was (unsuccessfully) written to must still match the source.
    tm.assert_frame_equal(df, frame)
def test_frame_setitem_partial_multiindex():
    """Assign a Series indexed by a subset of the frame's index levels."""
    # GH 54875
    data = {
        "a": [1, 2, 3],
        "b": [3, 4, 5],
        "c": 6,
        "d": 7,
    }
    df = DataFrame(data).set_index(["a", "b", "c"])
    # The Series carries only levels "a" and "b"; level "c" is dropped.
    ser = Series(8, index=df.index.droplevel("c"))

    from_series = df.copy()
    from_series["d"] = ser

    from_scalar = df.copy()
    from_scalar["d"] = 8

    tm.assert_frame_equal(from_series, from_scalar)

View File

@ -0,0 +1,796 @@
from datetime import (
datetime,
timedelta,
)
import numpy as np
import pytest
from pandas.errors import UnsortedIndexError
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
Timestamp,
)
import pandas._testing as tm
from pandas.tests.indexing.common import _mklbl
class TestMultiIndexSlicers:
def test_per_axis_per_level_getitem(self):
    """Exercise ``.loc`` with per-axis, per-level indexer tuples.

    Covers a 4-level row index, a 2x2-level frame (rows and columns), a
    multi-level Series, boolean indexers, and the errors raised for a
    wrong-length boolean mask, a missing column label and an unsorted index.
    """
    # GH6134
    # example test case
    ix = MultiIndex.from_product(
        [_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
    )
    df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)

    # Slice on level 0, everything on level 1, explicit list on level 2.
    result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
    # The expectation is built by filtering the full index tuples by hand.
    expected = df.loc[
        [
            (
                a,
                b,
                c,
                d,
            )
            for a, b, c, d in df.index.values
            if a in ("A1", "A2", "A3") and c in ("C1", "C3")
        ]
    ]
    tm.assert_frame_equal(result, expected)

    # Same as above but level 2 is selected with a label slice C1..C3.
    expected = df.loc[
        [
            (
                a,
                b,
                c,
                d,
            )
            for a, b, c, d in df.index.values
            if a in ("A1", "A2", "A3") and c in ("C1", "C2", "C3")
        ]
    ]
    result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :]
    tm.assert_frame_equal(result, expected)

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples(
        [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
    )
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )

    df = DataFrame(
        np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
    )
    # Sort both axes; the positional expectations below refer to this order.
    df = df.sort_index(axis=0).sort_index(axis=1)

    # identity
    result = df.loc[(slice(None), slice(None)), :]
    tm.assert_frame_equal(result, df)
    result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))]
    tm.assert_frame_equal(result, df)
    result = df.loc[:, (slice(None), slice(None))]
    tm.assert_frame_equal(result, df)

    # index
    result = df.loc[(slice(None), [1]), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    result = df.loc[(slice(None), 1), :]
    expected = df.iloc[[0, 3]]
    tm.assert_frame_equal(result, expected)

    # columns
    result = df.loc[:, (slice(None), ["foo"])]
    expected = df.iloc[:, [1, 3]]
    tm.assert_frame_equal(result, expected)

    # both
    result = df.loc[(slice(None), 1), (slice(None), ["foo"])]
    expected = df.iloc[[0, 3], [1, 3]]
    tm.assert_frame_equal(result, expected)

    # Scalar label on each axis: the expected frame keeps only the second
    # level on both axes ("two" for rows, "lvl1" for columns).
    result = df.loc["A", "a"]
    expected = DataFrame(
        {"bar": [1, 5, 9], "foo": [0, 4, 8]},
        index=Index([1, 2, 3], name="two"),
        columns=Index(["bar", "foo"], name="lvl1"),
    )
    tm.assert_frame_equal(result, expected)

    result = df.loc[(slice(None), [1, 2]), :]
    expected = df.iloc[[0, 1, 3]]
    tm.assert_frame_equal(result, expected)

    # multi-level series
    s = Series(np.arange(len(ix.to_numpy())), index=ix)
    result = s.loc["A1":"A3", :, ["C1", "C3"]]
    expected = s.loc[
        [
            (
                a,
                b,
                c,
                d,
            )
            for a, b, c, d in s.index.values
            if a in ("A1", "A2", "A3") and c in ("C1", "C3")
        ]
    ]
    tm.assert_series_equal(result, expected)

    # boolean indexers
    result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
    expected = df.iloc[[2, 3]]
    tm.assert_frame_equal(result, expected)

    msg = (
        "cannot index with a boolean indexer "
        "that is not the same length as the index"
    )
    # The mask has 2 entries while the frame has 4 rows.
    with pytest.raises(ValueError, match=msg):
        df.loc[(slice(None), np.array([True, False])), :]

    with pytest.raises(KeyError, match=r"\[1\] not in index"):
        # slice(None) is on the index, [1] is on the columns, but 1 is
        # not in the columns, so we raise
        # This used to treat [1] as positional GH#16396
        df.loc[slice(None), [1]]

    # not lexsorted
    assert df.index._lexsort_depth == 2
    df = df.sort_index(level=1, axis=0)
    assert df.index._lexsort_depth == 0

    msg = (
        "MultiIndex slicing requires the index to be "
        r"lexsorted: slicing on levels \[1\], lexsort depth 0"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc[(slice(None), slice("bar")), :]

    # GH 16734: not sorted, but no real slicing
    result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[1, 3], :])
def test_multiindex_slicers_non_unique(self):
# GH 7106
# non-unique mi index support
df = (
DataFrame(
{
"A": ["foo", "foo", "foo", "foo"],
"B": ["a", "a", "a", "a"],
"C": [1, 2, 1, 3],
"D": [1, 2, 3, 4],
}
)
.set_index(["A", "B", "C"])
.sort_index()
)
assert not df.index.is_unique
expected = (
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
.set_index(["A", "B", "C"])
.sort_index()
)
result = df.loc[(slice(None), slice(None), 1), :]
tm.assert_frame_equal(result, expected)
# this is equivalent of an xs expression
result = df.xs(1, level=2, drop_level=False)
tm.assert_frame_equal(result, expected)
df = (
DataFrame(
{
"A": ["foo", "foo", "foo", "foo"],
"B": ["a", "a", "a", "a"],
"C": [1, 2, 1, 2],
"D": [1, 2, 3, 4],
}
)
.set_index(["A", "B", "C"])
.sort_index()
)
assert not df.index.is_unique
expected = (
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
.set_index(["A", "B", "C"])
.sort_index()
)
result = df.loc[(slice(None), slice(None), 1), :]
assert not result.index.is_unique
tm.assert_frame_equal(result, expected)
# GH12896
# numpy-implementation dependent bug
ints = [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
12,
13,
14,
14,
16,
17,
18,
19,
200000,
200000,
]
n = len(ints)
idx = MultiIndex.from_arrays([["a"] * n, ints])
result = Series([1] * n, index=idx)
result = result.sort_index()
result = result.loc[(slice(None), slice(100000))]
expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
tm.assert_series_equal(result, expected)
def test_multiindex_slicers_datetimelike(self):
# GH 7429
# buggy/inconsistent behavior when slicing with datetime-like
dates = [datetime(2012, 1, 1, 12, 12, 12) + timedelta(days=i) for i in range(6)]
freq = [1, 2]
index = MultiIndex.from_product([dates, freq], names=["date", "frequency"])
df = DataFrame(
np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4),
index=index,
columns=list("ABCD"),
)
# multi-axis slicing
idx = pd.IndexSlice
expected = df.iloc[[0, 2, 4], [0, 1]]
result = df.loc[
(
slice(
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
),
slice(1, 1),
),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
result = df.loc[
(
idx[
Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12")
],
idx[1:1],
),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
result = df.loc[
(
slice(
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
),
1,
),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
# with strings
result = df.loc[
(slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
result = df.loc[
(idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"]
]
tm.assert_frame_equal(result, expected)
def test_multiindex_slicers_edges(self):
# GH 8132
# various edge cases
df = DataFrame(
{
"A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5,
"B": ["B0", "B0", "B1", "B1", "B2"] * 3,
"DATE": [
"2013-06-11",
"2013-07-02",
"2013-07-09",
"2013-07-30",
"2013-08-06",
"2013-06-11",
"2013-07-02",
"2013-07-09",
"2013-07-30",
"2013-08-06",
"2013-09-03",
"2013-10-01",
"2013-07-09",
"2013-08-06",
"2013-09-03",
],
"VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2],
}
)
df["DATE"] = pd.to_datetime(df["DATE"])
df1 = df.set_index(["A", "B", "DATE"])
df1 = df1.sort_index()
# A1 - Get all values under "A0" and "A1"
result = df1.loc[(slice("A1")), :]
expected = df1.iloc[0:10]
tm.assert_frame_equal(result, expected)
# A2 - Get all values from the start to "A2"
result = df1.loc[(slice("A2")), :]
expected = df1
tm.assert_frame_equal(result, expected)
# A3 - Get all values under "B1" or "B2"
result = df1.loc[(slice(None), slice("B1", "B2")), :]
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
tm.assert_frame_equal(result, expected)
# A4 - Get all values between 2013-07-02 and 2013-07-09
result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :]
expected = df1.iloc[[1, 2, 6, 7, 12]]
tm.assert_frame_equal(result, expected)
# B1 - Get all values in B0 that are also under A0, A1 and A2
result = df1.loc[(slice("A2"), slice("B0")), :]
expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
tm.assert_frame_equal(result, expected)
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
# the As)
result = df1.loc[(slice(None), slice("B2")), :]
expected = df1
tm.assert_frame_equal(result, expected)
# B3 - Get all values from B1 to B2 and up to 2013-08-06
result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :]
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
tm.assert_frame_equal(result, expected)
# B4 - Same as A4 but the start of the date slice is not a key.
# shows indexing on a partial selection slice
result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :]
expected = df1.iloc[[1, 2, 6, 7, 12]]
tm.assert_frame_equal(result, expected)
def test_per_axis_per_level_doc_examples(self):
    """Run the per-axis / per-level slicing examples on a 4-level row index."""
    # test index maker
    ix = pd.IndexSlice

    # from indexing.rst / advanced
    row_levels = [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
    rows = MultiIndex.from_product(row_levels)
    cols = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    values = np.arange(len(rows) * len(cols), dtype="int64").reshape(
        (len(rows), len(cols))
    )
    df = DataFrame(values, index=rows, columns=cols)

    # A1..A3 on the first level together with C1/C3 on the third
    want = df.loc[
        [
            key
            for key in df.index.values
            if key[0] in ("A1", "A2", "A3") and key[2] in ("C1", "C3")
        ]
    ]
    got = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
    tm.assert_frame_equal(got, want)
    got = df.loc[ix["A1":"A3", :, ["C1", "C3"]], :]
    tm.assert_frame_equal(got, want)

    # C1/C3 on the third level only
    want = df.loc[[key for key in df.index.values if key[2] in ("C1", "C3")]]
    got = df.loc[(slice(None), slice(None), ["C1", "C3"]), :]
    tm.assert_frame_equal(got, want)
    got = df.loc[ix[:, :, ["C1", "C3"]], :]
    tm.assert_frame_equal(got, want)

    # not sorted
    msg = (
        "MultiIndex slicing requires the index to be lexsorted: "
        r"slicing on levels \[1\], lexsort depth 1"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc["A1", ("a", slice("foo"))]

    # GH 16734: not sorted, but no real slicing
    foo_of_a1 = df.loc["A1", (slice(None), "foo")]
    tm.assert_frame_equal(foo_of_a1, df.loc["A1"].iloc[:, [0, 2]])

    df = df.sort_index(axis=1)

    # slicing (only checked for not raising)
    df.loc["A1", (slice(None), "foo")]
    df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")]

    # setitem
    df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
def test_loc_axis_arguments(self):
    """`.loc(axis=...)` accepts ints and axis names and rejects unknown axes."""
    row_levels = [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
    rows = MultiIndex.from_product(row_levels)
    cols = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    values = np.arange(len(rows) * len(cols), dtype="int64").reshape(
        (len(rows), len(cols))
    )
    df = DataFrame(values, index=rows, columns=cols)
    df = df.sort_index().sort_index(axis=1)

    # axis 0
    got = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
    want = df.loc[
        [
            key
            for key in df.index.values
            if key[0] in ("A1", "A2", "A3") and key[2] in ("C1", "C3")
        ]
    ]
    tm.assert_frame_equal(got, want)

    got = df.loc(axis="index")[:, :, ["C1", "C3"]]
    want = df.loc[[key for key in df.index.values if key[2] in ("C1", "C3")]]
    tm.assert_frame_equal(got, want)

    # axis 1, by number and by name
    for column_axis in (1, "columns"):
        got = df.loc(axis=column_axis)[:, "foo"]
        want = df.loc[:, (slice(None), "foo")]
        tm.assert_frame_equal(got, want)

    # invalid axis
    for bad_axis in [-1, 2, "foo"]:
        msg = f"No axis named {bad_axis} for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.loc(axis=bad_axis)[:, :, ["C1", "C3"]]
def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self):
# GH29519
df = DataFrame(
np.arange(27).reshape(3, 9),
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
)
result = df.loc(axis=1)["a1":"a2"]
expected = df.iloc[:, :-3]
tm.assert_frame_equal(result, expected)
def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self):
# GH29519
df = DataFrame(
np.arange(27).reshape(3, 9),
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
)
result = df.loc(axis=1)["a1"]
expected = df.iloc[:, :3]
expected.columns = ["b1", "b2", "b3"]
tm.assert_frame_equal(result, expected)
def test_loc_ax_single_level_indexer_simple_df(self):
# GH29519
# test single level indexing on single index column data frame
df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"])
result = df.loc(axis=1)["a"]
expected = Series(np.array([0, 3, 6]), name="a")
tm.assert_series_equal(result, expected)
def test_per_axis_per_level_setitem(self):
    """Assign through per-axis / per-level slicers and compare with `iloc` writes."""
    # test index maker
    ix = pd.IndexSlice

    # test multi-index slicing with per axis and per index controls
    rows = MultiIndex.from_tuples(
        [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
    )
    cols = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    df_orig = DataFrame(
        np.arange(16, dtype="int64").reshape(4, 4), index=rows, columns=cols
    )
    df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)

    def _via_iloc(positional, value=100):
        # Expected frame: a fresh copy of df_orig with `value` written at
        # the given positional key.
        want = df_orig.copy()
        want.iloc[positional] = value
        return want

    every_cell = (slice(None), slice(None))
    rows_where_two_is_1 = [0, 3]
    foo_cells = ([0, 3], [1, 3])
    two_is_1 = (slice(None), 1)
    lvl1_is_foo = (slice(None), ["foo"])

    # identity
    got = df_orig.copy()
    got.loc[(slice(None), slice(None)), :] = 100
    tm.assert_frame_equal(got, _via_iloc(every_cell))

    got = df_orig.copy()
    got.loc(axis=0)[:, :] = 100
    tm.assert_frame_equal(got, _via_iloc(every_cell))

    got = df_orig.copy()
    got.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100
    tm.assert_frame_equal(got, _via_iloc(every_cell))

    got = df_orig.copy()
    got.loc[:, (slice(None), slice(None))] = 100
    tm.assert_frame_equal(got, _via_iloc(every_cell))

    # index
    got = df_orig.copy()
    got.loc[(slice(None), [1]), :] = 100
    tm.assert_frame_equal(got, _via_iloc(rows_where_two_is_1))

    got = df_orig.copy()
    got.loc[(slice(None), 1), :] = 100
    tm.assert_frame_equal(got, _via_iloc(rows_where_two_is_1))

    got = df_orig.copy()
    got.loc(axis=0)[:, 1] = 100
    tm.assert_frame_equal(got, _via_iloc(rows_where_two_is_1))

    # columns
    got = df_orig.copy()
    got.loc[:, (slice(None), ["foo"])] = 100
    tm.assert_frame_equal(got, _via_iloc((slice(None), [1, 3])))

    # both
    got = df_orig.copy()
    got.loc[two_is_1, lvl1_is_foo] = 100
    tm.assert_frame_equal(got, _via_iloc(foo_cells))

    got = df_orig.copy()
    got.loc[ix[:, 1], ix[:, ["foo"]]] = 100
    tm.assert_frame_equal(got, _via_iloc(foo_cells))

    got = df_orig.copy()
    got.loc["A", "a"] = 100
    tm.assert_frame_equal(got, _via_iloc((slice(0, 3), slice(0, 2))))

    # setting with a list-like
    got = df_orig.copy()
    got.loc[two_is_1, lvl1_is_foo] = np.array(
        [[100, 100], [100, 100]], dtype="int64"
    )
    tm.assert_frame_equal(got, _via_iloc(foo_cells))

    # not enough values
    got = df_orig.copy()
    msg = "setting an array element with a sequence."
    with pytest.raises(ValueError, match=msg):
        got.loc[two_is_1, lvl1_is_foo] = np.array(
            [[100], [100, 100]], dtype="int64"
        )

    msg = "Must have equal len keys and value when setting with an iterable"
    with pytest.raises(ValueError, match=msg):
        got.loc[two_is_1, lvl1_is_foo] = np.array(
            [100, 100, 100, 100], dtype="int64"
        )

    # with an alignable rhs
    got = df_orig.copy()
    got.loc[two_is_1, lvl1_is_foo] = got.loc[two_is_1, lvl1_is_foo] * 5
    want = df_orig.copy()
    want.iloc[foo_cells] = want.iloc[foo_cells] * 5
    tm.assert_frame_equal(got, want)

    got = df_orig.copy()
    got.loc[two_is_1, lvl1_is_foo] *= got.loc[two_is_1, lvl1_is_foo]
    want = df_orig.copy()
    want.iloc[foo_cells] *= want.iloc[foo_cells]
    tm.assert_frame_equal(got, want)

    # rhs gains an extra ("c", "bah") column before it is multiplied in; the
    # expected frame is the same as in the previous case
    rhs = df_orig.loc[two_is_1, lvl1_is_foo].copy()
    rhs.loc[:, ("c", "bah")] = 10
    got = df_orig.copy()
    got.loc[two_is_1, lvl1_is_foo] *= rhs
    want = df_orig.copy()
    want.iloc[foo_cells] *= want.iloc[foo_cells]
    tm.assert_frame_equal(got, want)
def test_multiindex_label_slicing_with_negative_step(self):
ser = Series(
np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)])
)
SLC = pd.IndexSlice
tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1])
tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1])
tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1])
tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1])
tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1])
tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1])
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1])
tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1])
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1])
tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0])
tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1])
tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1])
tm.assert_indexing_slices_equivalent(
ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1]
)
def test_multiindex_slice_first_level(self):
# GH 12697
freq = ["a", "b", "c", "d"]
idx = MultiIndex.from_product([freq, range(500)])
df = DataFrame(list(range(2000)), index=idx, columns=["Test"])
df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
result = df_slice.loc["a"]
expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71))
tm.assert_frame_equal(result, expected)
result = df_slice.loc["d"]
expected = DataFrame(
list(range(1530, 1571)), columns=["Test"], index=range(30, 71)
)
tm.assert_frame_equal(result, expected)
def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
s = ymd["A"]
result = s[5:]
expected = s.reindex(s.index[5:])
tm.assert_series_equal(result, expected)
s = ymd["A"].copy()
exp = ymd["A"].copy()
s[5:] = 0
exp.iloc[5:] = 0
tm.assert_numpy_array_equal(s.values, exp.values)
result = ymd[5:]
expected = ymd.reindex(s.index[5:])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "dtype, loc, iloc",
    [
        # dtype = int, step = -1
        ("int", slice(None, None, -1), slice(None, None, -1)),
        ("int", slice(3, None, -1), slice(3, None, -1)),
        ("int", slice(None, 1, -1), slice(None, 0, -1)),
        ("int", slice(3, 1, -1), slice(3, 0, -1)),
        # dtype = int, step = -2
        ("int", slice(None, None, -2), slice(None, None, -2)),
        ("int", slice(3, None, -2), slice(3, None, -2)),
        ("int", slice(None, 1, -2), slice(None, 0, -2)),
        ("int", slice(3, 1, -2), slice(3, 0, -2)),
        # dtype = str, step = -1
        ("str", slice(None, None, -1), slice(None, None, -1)),
        ("str", slice("d", None, -1), slice(3, None, -1)),
        ("str", slice(None, "b", -1), slice(None, 0, -1)),
        ("str", slice("d", "b", -1), slice(3, 0, -1)),
        # dtype = str, step = -2
        ("str", slice(None, None, -2), slice(None, None, -2)),
        ("str", slice("d", None, -2), slice(3, None, -2)),
        ("str", slice(None, "b", -2), slice(None, 0, -2)),
        ("str", slice("d", "b", -2), slice(3, 0, -2)),
    ],
)
def test_loc_slice_negative_stepsize(self, dtype, loc, iloc):
    """Negative-step label slices agree with `iloc` on either level (GH#38071)."""
    # GH#38071
    labels_by_dtype = {
        "str": list("abcde"),
        "int": range(5),
    }
    labels = labels_by_dtype[dtype]

    # both levels carry the same labels, so one slice applies to either level
    mi = MultiIndex.from_arrays([labels] * 2)
    frame = DataFrame(1.0, index=mi, columns=["A"])
    SLC = pd.IndexSlice

    want = frame.iloc[iloc, :]
    via_get_loc = frame.loc[SLC[loc], :]
    via_level_0 = frame.loc[SLC[loc, :], :]
    via_level_1 = frame.loc[SLC[:, loc], :]

    for got in (via_get_loc, via_level_0, via_level_1):
        tm.assert_frame_equal(got, want)

View File

@ -0,0 +1,153 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
MultiIndex,
Series,
array,
)
import pandas._testing as tm
class TestMultiIndexSorted:
    """Lookups and sorting on MultiIndexes, including ones that are not sorted."""

    def test_getitem_multilevel_index_tuple_not_sorted(self):
        """Look up the first key of a three-level index using a slice of that index."""
        level_names = list("abc")
        raw = DataFrame(
            [[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=level_names + ["data"]
        )
        frame = raw.set_index(level_names)
        first_key = frame.index[:1]

        got = frame.loc[first_key, "data"]

        want = Series(
            ["x"],
            index=MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"]),
            name="data",
        )
        tm.assert_series_equal(got, want)

    def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
        """Positional column slice with a numpy int bound on level-1-sorted columns."""
        wide = multiindex_dataframe_random_data.sort_index(level=1).T

        # buglet with int typechecking
        got = wide.iloc[:, : np.int32(3)]

        want = wide.reindex(columns=wide.columns[:3])
        tm.assert_frame_equal(got, want)

    @pytest.mark.parametrize("key", [None, lambda x: x])
    def test_frame_getitem_not_sorted2(self, key):
        """`sort_index` with and without `level=0` agree after relabelling (13431)."""
        # 13431
        raw = DataFrame(
            {
                "col1": ["b", "d", "b", "a"],
                "col2": [3, 1, 1, 2],
                "data": ["one", "two", "three", "four"],
            }
        )
        df2 = raw.set_index(["col1", "col2"])
        untouched = df2.copy()

        # rewrite levels and codes of "col1"; the labels stay the same
        df2.index = df2.index.set_levels(["b", "d", "a"], level="col1")
        df2.index = df2.index.set_codes([0, 1, 0, 2], level="col1")
        assert not df2.index.is_monotonic_increasing
        assert untouched.index.equals(df2.index)

        want = df2.sort_index(key=key)
        assert want.index.is_monotonic_increasing

        got = df2.sort_index(level=0, key=key)
        assert got.index.is_monotonic_increasing
        tm.assert_frame_equal(got, want)

    def test_sort_values_key(self):
        """`MultiIndex.sort_values` honours a `key` callable."""
        second = ["one", "two", "one", "two", "one", "two", "one", "two"]

        unsorted_first = ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"]
        mi = MultiIndex.from_tuples(zip(unsorted_first, second))
        mi = mi.sort_values(  # sort by third letter
            key=lambda x: x.map(lambda entry: entry[2])
        )
        got = DataFrame(range(8), index=mi)

        sorted_first = ["foo", "foo", "bar", "bar", "qux", "qux", "baz", "baz"]
        want_index = MultiIndex.from_tuples(zip(sorted_first, second))
        want = DataFrame(range(8), index=want_index)

        tm.assert_frame_equal(got, want)

    def test_argsort_with_na(self):
        """`argsort` places the NA entry of a masked level last (GH48495)."""
        # GH48495
        levels = [
            array([2, NA, 1], dtype="Int64"),
            array([1, 2, 3], dtype="Int64"),
        ]
        got = MultiIndex.from_arrays(levels).argsort()
        want = np.array([2, 0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(got, want)

    def test_sort_values_with_na(self):
        """`sort_values` places the NA entry of a masked level last (GH48495)."""
        # GH48495
        unsorted = MultiIndex.from_arrays(
            [
                array([2, NA, 1], dtype="Int64"),
                array([1, 2, 3], dtype="Int64"),
            ]
        )
        got = DataFrame(range(3), index=unsorted.sort_values())

        already_sorted = MultiIndex.from_arrays(
            [
                array([1, 2, NA], dtype="Int64"),
                array([3, 1, 2], dtype="Int64"),
            ]
        )
        want = DataFrame(range(3), index=already_sorted)

        tm.assert_frame_equal(got, want)

    def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
        """Select outer label "foo" on columns and rows after appending a new key."""
        df = multiindex_dataframe_random_data.T
        df["foo", "four"] = "foo"

        # first-level label of every column, used as a boolean mask below
        outer = np.array([label[0] for label in df.columns.values])
        is_foo = outer == "foo"

        via_getitem = df["foo"]
        via_loc = df.loc[:, "foo"]
        want = df.reindex(columns=df.columns[is_foo])
        want.columns = want.columns.droplevel(0)
        tm.assert_frame_equal(via_getitem, want)
        tm.assert_frame_equal(via_loc, want)

        # the same selection along the rows
        df = df.T
        via_xs = df.xs("foo")
        via_loc = df.loc["foo"]
        want = df.reindex(df.index[is_foo])
        want.index = want.index.droplevel(0)
        tm.assert_frame_equal(via_xs, want)
        tm.assert_frame_equal(via_loc, want)

    def test_series_getitem_not_sorted(self):
        """Select outer label "qux" from a Series whose index is not sorted."""
        first = ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"]
        second = ["one", "two", "one", "two", "one", "two", "one", "two"]
        mi = MultiIndex.from_tuples(zip(first, second))
        ser = Series(np.random.default_rng(2).standard_normal(8), index=mi)

        outer = np.array([label[0] for label in mi.values])

        via_getitem = ser["qux"]
        via_loc = ser.loc["qux"]
        want = ser[outer == "qux"]
        want.index = want.index.droplevel(0)
        tm.assert_series_equal(via_getitem, want)
        tm.assert_series_equal(via_loc, want)

View File

@ -0,0 +1,257 @@
from datetime import (
datetime,
timezone,
)
import numpy as np
import pytest
from pandas.errors import InvalidIndexError
from pandas import (
CategoricalDtype,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
Series,
Timestamp,
)
import pandas._testing as tm
def test_at_timezone():
# https://github.com/pandas-dev/pandas/issues/33544
result = DataFrame({"foo": [datetime(2000, 1, 1)]})
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc)
expected = DataFrame(
{"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object
)
tm.assert_frame_equal(result, expected)
def test_selection_methods_of_assigned_col():
    """A column assigned from another frame reads back consistently after `.at` (GH 29282)."""
    # GH 29282
    frame = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
    donor = DataFrame(data={"c": [7, 8, 9]}, index=[2, 1, 0])

    # the assignment aligns on the index, so "c" arrives reversed
    frame["c"] = donor["c"]
    frame.at[1, "c"] = 11

    column_values = [9, 11, 7]

    want = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": column_values})
    tm.assert_frame_equal(frame, want)

    assert frame.at[1, "c"] == 11

    tm.assert_series_equal(frame["c"], Series(column_values, name="c"))

    tm.assert_frame_equal(frame[["c"]], DataFrame({"c": column_values}))
class TestAtSetItem:
    """Assignments through `.at` on existing labels."""

    def test_at_setitem_item_cache_cleared(self):
        """Values set with `.at` are visible through later column access (GH#22372)."""
        # GH#22372 Note the multi-step construction is necessary to trigger
        # the original bug. pandas/issues/22372#issuecomment-413345309
        frame = DataFrame(index=[0])
        frame["x"] = 1
        frame["cost"] = 2

        # accessing frame["cost"] adds "cost" to the _item_cache
        frame["cost"]

        # This loc[[0]] lookup used to call _consolidate_inplace at the
        # BlockManager level, which failed to clear the _item_cache
        frame.loc[[0]]

        frame.at[0, "x"] = 4
        frame.at[0, "cost"] = 789

        want = DataFrame(
            {"x": [4], "cost": 789},
            index=[0],
            columns=Index(["x", "cost"], dtype=object),
        )
        tm.assert_frame_equal(frame, want)

        # And in particular, check that the _item_cache has updated correctly.
        tm.assert_series_equal(frame["cost"], want["cost"])

    def test_at_setitem_mixed_index_assignment(self):
        """`.at` resolves both string and integer labels of a mixed index (GH#19860)."""
        # GH#19860
        ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])

        ser.at["a"] = 11
        assert ser.iat[0] == 11

        # label 1 sits at position 3
        ser.at[1] = 22
        assert ser.iat[3] == 22

    def test_at_setitem_categorical_missing(self):
        """Set one cell of an all-missing categorical frame."""
        cat_dtype = CategoricalDtype(["foo", "bar"])
        frame = DataFrame(index=range(3), columns=range(3), dtype=cat_dtype)

        frame.at[1, 1] = "foo"

        want = DataFrame(
            [
                [np.nan, np.nan, np.nan],
                [np.nan, "foo", np.nan],
                [np.nan, np.nan, np.nan],
            ],
            dtype=CategoricalDtype(["foo", "bar"]),
        )
        tm.assert_frame_equal(frame, want)

    def test_at_setitem_multiindex(self):
        """Setting with only the outer column label fills every matching column."""
        cols = MultiIndex.from_tuples([("a", 0), ("a", 1)])
        frame = DataFrame(np.zeros((3, 2), dtype="int64"), columns=cols)

        frame.at[0, "a"] = 10

        want = DataFrame(
            [[10, 10], [0, 0], [0, 0]],
            columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]),
        )
        tm.assert_frame_equal(frame, want)

    @pytest.mark.parametrize("row", (Timestamp("2019-01-01"), "2019-01-01"))
    def test_at_datetime_index(self, row):
        """`.at` accepts a Timestamp or its string form as a DatetimeIndex row label."""
        # Set float64 dtype to avoid upcast when setting .5
        frame = DataFrame(
            data=[[1] * 2], index=DatetimeIndex(data=["2019-01-01", "2019-01-02"])
        )
        frame = frame.astype({0: "float64"})
        want = DataFrame(
            data=[[0.5, 1], [1.0, 1]],
            index=DatetimeIndex(data=["2019-01-01", "2019-01-02"]),
        )

        frame.at[row, 0] = 0.5

        tm.assert_frame_equal(frame, want)
class TestAtSetItemWithExpansion:
def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture):
# GH#25506
ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture)
result = Series(ts)
result.at[1] = ts
expected = Series([ts, ts])
tm.assert_series_equal(result, expected)
class TestAtWithDuplicates:
    """`.at` on a frame whose column labels are duplicated."""

    def test_at_with_duplicate_axes_requires_scalar_lookup(self):
        """Non-scalar keys are rejected for both get and set (GH#33041)."""
        # GH#33041 check that falling back to loc doesn't allow non-scalar
        # args to slip in
        values = np.random.default_rng(2).standard_normal(6).reshape(3, 2)
        frame = DataFrame(values, columns=["A", "A"])

        msg = "Invalid call for scalar access"
        non_scalar_keys = [
            [1, 2],
            (1, ["A"]),
            (slice(None), "A"),
        ]

        # reading
        for key in non_scalar_keys:
            with pytest.raises(ValueError, match=msg):
                frame.at[key]

        # writing
        for key in non_scalar_keys:
            with pytest.raises(ValueError, match=msg):
                frame.at[key] = 1
class TestAtErrors:
    """Error behaviour of `.at` (and of `.loc` where `indexer_al` is used)."""

    # TODO: De-duplicate/parametrize
    #  test_at_series_raises_key_error2, test_at_frame_raises_key_error2

    def test_at_series_raises_key_error(self, indexer_al):
        """A missing label raises KeyError on an integer-indexed Series."""
        # GH#31724 .at should match .loc
        ser = Series([1, 2, 3], index=[3, 2, 1])
        assert indexer_al(ser)[1] == 3

        with pytest.raises(KeyError, match="a"):
            indexer_al(ser)["a"]

    def test_at_frame_raises_key_error(self, indexer_al):
        """A missing row or column label raises KeyError on an integer-labelled frame."""
        # GH#31724 .at should match .loc
        frame = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1])
        assert indexer_al(frame)[1, 0] == 3

        # unknown row label
        with pytest.raises(KeyError, match="a"):
            indexer_al(frame)["a", 0]

        # unknown column label
        with pytest.raises(KeyError, match="a"):
            indexer_al(frame)[1, "a"]

    def test_at_series_raises_key_error2(self, indexer_al):
        """An integer key is not treated as a position on a string-indexed Series."""
        # at should not fallback
        # GH#7814
        # GH#31724 .at should match .loc
        ser = Series([1, 2, 3], index=list("abc"))
        assert indexer_al(ser)["a"] == 1

        with pytest.raises(KeyError, match="^0$"):
            indexer_al(ser)[0]

    def test_at_frame_raises_key_error2(self, indexer_al):
        """An integer column key is not treated as a position on a string-labelled frame."""
        # GH#31724 .at should match .loc
        frame = DataFrame({"A": [1, 2, 3]}, index=list("abc"))
        assert indexer_al(frame)["a", "A"] == 1

        with pytest.raises(KeyError, match="^0$"):
            indexer_al(frame)["a", 0]

    def test_at_frame_multiple_columns(self):
        """Assigning a list to a single `.at` key raises InvalidIndexError (GH#48296)."""
        # GH#48296 - at shouldn't modify multiple columns
        frame = DataFrame({"a": [1, 2], "b": [3, 4]})
        new_row = [6, 7]
        expected_msg = f"You can only assign a scalar value not a \\{type(new_row)}"
        with pytest.raises(InvalidIndexError, match=expected_msg):
            frame.at[5] = new_row

    def test_at_getitem_mixed_index_no_fallback(self):
        """Integers absent from a mixed index raise KeyError (GH#19860)."""
        # GH#19860
        ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
        with pytest.raises(KeyError, match="^0$"):
            ser.at[0]
        with pytest.raises(KeyError, match="^4$"):
            ser.at[4]

    def test_at_categorical_integers(self):
        """Integer keys that are not categories raise KeyError on either axis."""
        # CategoricalIndex with integer categories that don't happen to match
        # the Categorical's codes
        ci = CategoricalIndex([3, 4])
        values = np.arange(4).reshape(2, 2)
        frame = DataFrame(values, index=ci)

        for obj in [frame, frame.T]:
            for missing in [0, 1]:
                with pytest.raises(KeyError, match=str(missing)):
                    obj.at[missing, missing]

    def test_at_applied_for_rows(self):
        """Assigning a whole row through `.at` raises InvalidIndexError (GH#48729)."""
        # GH#48729 .at should raise InvalidIndexError when assigning rows
        frame = DataFrame(index=["a"], columns=["col1", "col2"])
        new_row = [123, 15]
        expected_msg = f"You can only assign a scalar value not a \\{type(new_row)}"
        with pytest.raises(InvalidIndexError, match=expected_msg):
            frame.at["a"] = new_row

View File

@ -0,0 +1,573 @@
import re
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
DataFrame,
Index,
Interval,
Series,
Timedelta,
Timestamp,
option_context,
)
import pandas._testing as tm
@pytest.fixture
def df():
    """Six-row int64 column "A" on a CategoricalIndex "B" with categories c, a, b."""
    cat_dtype = CategoricalDtype(list("cab"))
    row_labels = CategoricalIndex(list("aabbca"), dtype=cat_dtype, name="B")
    data = {"A": np.arange(6, dtype="int64")}
    return DataFrame(data, index=row_labels)
@pytest.fixture
def df2():
    """Six-row int64 column "A" on a CategoricalIndex "B" with categories c, a, b, e.

    Category "e" is declared in the dtype but does not occur among the labels.
    """
    cat_dtype = CategoricalDtype(list("cabe"))
    row_labels = CategoricalIndex(list("aabbca"), dtype=cat_dtype, name="B")
    data = {"A": np.arange(6, dtype="int64")}
    return DataFrame(data, index=row_labels)
class TestCategoricalIndex:
def test_loc_scalar(self, df):
    """Scalar `.loc` get/set on a CategoricalIndex, including an unknown label."""
    cat_dtype = CategoricalDtype(list("cab"))

    # getitem: every row labelled "a"
    got = df.loc["a"]
    labels = Series(list("aaa"), name="B").astype(cat_dtype)
    assert labels.dtype == cat_dtype
    want = DataFrame({"A": [0, 1, 5]}, index=Index(labels))
    tm.assert_frame_equal(got, want)

    # setitem on an existing label
    df = df.copy()
    df.loc["a"] = 20
    all_labels = Series(list("aabbca"), name="B").astype(cat_dtype)
    assert all_labels.dtype == cat_dtype
    want = DataFrame(
        {
            "A": [20, 20, 2, 3, 4, 20],
        },
        index=Index(all_labels),
    )
    tm.assert_frame_equal(df, want)

    # value not in the categories
    with pytest.raises(KeyError, match=r"^'d'$"):
        df.loc["d"]

    # setting the unknown label is compared against the same assignment on
    # a copy whose index was first cast to object
    enlarged = df.copy()
    want = enlarged.copy()
    want.index = want.index.astype(object)
    want.loc["d"] = 10
    enlarged.loc["d"] = 10
    tm.assert_frame_equal(enlarged, want)
def test_loc_setitem_with_expansion_non_category(self, df):
# Setting-with-expansion with a new key "d" that is not among caegories
df.loc["a"] = 20
# Setting a new row on an existing column
df3 = df.copy()
df3.loc["d", "A"] = 10
bidx3 = Index(list("aabbcad"), name="B")
expected3 = DataFrame(
{
"A": [20, 20, 2, 3, 4, 20, 10.0],
},
index=Index(bidx3),
)
tm.assert_frame_equal(df3, expected3)
# Setting a new row _and_ new column
df4 = df.copy()
df4.loc["d", "C"] = 10
expected3 = DataFrame(
{
"A": [20, 20, 2, 3, 4, 20, np.nan],
"C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10],
},
index=Index(bidx3),
)
tm.assert_frame_equal(df4, expected3)
def test_loc_getitem_scalar_non_category(self, df):
    """A scalar key that is not one of the categories raises KeyError."""
    missing_key = 1
    with pytest.raises(KeyError, match="^1$"):
        df.loc[missing_key]
def test_slicing(self):
cat = Series(Categorical([1, 2, 3, 4]))
reverse = cat[::-1]
exp = np.array([4, 3, 2, 1], dtype=np.int64)
tm.assert_numpy_array_equal(reverse.__array__(), exp)
df = DataFrame({"value": (np.arange(100) + 1).astype("int64")})
df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10)
result = df.iloc[10]
tm.assert_series_equal(result, expected)
expected = DataFrame(
{"value": np.arange(11, 21).astype("int64")},
index=np.arange(10, 20).astype("int64"),
)
expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
result = df.iloc[10:20]
tm.assert_frame_equal(result, expected)
expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8)
result = df.loc[8]
tm.assert_series_equal(result, expected)
def test_slicing_and_getting_ops(self):
# systematically test the slicing operations:
# for all slicing ops:
# - returning a dataframe
# - returning a column
# - returning a row
# - returning a single value
cats = Categorical(
["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]
)
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
values = [1, 2, 3, 4, 5, 6, 7]
df = DataFrame({"cats": cats, "values": values}, index=idx)
# the expected values
cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
idx2 = Index(["j", "k"])
values2 = [3, 4]
# 2:4,: | "j":"k",:
exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)
# :,"cats" | :,0
exp_col = Series(cats, index=idx, name="cats")
# "j",: | 2,:
exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j")
# "j","cats | 2,0
exp_val = "b"
# iloc
# frame
res_df = df.iloc[2:4, :]
tm.assert_frame_equal(res_df, exp_df)
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
# row
res_row = df.iloc[2, :]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], str)
# col
res_col = df.iloc[:, 0]
tm.assert_series_equal(res_col, exp_col)
assert isinstance(res_col.dtype, CategoricalDtype)
# single value
res_val = df.iloc[2, 0]
assert res_val == exp_val
# loc
# frame
res_df = df.loc["j":"k", :]
tm.assert_frame_equal(res_df, exp_df)
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
# row
res_row = df.loc["j", :]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], str)
# col
res_col = df.loc[:, "cats"]
tm.assert_series_equal(res_col, exp_col)
assert isinstance(res_col.dtype, CategoricalDtype)
# single value
res_val = df.loc["j", "cats"]
assert res_val == exp_val
# single value
res_val = df.loc["j", df.columns[0]]
assert res_val == exp_val
# iat
res_val = df.iat[2, 0]
assert res_val == exp_val
# at
res_val = df.at["j", "cats"]
assert res_val == exp_val
# fancy indexing
exp_fancy = df.iloc[[2]]
res_fancy = df[df["cats"] == "b"]
tm.assert_frame_equal(res_fancy, exp_fancy)
res_fancy = df[df["values"] == 3]
tm.assert_frame_equal(res_fancy, exp_fancy)
# get_value
res_val = df.at["j", "cats"]
assert res_val == exp_val
# i : int, slice, or sequence of integers
res_row = df.iloc[2]
tm.assert_series_equal(res_row, exp_row)
assert isinstance(res_row["cats"], str)
res_df = df.iloc[slice(2, 4)]
tm.assert_frame_equal(res_df, exp_df)
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
res_df = df.iloc[[2, 3]]
tm.assert_frame_equal(res_df, exp_df)
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
res_col = df.iloc[:, 0]
tm.assert_series_equal(res_col, exp_col)
assert isinstance(res_col.dtype, CategoricalDtype)
res_df = df.iloc[:, slice(0, 2)]
tm.assert_frame_equal(res_df, df)
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
res_df = df.iloc[:, [0, 1]]
tm.assert_frame_equal(res_df, df)
assert isinstance(res_df["cats"].dtype, CategoricalDtype)
def test_slicing_doc_examples(self):
# GH 7918
cats = Categorical(
["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"]
)
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
values = [1, 2, 2, 2, 3, 4, 5]
df = DataFrame({"cats": cats, "values": values}, index=idx)
result = df.iloc[2:4, :]
expected = DataFrame(
{
"cats": Categorical(["b", "b"], categories=["a", "b", "c"]),
"values": [2, 2],
},
index=["j", "k"],
)
tm.assert_frame_equal(result, expected)
result = df.iloc[2:4, :].dtypes
expected = Series(["category", "int64"], ["cats", "values"], dtype=object)
tm.assert_series_equal(result, expected)
result = df.loc["h":"j", "cats"]
expected = Series(
Categorical(["a", "b", "b"], categories=["a", "b", "c"]),
index=["h", "i", "j"],
name="cats",
)
tm.assert_series_equal(result, expected)
result = df.loc["h":"j", df.columns[0:1]]
expected = DataFrame(
{"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])},
index=["h", "i", "j"],
)
tm.assert_frame_equal(result, expected)
def test_loc_getitem_listlike_labels(self, df):
# list of labels
result = df.loc[["c", "a"]]
expected = df.iloc[[4, 0, 1, 5]]
tm.assert_frame_equal(result, expected, check_index_type=True)
def test_loc_getitem_listlike_unused_category(self, df2):
    """A list key containing "e" must raise KeyError naming only "e" (GH#37901)."""
    # "e" is described as being in index.categories but absent from the
    # index values of the df2 fixture (fixture defined outside this chunk)
    labels = ["a", "b", "e"]
    pattern = re.escape("['e'] not in index")
    with pytest.raises(KeyError, match=pattern):
        df2.loc[labels]
def test_loc_getitem_label_unused_category(self, df2):
    """Scalar lookup of "e" must raise a KeyError whose text is exactly ``'e'``."""
    # element in the categories but not in the values
    only_e = r"^'e'$"
    with pytest.raises(KeyError, match=only_e):
        df2.loc["e"]
def test_loc_getitem_non_category(self, df2):
    """A list key containing "d" must raise KeyError naming only "d"."""
    # not all labels in the categories
    labels = ["a", "d"]
    pattern = re.escape("['d'] not in index")
    with pytest.raises(KeyError, match=pattern):
        df2.loc[labels]
def test_loc_setitem_expansion_label_unused_category(self, df2):
# assigning with a label that is in the categories but not in the index
df = df2.copy()
df.loc["e"] = 20
result = df.loc[["a", "b", "e"]]
exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B")
expected = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=exp_index)
tm.assert_frame_equal(result, expected)
def test_loc_listlike_dtypes(self):
    """List-like ``.loc`` on a unique CategoricalIndex keeps the index categories (GH 11586)."""
    # unique categories and codes
    ci = CategoricalIndex(["a", "b", "c"])
    frame = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=ci)

    # (requested labels, expected "A", expected "B")
    cases = [
        (["a", "b"], [1, 2], [4, 5]),  # unique slice
        (["a", "a", "b"], [1, 1, 2], [4, 4, 5]),  # duplicated slice
    ]
    for labels, col_a, col_b in cases:
        want_index = CategoricalIndex(labels, categories=ci.categories)
        want = DataFrame({"A": col_a, "B": col_b}, index=want_index)
        tm.assert_frame_equal(frame.loc[labels], want, check_index_type=True)

    missing = re.escape("['x'] not in index")
    with pytest.raises(KeyError, match=missing):
        frame.loc[["a", "x"]]
def test_loc_listlike_dtypes_duplicated_categories_and_codes(self):
    """List-like ``.loc`` on an index with a repeated label returns every matching row."""
    # duplicated categories and codes
    ci = CategoricalIndex(["a", "b", "a"])
    frame = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=ci)

    # (requested labels, expected index labels, expected "A", expected "B")
    cases = [
        # unique slice
        (["a", "b"], ["a", "a", "b"], [1, 3, 2], [4, 6, 5]),
        # duplicated slice
        (
            ["a", "a", "b"],
            ["a", "a", "a", "a", "b"],
            [1, 3, 1, 3, 2],
            [4, 6, 4, 6, 5],
        ),
    ]
    for labels, want_labels, col_a, col_b in cases:
        want = DataFrame(
            {"A": col_a, "B": col_b}, index=CategoricalIndex(want_labels)
        )
        tm.assert_frame_equal(frame.loc[labels], want, check_index_type=True)

    missing = re.escape("['x'] not in index")
    with pytest.raises(KeyError, match=missing):
        frame.loc[["a", "x"]]
def test_loc_listlike_dtypes_unused_category(self):
    """List-like ``.loc`` keeps the full category set, including unused categories."""
    # contains unused category
    all_levels = list("abcde")
    ci = CategoricalIndex(["a", "b", "a", "c"], categories=all_levels)
    frame = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=ci)

    # (requested labels, expected index labels, expected "A", expected "B")
    cases = [
        (["a", "b"], ["a", "a", "b"], [1, 3, 2], [5, 7, 6]),
        # duplicated slice
        (
            ["a", "a", "b"],
            ["a", "a", "a", "a", "b"],
            [1, 3, 1, 3, 2],
            [5, 7, 5, 7, 6],
        ),
    ]
    for labels, want_labels, col_a, col_b in cases:
        want_index = CategoricalIndex(want_labels, categories=list("abcde"))
        want = DataFrame({"A": col_a, "B": col_b}, index=want_index)
        tm.assert_frame_equal(frame.loc[labels], want, check_index_type=True)

    missing = re.escape("['x'] not in index")
    with pytest.raises(KeyError, match=missing):
        frame.loc[["a", "x"]]
def test_loc_getitem_listlike_unused_category_raises_keyerror(self):
    """Looking up a category that is declared but unused in the index raises KeyError.

    Both the scalar form and the list-like form are checked. The scalar
    pattern is anchored so that only a KeyError whose text is exactly
    ``'e'`` is accepted; an unanchored ``"e"`` would also match unrelated
    messages such as "... not in index".
    """
    # key that is an *unused* category raises
    index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde"))
    df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)

    with pytest.raises(KeyError, match=r"^'e'$"):
        # For comparison, check the scalar behavior
        df.loc["e"]

    with pytest.raises(KeyError, match=re.escape("['e'] not in index")):
        df.loc[["a", "e"]]
def test_ix_categorical_index(self):
# GH 12531
df = DataFrame(
np.random.default_rng(2).standard_normal((3, 3)),
index=list("ABC"),
columns=list("XYZ"),
)
cdf = df.copy()
cdf.index = CategoricalIndex(df.index)
cdf.columns = CategoricalIndex(df.columns)
expect = Series(df.loc["A", :], index=cdf.columns, name="A")
tm.assert_series_equal(cdf.loc["A", :], expect)
expect = Series(df.loc[:, "X"], index=cdf.index, name="X")
tm.assert_series_equal(cdf.loc[:, "X"], expect)
exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"])
expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index)
tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect)
exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"])
expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns)
tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
@pytest.mark.parametrize(
    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
def test_ix_categorical_index_non_unique(self, infer_string):
    """``.loc`` with repeated categorical labels returns every matching row/column."""
    # non-unique
    with option_context("future.infer_string", infer_string):
        plain = DataFrame(
            np.random.default_rng(2).standard_normal((3, 3)),
            index=list("ABA"),
            columns=list("XYX"),
        )
        cat = plain.copy()
        cat.index = CategoricalIndex(plain.index)
        cat.columns = CategoricalIndex(plain.columns)

        # scalar row label that occurs twice
        row_index = CategoricalIndex(list("AA"), categories=["A", "B"])
        want = DataFrame(plain.loc["A", :], columns=cat.columns, index=row_index)
        tm.assert_frame_equal(cat.loc["A", :], want)

        # scalar column label that occurs twice
        col_index = CategoricalIndex(list("XX"), categories=["X", "Y"])
        want = DataFrame(plain.loc[:, "X"], index=cat.index, columns=col_index)
        tm.assert_frame_equal(cat.loc[:, "X"], want)

        # list of row labels
        want = DataFrame(
            plain.loc[["A", "B"], :],
            columns=cat.columns,
            index=CategoricalIndex(list("AAB")),
        )
        tm.assert_frame_equal(cat.loc[["A", "B"], :], want)

        # list of column labels
        want = DataFrame(
            plain.loc[:, ["X", "Y"]],
            index=cat.index,
            columns=CategoricalIndex(list("XXY")),
        )
        tm.assert_frame_equal(cat.loc[:, ["X", "Y"]], want)
def test_loc_slice(self, df):
    """Integer slices are rejected with TypeError; a label slice "b":"c" selects positions 2-4 (GH9748)."""
    int_slice_error = (
        "cannot do slice indexing on CategoricalIndex with these "
        r"indexers \[1\] of type int"
    )
    with pytest.raises(TypeError, match=int_slice_error):
        df.loc[1:5]

    by_position = df.iloc[[2, 3, 4]]
    tm.assert_frame_equal(df.loc["b":"c"], by_position)
def test_loc_and_at_with_categorical_index(self):
# GH 20629
df = DataFrame(
[[1, 2], [3, 4], [5, 6]], index=CategoricalIndex(["A", "B", "C"])
)
s = df[0]
assert s.loc["A"] == 1
assert s.at["A"] == 1
assert df.loc["B", 1] == 4
assert df.at["B", 1] == 4
@pytest.mark.parametrize(
    "idx_values",
    [
        # python types
        [1, 2, 3],
        [-1, -2, -3],
        [1.5, 2.5, 3.5],
        [-1.5, -2.5, -3.5],
        # numpy int/uint
        *[np.array([1, 2, 3], dtype=int_dt) for int_dt in tm.ALL_INT_NUMPY_DTYPES],
        # numpy floats
        *[
            np.array([1.5, 2.5, 3.5], dtype=float_dt)
            for float_dt in tm.FLOAT_NUMPY_DTYPES
        ],
        # numpy object
        np.array([1, "b", 3.5], dtype=object),
        # pandas scalars
        [Interval(1, 4), Interval(4, 6), Interval(6, 9)],
        [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)],
        [Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")],
        # pandas Integer arrays
        *[pd.array([1, 2, 3], dtype=ea_dt) for ea_dt in tm.ALL_INT_EA_DTYPES],
        # other pandas arrays
        pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array,
        pd.date_range("2019-01-01", periods=3).array,
        pd.timedelta_range(start="1d", periods=3).array,
    ],
)
def test_loc_getitem_with_non_string_categories(self, idx_values, ordered):
    """Scalar, list and slice ``.loc`` get/set work for non-string categories (GH-17569)."""
    cat_idx = CategoricalIndex(idx_values, ordered=ordered)
    frame = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx)

    first = idx_values[0]
    first_two = idx_values[:2]
    label_slice = slice(idx_values[0], idx_values[1])

    # scalar selection
    want_row = Series(["foo"], index=["A"], name=first)
    tm.assert_series_equal(frame.loc[first], want_row)

    # list selection, then slice selection: both give the first two rows
    for key in (first_two, label_slice):
        want_head = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"])
        tm.assert_frame_equal(frame.loc[key], want_head)

    # scalar assignment
    updated = frame.copy()
    updated.loc[first] = "qux"
    want = DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx)
    tm.assert_frame_equal(updated, want)

    # list assignment, then slice assignment
    for key in (first_two, label_slice):
        updated = frame.copy()
        updated.loc[key, "A"] = ["qux", "qux2"]
        want = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)
        tm.assert_frame_equal(updated, want)
def test_getitem_categorical_with_nan(self):
# GH#41933
ci = CategoricalIndex(["A", "B", np.nan])
ser = Series(range(3), index=ci)
assert ser[np.nan] == 2
assert ser.loc[np.nan] == 2
df = DataFrame(ser)
assert df.loc[np.nan, 0] == 2
assert df.loc[np.nan][0] == 2

View File

@ -0,0 +1,647 @@
from string import ascii_letters
import numpy as np
import pytest
from pandas.errors import (
SettingWithCopyError,
SettingWithCopyWarning,
)
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
Index,
Series,
Timestamp,
date_range,
option_context,
)
import pandas._testing as tm
# Text passed as ``match=`` to ``pytest.raises(SettingWithCopyError, ...)`` in the tests below.
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
def random_text(nobs=100):
    """Return a one-column ("letters") DataFrame of ``nobs`` slices of ``ascii_letters``.

    The slice bounds come from a generator seeded with 2, so repeated calls
    with the same ``nobs`` yield the same frame.
    """
    rng = np.random.default_rng(2)
    # two draws per row, sorted so that start <= stop
    bounds = rng.integers(len(ascii_letters), size=(nobs, 2))
    bounds.sort(axis=1)
    pieces = [ascii_letters[start:stop] for start, stop in bounds]
    return DataFrame(pieces, columns=["letters"])
class TestCaching:
def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write):
"""Chained ``df["bb"].iloc[0] = 0.17`` after ``df["bb"]`` has been accessed.

Without copy-on-write the new value must be visible through the parent;
with copy-on-write the parent keeps its original 2.2.
"""
# this is chained assignment, but will 'work'
with option_context("chained_assignment", None):
# #3970
df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})
# Creates a second float block
df["cc"] = 0.0
# caches a reference to the 'bb' series
df["bb"]
# Assignment to wrong series
with tm.raises_chained_assignment_error():
df["bb"].iloc[0] = 0.17
df._clear_item_cache()
if not using_copy_on_write:
tm.assert_almost_equal(df["bb"][0], 0.17)
else:
# with copy-on-write, parent is not mutated with chained assignment
tm.assert_almost_equal(df["bb"][0], 2.2)
@pytest.mark.parametrize("do_ref", [True, False])
def test_setitem_cache_updating(self, do_ref):
"""Setting ``df.loc[7, "c"]`` (a row label not yet present) must be readable
afterwards, whether or not ``df.loc[0, "c"]`` was read beforehand."""
# GH 5424
cont = ["one", "two", "three", "four", "five", "six", "seven"]
df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)})
# ref the cache
if do_ref:
df.loc[0, "c"]
# set it
df.loc[7, "c"] = 1
assert df.loc[0, "c"] == 0.0
assert df.loc[7, "c"] == 1.0
def test_setitem_cache_updating_slices(
self, using_copy_on_write, warn_copy_on_write
):
"""Accumulate ``row["D"]`` into ``out`` over a date slice in three ways:
``.loc`` assignment, chained indexing, and ``.loc`` with ``+=``.

The ``.loc`` forms must always reach 600 per row; the chained form must
leave ``out`` unchanged when copy-on-write is enabled.
"""
# GH 7084
# not updating cache on series setting with slices
expected = DataFrame(
{"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014")
)
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]})
# loop through df to update out
six = Timestamp("5/7/2014")
eix = Timestamp("5/9/2014")
for ix, row in df.iterrows():
out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"]
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])
# try via a chain indexing
# this actually works
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
# snapshot used for the copy-on-write expectation below
out_original = out.copy()
for ix, row in df.iterrows():
v = out[row["C"]][six:eix] + row["D"]
with tm.raises_chained_assignment_error(
(ix == 0) or warn_copy_on_write or using_copy_on_write
):
out[row["C"]][six:eix] = v
if not using_copy_on_write:
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])
else:
tm.assert_frame_equal(out, out_original)
tm.assert_series_equal(out["A"], out_original["A"])
# same accumulation using in-place ``+=`` on the .loc selection
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
for ix, row in df.iterrows():
out.loc[six:eix, row["C"]] += row["D"]
tm.assert_frame_equal(out, expected)
tm.assert_series_equal(out["A"], expected["A"])
def test_altering_series_clears_parent_cache(
self, using_copy_on_write, warn_copy_on_write
):
"""Enlarging a Series obtained via ``df["A"]`` must drop "A" from
``df._item_cache`` and leave the parent's column at its original length."""
# GH #33675
df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
ser = df["A"]
# whether the column access populated the cache depends on the CoW mode
if using_copy_on_write or warn_copy_on_write:
assert "A" not in df._item_cache
else:
assert "A" in df._item_cache
# Adding a new entry to ser swaps in a new array, so "A" needs to
# be removed from df._item_cache
ser["c"] = 5
assert len(ser) == 3
assert "A" not in df._item_cache
assert df["A"] is not ser
assert len(df["A"]) == 2
class TestChaining:
def test_setitem_chained_setfault(self, using_copy_on_write):
"""Chained assignment through attribute/column access (``df.response[mask]``,
``df["A"].iloc[0]``, ``df.A.iloc[0]``) on frames built from an array, a
recarray and a two-column dict.

With copy-on-write the parent must stay unchanged; otherwise the parent
must show the assigned value.
"""
# GH6026
data = ["right", "left", "left", "left", "right", "left", "timeout"]
# ``data`` with the final "timeout" replaced by "none"
mdata = ["right", "left", "left", "left", "right", "left", "none"]
df = DataFrame({"response": np.array(data)})
mask = df.response == "timeout"
with tm.raises_chained_assignment_error():
df.response[mask] = "none"
if using_copy_on_write:
tm.assert_frame_equal(df, DataFrame({"response": data}))
else:
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
# same check on a frame built from a record array
recarray = np.rec.fromarrays([data], names=["response"])
df = DataFrame(recarray)
mask = df.response == "timeout"
with tm.raises_chained_assignment_error():
df.response[mask] = "none"
if using_copy_on_write:
tm.assert_frame_equal(df, DataFrame({"response": data}))
else:
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
# same check with a second column present
df = DataFrame({"response": data, "response1": data})
df_original = df.copy()
mask = df.response == "timeout"
with tm.raises_chained_assignment_error():
df.response[mask] = "none"
if using_copy_on_write:
tm.assert_frame_equal(df, df_original)
else:
tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data}))
# GH 6056
expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
with tm.raises_chained_assignment_error():
df["A"].iloc[0] = np.nan
if using_copy_on_write:
expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]})
else:
expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
result = df.head()
tm.assert_frame_equal(result, expected)
# attribute access variant; ``expected`` is reused from the branch above
df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
with tm.raises_chained_assignment_error():
df.A.iloc[0] = np.nan
result = df.head()
tm.assert_frame_equal(result, expected)
@pytest.mark.arm_slow
def test_detect_chained_assignment(self, using_copy_on_write):
"""Chained ``df["A"][i] = value`` on an all-int64 frame with the
"chained_assignment" option set to "raise": the frame is updated unless
copy-on-write is enabled."""
with option_context("chained_assignment", "raise"):
# work with the chain
expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
df = DataFrame(
np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64"
)
df_original = df.copy()
assert df._is_copy is None
with tm.raises_chained_assignment_error():
df["A"][0] = -5
with tm.raises_chained_assignment_error():
df["A"][1] = -6
if using_copy_on_write:
tm.assert_frame_equal(df, df_original)
else:
tm.assert_frame_equal(df, expected)
@pytest.mark.arm_slow
def test_detect_chained_assignment_raises(
self, using_array_manager, using_copy_on_write, warn_copy_on_write
):
"""Chained ``df["A"][i] = value`` on a frame with an int64 and a float64 column.

Four modes are covered: copy-on-write (frame unchanged), warn mode,
the default manager (SettingWithCopyError expected) and ArrayManager
(assignment takes effect).
"""
# test with the chaining
df = DataFrame(
{
"A": Series(range(2), dtype="int64"),
"B": np.array(np.arange(2, 4), dtype=np.float64),
}
)
df_original = df.copy()
assert df._is_copy is None
if using_copy_on_write:
with tm.raises_chained_assignment_error():
df["A"][0] = -5
with tm.raises_chained_assignment_error():
df["A"][1] = -6
tm.assert_frame_equal(df, df_original)
elif warn_copy_on_write:
with tm.raises_chained_assignment_error():
df["A"][0] = -5
with tm.raises_chained_assignment_error():
df["A"][1] = np.nan
elif not using_array_manager:
with pytest.raises(SettingWithCopyError, match=msg):
with tm.raises_chained_assignment_error():
df["A"][0] = -5
with pytest.raises(SettingWithCopyError, match=msg):
with tm.raises_chained_assignment_error():
df["A"][1] = np.nan
assert df["A"]._is_copy is None
else:
# INFO(ArrayManager) for ArrayManager it doesn't matter that it's
# a mixed dataframe
df["A"][0] = -5
df["A"][1] = -6
expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB"))
expected["B"] = expected["B"].astype("float64")
tm.assert_frame_equal(df, expected)
@pytest.mark.arm_slow
def test_detect_chained_assignment_fails(
self, using_copy_on_write, warn_copy_on_write
):
"""``df.loc[0]["A"] = -5`` must be flagged: as a chained-assignment error in
the copy-on-write modes, as SettingWithCopyError otherwise."""
# Using a copy (the chain), fails
df = DataFrame(
{
"A": Series(range(2), dtype="int64"),
"B": np.array(np.arange(2, 4), dtype=np.float64),
}
)
if using_copy_on_write or warn_copy_on_write:
with tm.raises_chained_assignment_error():
df.loc[0]["A"] = -5
else:
with pytest.raises(SettingWithCopyError, match=msg):
df.loc[0]["A"] = -5
@pytest.mark.arm_slow
def test_detect_chained_assignment_doc_example(
self, using_copy_on_write, warn_copy_on_write
):
"""``df[boolean_mask]["c"] = 42`` must be flagged: as a chained-assignment
error in the copy-on-write modes, as SettingWithCopyError otherwise."""
# Doc example
df = DataFrame(
{
"a": ["one", "one", "two", "three", "two", "one", "six"],
"c": Series(range(7), dtype="int64"),
}
)
assert df._is_copy is None
indexer = df.a.str.startswith("o")
if using_copy_on_write or warn_copy_on_write:
with tm.raises_chained_assignment_error():
df[indexer]["c"] = 42
else:
with pytest.raises(SettingWithCopyError, match=msg):
df[indexer]["c"] = 42
@pytest.mark.arm_slow
def test_detect_chained_assignment_object_dtype(
self, using_array_manager, using_copy_on_write, warn_copy_on_write
):
"""Chained assignment into an object-dtype column, per execution mode.

Copy-on-write leaves the frame unchanged; warn mode and ArrayManager
apply the chained write; the default manager raises
SettingWithCopyError, after which ``df.loc[0, "A"] = 111`` is used.
"""
expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
df = DataFrame(
{"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]}
)
df_original = df.copy()
if not using_copy_on_write and not warn_copy_on_write:
with pytest.raises(SettingWithCopyError, match=msg):
df.loc[0]["A"] = 111
if using_copy_on_write:
with tm.raises_chained_assignment_error():
df["A"][0] = 111
tm.assert_frame_equal(df, df_original)
elif warn_copy_on_write:
with tm.raises_chained_assignment_error():
df["A"][0] = 111
tm.assert_frame_equal(df, expected)
elif not using_array_manager:
with pytest.raises(SettingWithCopyError, match=msg):
with tm.raises_chained_assignment_error():
df["A"][0] = 111
# non-chained form of the same assignment
df.loc[0, "A"] = 111
tm.assert_frame_equal(df, expected)
else:
# INFO(ArrayManager) for ArrayManager it doesn't matter that it's
# a mixed dataframe
df["A"][0] = 111
tm.assert_frame_equal(df, expected)
@pytest.mark.arm_slow
def test_detect_chained_assignment_is_copy_pickle(self):
"""A frame round-tripped through pickle accepts column assignment without raising."""
# gh-5475: Make sure that is_copy is picked up on reconstruction
df = DataFrame({"A": [1, 2]})
assert df._is_copy is None
with tm.ensure_clean("__tmp__pickle") as path:
df.to_pickle(path)
df2 = pd.read_pickle(path)
# the assignment is issued twice: first creating "B", then overwriting it
df2["B"] = df2["A"]
df2["B"] = df2["A"]
@pytest.mark.arm_slow
def test_detect_chained_assignment_setting_entire_column(self):
"""List-based ``iloc`` results carry ``_is_copy``; an explicit ``.copy()`` does
not, and replacing a whole column on it must not raise."""
# gh-5597: a spurious raise as we are setting the entire column here
df = random_text(100000)
# Always a copy
x = df.iloc[[0, 1, 2]]
assert x._is_copy is not None
x = df.iloc[[0, 1, 2, 4]]
assert x._is_copy is not None
# Explicitly copy
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer].copy()
assert df._is_copy is None
df["letters"] = df["letters"].apply(str.lower)
@pytest.mark.arm_slow
def test_detect_chained_assignment_implicit_take(self):
"""A boolean ``.loc`` selection carries ``_is_copy``, yet replacing a whole
column on it must not raise."""
# Implicitly take
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df._is_copy is not None
df["letters"] = df["letters"].apply(str.lower)
@pytest.mark.arm_slow
def test_detect_chained_assignment_implicit_take2(
self, using_copy_on_write, warn_copy_on_write
):
"""``_is_copy`` survives ``df.loc[:, col] = ...`` but is cleared by
``df[col] = ...``; skipped in the copy-on-write modes."""
if using_copy_on_write or warn_copy_on_write:
pytest.skip("_is_copy is not always set for CoW")
# Implicitly take 2
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df._is_copy is not None
df.loc[:, "letters"] = df["letters"].apply(str.lower)
# with the enforcement of #45333 in 2.0, the .loc[:, letters] setting
# is inplace, so df._is_copy remains non-None.
assert df._is_copy is not None
df["letters"] = df["letters"].apply(str.lower)
assert df._is_copy is None
@pytest.mark.arm_slow
def test_detect_chained_assignment_str(self):
"""A single ``.loc[mask, col] = ...`` assignment on the original frame must not raise."""
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower)
@pytest.mark.arm_slow
def test_detect_chained_assignment_is_copy(self):
"""``dropna()`` on a frame without missing values yields ``_is_copy is None``,
and an in-place column update on the result must not raise."""
# an identical take, so no copy
df = DataFrame({"a": [1]}).dropna()
assert df._is_copy is None
df["a"] += 1
@pytest.mark.arm_slow
def test_detect_chained_assignment_sorting(self):
"""Sorting a column obtained via ``iloc`` or ``[]`` gives equal results and does not raise."""
df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
ser = df.iloc[:, 0].sort_values()
tm.assert_series_equal(ser, df.iloc[:, 0].sort_values())
tm.assert_series_equal(ser, df[0].sort_values())
@pytest.mark.arm_slow
def test_detect_chained_assignment_false_positives(self):
"""Whole-column reassignment before and after a boolean filter, with ``str(df)``
calls in between, must not raise."""
# see gh-6025: false positives
df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]})
str(df)
df["column1"] = df["column1"] + "b"
str(df)
# rebinding ``df`` to a filtered selection of itself
df = df[df["column2"] != 8]
str(df)
df["column1"] = df["column1"] + "c"
str(df)
@pytest.mark.arm_slow
def test_detect_chained_assignment_undefined_column(
self, using_copy_on_write, warn_copy_on_write
):
"""``df.iloc[0:5]["group"] = "a"`` must be flagged in every mode; with
copy-on-write the parent frame additionally stays unchanged."""
# from SO:
# https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
df = DataFrame(np.arange(0, 9), columns=["count"])
df["group"] = "b"
df_original = df.copy()
if using_copy_on_write:
with tm.raises_chained_assignment_error():
df.iloc[0:5]["group"] = "a"
tm.assert_frame_equal(df, df_original)
elif warn_copy_on_write:
with tm.raises_chained_assignment_error():
df.iloc[0:5]["group"] = "a"
else:
with pytest.raises(SettingWithCopyError, match=msg):
with tm.raises_chained_assignment_error():
df.iloc[0:5]["group"] = "a"
@pytest.mark.arm_slow
def test_detect_chained_assignment_changing_dtype(
self, using_array_manager, using_copy_on_write, warn_copy_on_write
):
"""Chained assignment of a string into a string column ("D") and an int64
column ("C") of a mixed-dtype frame, checked per execution mode."""
# Mixed type setting but same dtype & changing dtype
df = DataFrame(
{
"A": date_range("20130101", periods=5),
"B": np.random.default_rng(2).standard_normal(5),
"C": np.arange(5, dtype="int64"),
"D": ["a", "b", "c", "d", "e"],
}
)
df_original = df.copy()
if using_copy_on_write or warn_copy_on_write:
with tm.raises_chained_assignment_error():
df.loc[2]["D"] = "foo"
with tm.raises_chained_assignment_error():
df.loc[2]["C"] = "foo"
# the row-first chains above must not have touched the parent
tm.assert_frame_equal(df, df_original)
with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)):
df["C"][2] = "foo"
if using_copy_on_write:
tm.assert_frame_equal(df, df_original)
else:
assert df.loc[2, "C"] == "foo"
else:
with pytest.raises(SettingWithCopyError, match=msg):
df.loc[2]["D"] = "foo"
with pytest.raises(SettingWithCopyError, match=msg):
df.loc[2]["C"] = "foo"
if not using_array_manager:
with pytest.raises(SettingWithCopyError, match=msg):
with tm.raises_chained_assignment_error():
df["C"][2] = "foo"
else:
# INFO(ArrayManager) for ArrayManager it doesn't matter if it's
# changing the dtype or not
df["C"][2] = "foo"
assert df.loc[2, "C"] == "foo"
def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write):
"""``df[["c"]][mask] = ...`` must be flagged in every mode; with copy-on-write
the parent frame additionally stays unchanged."""
# operating on a copy
df = DataFrame(
{"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
)
df_original = df.copy()
mask = pd.isna(df.c)
if using_copy_on_write:
with tm.raises_chained_assignment_error():
df[["c"]][mask] = df[["b"]][mask]
tm.assert_frame_equal(df, df_original)
elif warn_copy_on_write:
with tm.raises_chained_assignment_error():
df[["c"]][mask] = df[["b"]][mask]
else:
with pytest.raises(SettingWithCopyError, match=msg):
df[["c"]][mask] = df[["b"]][mask]
def test_setting_with_copy_bug_no_warning(self):
"""Adding a column to a ``df1[["x"]]`` selection must not raise (GH 8730)."""
# invalid warning as we are returning a new object
# GH 8730
df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])})
df2 = df1[["x"]]
# this should not raise
df2["y"] = ["g", "h", "i"]
def test_detect_chained_assignment_warnings_errors(
self, using_copy_on_write, warn_copy_on_write
):
"""The "chained_assignment" option selects the reaction to ``df.loc[0]["A"] = 111``:
"warn" emits SettingWithCopyWarning, "raise" raises SettingWithCopyError."""
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
if using_copy_on_write or warn_copy_on_write:
with tm.raises_chained_assignment_error():
df.loc[0]["A"] = 111
# the option-based checks below are not run in the copy-on-write modes
return
with option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(SettingWithCopyWarning):
df.loc[0]["A"] = 111
with option_context("chained_assignment", "raise"):
with pytest.raises(SettingWithCopyError, match=msg):
df.loc[0]["A"] = 111
@pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})])
def test_detect_chained_assignment_warning_stacklevel(
self, rhs, using_copy_on_write, warn_copy_on_write
):
"""The SettingWithCopyWarning raised by ``chained[2] = rhs`` must be attributed
to this file; in the copy-on-write modes no warning is checked and the
parent frame must stay unchanged."""
# GH#42570
df = DataFrame(np.arange(25).reshape(5, 5))
df_original = df.copy()
chained = df.loc[:3]
with option_context("chained_assignment", "warn"):
if not using_copy_on_write and not warn_copy_on_write:
with tm.assert_produces_warning(SettingWithCopyWarning) as t:
chained[2] = rhs
# the recorded warning must point at this test module
assert t[0].filename == __file__
else:
# INFO(CoW) no warning, and original dataframe not changed
chained[2] = rhs
tm.assert_frame_equal(df, df_original)
# TODO(ArrayManager) fast_xs with array-like scalars is not yet working
@td.skip_array_manager_not_yet_implemented
def test_chained_getitem_with_lists(self):
"""Four equivalent ways of reading one array-valued cell must return the same array."""
# GH6394
# Regression in chained getitem indexing with embedded list-like from
# 0.12
df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]})
expected = df["A"].iloc[2]
result = df.loc[2, "A"]
tm.assert_numpy_array_equal(result, expected)
result2 = df.iloc[2]["A"]
tm.assert_numpy_array_equal(result2, expected)
result3 = df["A"].loc[2]
tm.assert_numpy_array_equal(result3, expected)
result4 = df["A"].iloc[2]
tm.assert_numpy_array_equal(result4, expected)
def test_cache_updating(self):
"""After ``df["A"]`` has been accessed, a row added via ``df.loc[new_label] = ...``
must appear in the index of subsequently accessed columns."""
# GH 4939, make sure to update the cache on setitem
df = DataFrame(
np.zeros((10, 4)),
columns=Index(list("ABCD"), dtype=object),
)
df["A"] # cache series
df.loc["Hello Friend"] = df.iloc[0]
assert "Hello Friend" in df["A"].index
assert "Hello Friend" in df["B"].index
def test_cache_updating2(self, using_copy_on_write):
"""Writing through ``df.f.values`` is rejected as read-only under copy-on-write;
otherwise the write must be visible in both the frame and ``df.f``."""
# 10264
df = DataFrame(
np.zeros((5, 5), dtype="int64"),
columns=["a", "b", "c", "d", "e"],
index=range(5),
)
df["f"] = 0
df_orig = df.copy()
if using_copy_on_write:
with pytest.raises(ValueError, match="read-only"):
df.f.values[3] = 1
tm.assert_frame_equal(df, df_orig)
return
# two successive writes; the second value (2) is the one asserted below
df.f.values[3] = 1
df.f.values[3] = 2
expected = DataFrame(
np.zeros((5, 6), dtype="int64"),
columns=["a", "b", "c", "d", "e", "f"],
index=range(5),
)
expected.at[3, "f"] = 2
tm.assert_frame_equal(df, expected)
expected = Series([0, 0, 0, 2, 0], name="f")
tm.assert_series_equal(df.f, expected)
def test_iloc_setitem_chained_assignment(self, using_copy_on_write):
"""A boolean ``df.iloc[ck]`` lookup between two chained ``df["bb"].iloc[0] = ...``
writes must not stop the second write from taking effect (without
copy-on-write); with copy-on-write the original 2.2 is kept."""
# GH#3970
with option_context("chained_assignment", None):
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
df["cc"] = 0.0
# all-True mask, one entry per row
ck = [True] * len(df)
with tm.raises_chained_assignment_error():
df["bb"].iloc[0] = 0.13
# GH#3970 this lookup used to break the chained setting to 0.15
df.iloc[ck]
with tm.raises_chained_assignment_error():
df["bb"].iloc[0] = 0.15
if not using_copy_on_write:
assert df["bb"].iloc[0] == 0.15
else:
assert df["bb"].iloc[0] == 2.2
def test_getitem_loc_assignment_slice_state(self):
"""Chained ``df["a"].loc[4] = 40`` with a label absent from the index must
leave both the frame and its "a" column unchanged (GH 13569)."""
# GH 13569
df = DataFrame({"a": [10, 20, 30]})
with tm.raises_chained_assignment_error():
df["a"].loc[4] = 40
tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]}))
tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a"))

View File

@ -0,0 +1,105 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.api.indexers import check_array_indexer
@pytest.mark.parametrize(
    "indexer, expected",
    [
        # integer
        ([1, 2], np.array([1, 2], dtype=np.intp)),
        (np.array([1, 2], dtype="int64"), np.array([1, 2], dtype=np.intp)),
        (pd.array([1, 2], dtype="Int32"), np.array([1, 2], dtype=np.intp)),
        (pd.Index([1, 2]), np.array([1, 2], dtype=np.intp)),
        # boolean
        ([True, False, True], np.array([True, False, True], dtype=np.bool_)),
        (np.array([True, False, True]), np.array([True, False, True], dtype=np.bool_)),
        (
            pd.array([True, False, True], dtype="boolean"),
            np.array([True, False, True], dtype=np.bool_),
        ),
        # other
        ([], np.array([], dtype=np.intp)),
    ],
)
def test_valid_input(indexer, expected):
    """Valid integer/boolean/empty indexers come back as intp or bool ndarrays."""
    target = np.array([1, 2, 3])
    converted = check_array_indexer(target, indexer)
    tm.assert_numpy_array_equal(converted, expected)
@pytest.mark.parametrize(
    "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")]
)
def test_boolean_na_returns_indexer(indexer):
    """A missing entry in a boolean indexer comes back as ``False``."""
    # https://github.com/pandas-dev/pandas/issues/31503
    target = np.array([1, 2, 3])
    want = np.array([True, False, False], dtype=bool)
    tm.assert_numpy_array_equal(check_array_indexer(target, indexer), want)
@pytest.mark.parametrize(
    "indexer",
    [
        [True, False],
        pd.array([True, False], dtype="boolean"),
        np.array([True, False], dtype=np.bool_),
    ],
)
def test_bool_raise_length(indexer):
    """A two-element boolean indexer against a three-element array raises IndexError."""
    target = np.array([1, 2, 3])
    with pytest.raises(IndexError, match="Boolean index has wrong length"):
        check_array_indexer(target, indexer)
@pytest.mark.parametrize(
    "indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")]
)
def test_int_raise_missing_values(indexer):
    """An integer indexer holding a missing value raises ValueError."""
    target = np.array([1, 2, 3])
    na_error = "Cannot index with an integer indexer containing NA values"
    with pytest.raises(ValueError, match=na_error):
        check_array_indexer(target, indexer)
@pytest.mark.parametrize(
    "indexer",
    [
        [0.0, 1.0],
        np.array([1.0, 2.0], dtype="float64"),
        np.array([True, False], dtype=object),
        pd.Index([True, False], dtype=object),
    ],
)
def test_raise_invalid_array_dtypes(indexer):
    """Float and object-dtype indexers are rejected with IndexError."""
    target = np.array([1, 2, 3])
    dtype_error = "arrays used as indices must be of integer or boolean type"
    with pytest.raises(IndexError, match=dtype_error):
        check_array_indexer(target, indexer)
def test_raise_nullable_string_dtype(nullable_string_dtype):
    """A string-array indexer of the given nullable string dtype is rejected with IndexError."""
    target = np.array([1, 2, 3])
    string_indexer = pd.array(["a", "b"], dtype=nullable_string_dtype)
    dtype_error = "arrays used as indices must be of integer or boolean type"
    with pytest.raises(IndexError, match=dtype_error):
        check_array_indexer(target, string_indexer)
@pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)])
def test_pass_through_non_array_likes(indexer):
    """``None``, ``Ellipsis``, slices and tuples compare equal to what was passed in."""
    target = np.array([1, 2, 3])
    returned = check_array_indexer(target, indexer)
    assert returned == indexer

View File

@ -0,0 +1,940 @@
from __future__ import annotations
from datetime import (
datetime,
timedelta,
)
import itertools
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
from pandas.compat import (
IS64,
is_platform_windows,
)
from pandas.compat.numpy import np_version_gt2
import pandas as pd
import pandas._testing as tm
###############################################################
# Index / Series common tests which may trigger dtype coercions
###############################################################
@pytest.fixture(autouse=True, scope="class")
def check_comprehensiveness(request):
    """Fail unless every (klass, dtype, method) combination of the test class
    appears in the name of some collected test item.

    The check is bypassed when pytest runs with "last-failed" or ``-k``,
    because only a subset of tests is expected then.
    """
    owner = request.cls
    required = itertools.product(owner.klasses, owner.dtypes, [owner.method])

    def is_covered(parts):
        # a combination counts as covered when one collected item's name
        # contains all three of its parts
        collected = request.node.session.items
        return any(all(part in item.name for part in parts) for item in collected)

    options = request.config.option
    running_subset = options.lf or options.keyword
    if not running_subset:
        missing = next((combo for combo in required if not is_covered(combo)), None)
        if missing is not None:
            raise AssertionError(
                f"test method is not defined: {owner.__name__}, {missing}"
            )
    yield
class CoercionBase:
    """
    Shared declarations for the coercion test classes.

    ``klasses``, ``dtypes`` and ``method`` are read by the
    ``check_comprehensiveness`` fixture to build the combinations that must
    appear in collected test names.
    """

    # container kinds looked for in test names
    klasses = ["index", "series"]

    # dtype tags looked for in test names
    dtypes = [
        "object",
        "int64",
        "float64",
        "complex128",
        "bool",
        "datetime64",
        "datetime64tz",
        "timedelta64",
        "period",
    ]

    @property
    def method(self):
        # Placeholder: the subclasses in this module replace it with a string.
        raise NotImplementedError(self)
class TestSetitemCoercion(CoercionBase):
    """Index coercion triggered by assigning a value under a new Series key."""

    method = "setitem"

    # disable comprehensiveness tests, as most of these have been moved to
    #  tests.series.indexing.test_setitem in SetitemCastingEquivalents subclasses.
    klasses: list[str] = []

    def test_setitem_series_no_coercion_from_values_list(self):
        # GH35865 - int casted to str when internally calling np.array(ser.values)
        mixed = pd.Series(["a", 1])
        mixed[:] = list(mixed.values)

        unchanged = pd.Series(["a", 1])
        tm.assert_series_equal(mixed, unchanged)

    def _assert_setitem_index_conversion(
        self, original_series, loc_key, expected_index, expected_dtype
    ):
        """Assign 5 under ``loc_key`` via ``[]`` and ``.loc``; check the index."""

        def _via_getitem(ser):
            # GH#33469 pre-2.0 with int loc_key and temp.index.dtype == np.float64
            #  `temp[loc_key] = 5` treated loc_key as positional
            ser[loc_key] = 5

        def _via_loc(ser):
            ser.loc[loc_key] = 5

        for assign in (_via_getitem, _via_loc):
            updated = original_series.copy()
            assign(updated)

            wanted = pd.Series([1, 2, 3, 4, 5], index=expected_index)
            tm.assert_series_equal(updated, wanted)
            # check dtype explicitly for sure
            assert updated.index.dtype == expected_dtype

    @pytest.mark.parametrize(
        "val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)]
    )
    def test_setitem_index_object(self, val, exp_dtype):
        labels = list("abcd")
        ser = pd.Series([1, 2, 3, 4], index=pd.Index(labels, dtype=object))
        assert ser.index.dtype == object

        if exp_dtype is not IndexError:
            grown_index = pd.Index(labels + [val], dtype=object)
            self._assert_setitem_index_conversion(ser, val, grown_index, exp_dtype)
            return

        # The integer key must warn about positional treatment and then fail
        # as out of bounds.
        scratch = ser.copy()
        warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
        msg = "index 5 is out of bounds for axis 0 with size 4"
        with pytest.raises(exp_dtype, match=msg):
            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                scratch[5] = 5

    @pytest.mark.parametrize(
        "val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", object)]
    )
    def test_setitem_index_int64(self, val, exp_dtype):
        ser = pd.Series([1, 2, 3, 4])
        assert ser.index.dtype == np.int64

        grown_index = pd.Index([0, 1, 2, 3, val])
        self._assert_setitem_index_conversion(ser, val, grown_index, exp_dtype)

    @pytest.mark.parametrize(
        "val,exp_dtype", [(5, np.float64), (5.1, np.float64), ("x", object)]
    )
    def test_setitem_index_float64(self, val, exp_dtype, request):
        float_labels = [1.1, 2.1, 3.1, 4.1]
        ser = pd.Series([1, 2, 3, 4], index=float_labels)
        assert ser.index.dtype == np.float64

        grown_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
        self._assert_setitem_index_conversion(ser, val, grown_index, exp_dtype)

    # Placeholders below are expected failures that raise NotImplementedError.

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_series_period(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_complex128(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_bool(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_datetime64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_datetime64tz(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_timedelta64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_setitem_index_period(self):
        raise NotImplementedError
class TestInsertIndexCoercion(CoercionBase):
    """Dtype of the Index returned by ``Index.insert`` for various scalars."""

    klasses = ["index"]
    method = "insert"

    def _assert_insert_conversion(self, original, value, expected, expected_dtype):
        """Insert ``value`` at position 1 of a copy and compare index and dtype."""
        inserted = original.copy().insert(1, value)

        tm.assert_index_equal(inserted, expected)
        assert inserted.dtype == expected_dtype

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1, object),
            (1.1, 1.1, object),
            (False, False, object),
            ("x", "x", object),
        ],
    )
    def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
        base = pd.Index(list("abcd"), dtype=object)
        assert base.dtype == object

        wanted = pd.Index(["a", coerced_val, "b", "c", "d"], dtype=object)
        self._assert_insert_conversion(base, insert, wanted, coerced_dtype)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1, None),
            (1.1, 1.1, np.float64),
            (False, False, object),  # GH#36319
            ("x", "x", object),
        ],
    )
    def test_insert_int_index(
        self, any_int_numpy_dtype, insert, coerced_val, coerced_dtype
    ):
        base = pd.Index([1, 2, 3, 4], dtype=any_int_numpy_dtype)

        # None in the parametrization means "dtype is retained"
        if coerced_dtype is None:
            coerced_dtype = any_int_numpy_dtype

        wanted = pd.Index([1, coerced_val, 2, 3, 4], dtype=coerced_dtype)
        self._assert_insert_conversion(base, insert, wanted, coerced_dtype)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1.0, None),
            # When float_numpy_dtype=float32, this is not the case
            # see the correction below
            (1.1, 1.1, np.float64),
            (False, False, object),  # GH#36319
            ("x", "x", object),
        ],
    )
    def test_insert_float_index(
        self, float_numpy_dtype, insert, coerced_val, coerced_dtype
    ):
        base = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=float_numpy_dtype)

        # None in the parametrization means "dtype is retained"
        if coerced_dtype is None:
            coerced_dtype = float_numpy_dtype

        if np_version_gt2 and float_numpy_dtype == "float32" and coerced_val == 1.1:
            # Hack, in the 2nd test case, since 1.1 can be losslessly cast to float32
            # the expected dtype will be float32 if the original dtype was float32
            coerced_dtype = np.float32

        wanted = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
        self._assert_insert_conversion(base, insert, wanted, coerced_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [
            (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
            (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
        ],
        ids=["datetime64", "datetime64tz"],
    )
    @pytest.mark.parametrize(
        "insert_value",
        [pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1],
    )
    def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value):
        # NOTE: ``insert_value`` is parametrized but not referenced in the body.
        dti = pd.DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz
        ).as_unit("ns")
        assert dti.dtype == exp_dtype

        wanted = pd.DatetimeIndex(
            ["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"],
            tz=fill_val.tz,
        ).as_unit("ns")
        self._assert_insert_conversion(dti, fill_val, wanted, exp_dtype)

        naive_ts = pd.Timestamp("2012-01-01")
        tokyo_ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")

        if fill_val.tz:
            # mismatched tzawareness
            via_insert = dti.insert(1, naive_ts)
            as_object = dti.astype(object).insert(1, naive_ts)
            assert as_object.dtype == object
            tm.assert_index_equal(via_insert, as_object)

            via_insert = dti.insert(1, tokyo_ts)
            # once deprecation is enforced:
            converted = dti.insert(1, tokyo_ts.tz_convert(dti.dtype.tz))
            assert converted.dtype == dti.dtype
            tm.assert_index_equal(via_insert, converted)
        else:
            # mismatched tzawareness
            via_insert = dti.insert(1, tokyo_ts)
            as_object = dti.astype(object).insert(1, tokyo_ts)
            assert as_object.dtype == object
            tm.assert_index_equal(via_insert, as_object)

        # a plain integer is compared against insertion into the object cast
        number = 1
        via_insert = dti.insert(1, number)
        as_object = dti.astype(object).insert(1, number)
        assert as_object[1] == number
        assert as_object.dtype == object
        tm.assert_index_equal(via_insert, as_object)

    def test_insert_index_timedelta64(self):
        tdi = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"])
        assert tdi.dtype == "timedelta64[ns]"

        # timedelta64 + timedelta64 => timedelta64
        wanted = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"])
        self._assert_insert_conversion(
            tdi, pd.Timedelta("10 day"), wanted, "timedelta64[ns]"
        )

        for foreign in [pd.Timestamp("2012-01-01"), 1]:
            via_insert = tdi.insert(1, foreign)
            as_object = tdi.astype(object).insert(1, foreign)
            assert as_object.dtype == object
            tm.assert_index_equal(via_insert, as_object)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (pd.Period("2012-01", freq="M"), "2012-01", "period[M]"),
            (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object),
            (1, 1, object),
            ("x", "x", object),
        ],
    )
    def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
        pidx = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M")
        assert pidx.dtype == "period[M]"

        data = [
            pd.Period("2011-01", freq="M"),
            coerced_val,
            pd.Period("2011-02", freq="M"),
            pd.Period("2011-03", freq="M"),
            pd.Period("2011-04", freq="M"),
        ]

        if not isinstance(insert, pd.Period):
            via_insert = pidx.insert(0, insert)
            as_object = pidx.astype(object).insert(0, insert)
            tm.assert_index_equal(via_insert, as_object)

            # TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M"
            #  casts that string to Period[M], not clear that is desirable
            if not isinstance(insert, pd.Timestamp):
                # non-castable string
                text = str(insert)
                via_insert = pidx.insert(0, text)
                as_object = pidx.astype(object).insert(0, str(insert))
                tm.assert_index_equal(via_insert, as_object)
            return

        wanted = pd.PeriodIndex(data, freq="M")
        self._assert_insert_conversion(pidx, insert, wanted, coerced_dtype)

        # string that can be parsed to appropriate PeriodDtype
        self._assert_insert_conversion(pidx, str(insert), wanted, coerced_dtype)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_insert_index_complex128(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_insert_index_bool(self):
        raise NotImplementedError
class TestWhereCoercion(CoercionBase):
    """Result dtype of ``where`` on Index/Series for differently typed ``other``."""

    method = "where"

    # mask used by the generic helpers: positions 0 and 2 keep the original value
    _cond = np.array([True, False, True, False])

    def _assert_where_conversion(
        self, original, cond, values, expected, expected_dtype
    ):
        """test coercion triggered by where"""
        target = original.copy()
        res = target.where(cond, values)

        tm.assert_equal(res, expected)
        assert res.dtype == expected_dtype

    def _construct_exp(self, obj, klass, fill_val, exp_dtype):
        """
        Build a 4-element ``other`` for ``where`` plus the matching expectation.

        Returns ``(values, exp)`` where ``exp`` takes positions 0 and 2 from
        ``obj`` and positions 1 and 3 from ``values``.
        """
        if fill_val is True:
            values = klass([True, False, True, True])
        elif isinstance(fill_val, (datetime, np.datetime64)):
            values = pd.date_range(fill_val, periods=4)
        else:
            values = klass(x * fill_val for x in [5, 6, 7, 8])

        exp = klass([obj[0], values[1], obj[2], values[3]], dtype=exp_dtype)
        return values, exp

    def _run_test(self, obj, fill_val, klass, exp_dtype):
        """Check ``where`` with a scalar ``other`` and then with a list-like one."""
        cond = klass(self._cond)

        # scalar ``other``
        exp = klass([obj[0], fill_val, obj[2], fill_val], dtype=exp_dtype)
        self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)

        # list-like ``other``
        values, exp = self._construct_exp(obj, klass, fill_val, exp_dtype)
        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [(1, object), (1.1, object), (1 + 1j, object), (True, object)],
    )
    def test_where_object(self, index_or_series, fill_val, exp_dtype):
        klass = index_or_series
        obj = klass(list("abcd"), dtype=object)
        assert obj.dtype == object

        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
    )
    def test_where_int64(self, index_or_series, fill_val, exp_dtype, request):
        klass = index_or_series

        obj = klass([1, 2, 3, 4])
        assert obj.dtype == np.int64
        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val, exp_dtype",
        [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
    )
    def test_where_float64(self, index_or_series, fill_val, exp_dtype, request):
        klass = index_or_series

        obj = klass([1.1, 2.2, 3.3, 4.4])
        assert obj.dtype == np.float64
        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [
            (1, np.complex128),
            (1.1, np.complex128),
            (1 + 1j, np.complex128),
            (True, object),
        ],
    )
    def test_where_complex128(self, index_or_series, fill_val, exp_dtype):
        klass = index_or_series
        obj = klass([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j], dtype=np.complex128)
        assert obj.dtype == np.complex128
        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [(1, object), (1.1, object), (1 + 1j, object), (True, np.bool_)],
    )
    def test_where_series_bool(self, index_or_series, fill_val, exp_dtype):
        klass = index_or_series

        obj = klass([True, False, True, False])
        assert obj.dtype == np.bool_
        self._run_test(obj, fill_val, klass, exp_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [
            (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
            (pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
        ],
        ids=["datetime64", "datetime64tz"],
    )
    def test_where_datetime64(self, index_or_series, fill_val, exp_dtype):
        klass = index_or_series

        obj = klass(pd.date_range("2011-01-01", periods=4, freq="D")._with_freq(None))
        assert obj.dtype == "datetime64[ns]"

        fv = fill_val
        # do the check with each of the available datetime scalars
        if exp_dtype == "datetime64[ns]":
            scalars = [fv, fv.to_pydatetime(), fv.to_datetime64()]
        else:
            scalars = [fv, fv.to_pydatetime()]

        for scalar in scalars:
            # Pass the loop variable itself so that every scalar flavour is
            # exercised (previously this branch re-ran with ``fill_val`` only).
            self._run_test(obj, scalar, klass, exp_dtype)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_where_index_complex128(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_where_index_bool(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_where_series_timedelta64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_where_series_period(self):
        raise NotImplementedError

    @pytest.mark.parametrize(
        "value", [pd.Timedelta(days=9), timedelta(days=9), np.timedelta64(9, "D")]
    )
    def test_where_index_timedelta64(self, value):
        tdi = pd.timedelta_range("1 Day", periods=4)
        cond = np.array([True, False, False, True])

        expected = pd.TimedeltaIndex(["1 Day", value, value, "4 Days"])
        result = tdi.where(cond, value)
        tm.assert_index_equal(result, expected)

        # wrong-dtyped NaT
        dtnat = np.datetime64("NaT", "ns")
        expected = pd.Index([tdi[0], dtnat, dtnat, tdi[3]], dtype=object)
        assert expected[1] is dtnat

        result = tdi.where(cond, dtnat)
        tm.assert_index_equal(result, expected)

    def test_where_index_period(self):
        dti = pd.date_range("2016-01-01", periods=3, freq="QS")
        pi = dti.to_period("Q")

        cond = np.array([False, True, False])

        # Passing a valid scalar
        value = pi[-1] + pi.freq * 10
        expected = pd.PeriodIndex([value, pi[1], value])
        result = pi.where(cond, value)
        tm.assert_index_equal(result, expected)

        # Case passing ndarray[object] of Periods
        other = np.asarray(pi + pi.freq * 10, dtype=object)
        result = pi.where(cond, other)
        expected = pd.PeriodIndex([other[0], pi[1], other[2]])
        tm.assert_index_equal(result, expected)

        # Passing a mismatched scalar -> casts to object
        td = pd.Timedelta(days=4)
        expected = pd.Index([td, pi[1], td], dtype=object)
        result = pi.where(cond, td)
        tm.assert_index_equal(result, expected)

        per = pd.Period("2020-04-21", "D")
        expected = pd.Index([per, pi[1], per], dtype=object)
        result = pi.where(cond, per)
        tm.assert_index_equal(result, expected)
class TestFillnaSeriesCoercion(CoercionBase):
    """Result dtype of ``fillna`` on Index/Series for various fill values."""

    # not indexing, but place here for consistency
    method = "fillna"

    @pytest.mark.xfail(reason="Test not implemented")
    def test_has_comprehensive_tests(self):
        raise NotImplementedError

    def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
        """Fill a copy of ``original`` with ``value``; compare result and dtype."""
        filled = original.copy().fillna(value)

        tm.assert_equal(filled, expected)
        assert filled.dtype == expected_dtype

    @pytest.mark.parametrize(
        "fill_val, fill_dtype",
        [(1, object), (1.1, object), (1 + 1j, object), (True, object)],
    )
    def test_fillna_object(self, index_or_series, fill_val, fill_dtype):
        with_gap = index_or_series(["a", np.nan, "c", "d"], dtype=object)
        assert with_gap.dtype == object

        wanted = index_or_series(["a", fill_val, "c", "d"], dtype=object)
        self._assert_fillna_conversion(with_gap, fill_val, wanted, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val,fill_dtype",
        [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
    )
    def test_fillna_float64(self, index_or_series, fill_val, fill_dtype):
        with_gap = index_or_series([1.1, np.nan, 3.3, 4.4])
        assert with_gap.dtype == np.float64

        wanted = index_or_series([1.1, fill_val, 3.3, 4.4])
        self._assert_fillna_conversion(with_gap, fill_val, wanted, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val,fill_dtype",
        [
            (1, np.complex128),
            (1.1, np.complex128),
            (1 + 1j, np.complex128),
            (True, object),
        ],
    )
    def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype):
        with_gap = index_or_series(
            [1 + 1j, np.nan, 3 + 3j, 4 + 4j], dtype=np.complex128
        )
        assert with_gap.dtype == np.complex128

        wanted = index_or_series([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
        self._assert_fillna_conversion(with_gap, fill_val, wanted, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val,fill_dtype",
        [
            (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
            (pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
            (1, object),
            ("x", object),
        ],
        ids=["datetime64", "datetime64tz", "object", "object"],
    )
    def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype):
        stamps = [
            pd.Timestamp("2011-01-01"),
            pd.Timestamp("2011-01-03"),
            pd.Timestamp("2011-01-04"),
        ]

        # NaT sits in position 1 and is the only slot fillna should touch
        with_gap = index_or_series([stamps[0], pd.NaT, stamps[1], stamps[2]])
        assert with_gap.dtype == "datetime64[ns]"

        wanted = index_or_series([stamps[0], fill_val, stamps[1], stamps[2]])
        self._assert_fillna_conversion(with_gap, fill_val, wanted, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val,fill_dtype",
        [
            (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
            (pd.Timestamp("2012-01-01"), object),
            # pre-2.0 with a mismatched tz we would get object result
            (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[ns, US/Eastern]"),
            (1, object),
            ("x", object),
        ],
    )
    def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype):
        tz = "US/Eastern"
        stamps = [
            pd.Timestamp("2011-01-01", tz=tz),
            pd.Timestamp("2011-01-03", tz=tz),
            pd.Timestamp("2011-01-04", tz=tz),
        ]

        with_gap = index_or_series([stamps[0], pd.NaT, stamps[1], stamps[2]])
        assert with_gap.dtype == "datetime64[ns, US/Eastern]"

        # tz-aware fill values are expected in the object's own timezone
        fv = (
            fill_val
            if getattr(fill_val, "tz", None) is None
            else fill_val.tz_convert(tz)
        )

        wanted = index_or_series([stamps[0], fv, stamps[1], stamps[2]])
        self._assert_fillna_conversion(with_gap, fill_val, wanted, fill_dtype)

    @pytest.mark.parametrize(
        "fill_val",
        [
            1,
            1.1,
            1 + 1j,
            True,
            pd.Interval(1, 2, closed="left"),
            pd.Timestamp("2012-01-01", tz="US/Eastern"),
            pd.Timestamp("2012-01-01"),
            pd.Timedelta(days=1),
            pd.Period("2016-01-01", "D"),
        ],
    )
    def test_fillna_interval(self, index_or_series, fill_val):
        ii = pd.interval_range(1.0, 5.0, closed="right").insert(1, np.nan)
        assert isinstance(ii.dtype, pd.IntervalDtype)

        with_gap = index_or_series(ii)
        wanted = index_or_series(
            [ii[0], fill_val, ii[2], ii[3], ii[4]], dtype=object
        )

        # every parametrized fill value is expected to give object dtype
        self._assert_fillna_conversion(with_gap, fill_val, wanted, object)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_series_int64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_index_int64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_series_bool(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_index_bool(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_series_timedelta64(self):
        raise NotImplementedError

    @pytest.mark.parametrize(
        "fill_val",
        [
            1,
            1.1,
            1 + 1j,
            True,
            pd.Interval(1, 2, closed="left"),
            pd.Timestamp("2012-01-01", tz="US/Eastern"),
            pd.Timestamp("2012-01-01"),
            pd.Timedelta(days=1),
            pd.Period("2016-01-01", "W"),
        ],
    )
    def test_fillna_series_period(self, index_or_series, fill_val):
        pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT)
        assert isinstance(pi.dtype, pd.PeriodDtype)

        with_gap = index_or_series(pi)
        wanted = index_or_series(
            [pi[0], fill_val, pi[2], pi[3], pi[4]], dtype=object
        )

        # every parametrized fill value is expected to give object dtype
        self._assert_fillna_conversion(with_gap, fill_val, wanted, object)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_index_timedelta64(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_fillna_index_period(self):
        raise NotImplementedError
class TestReplaceSeriesCoercion(CoercionBase):
    """Result dtype of ``Series.replace`` when values move between dtypes."""

    klasses = ["series"]
    method = "replace"

    # dtype string -> two sample values of that dtype; the ``from_key`` /
    # ``to_key`` fixtures below index into this mapping.
    rep: dict[str, list] = {}
    rep["object"] = ["a", "b"]
    rep["int64"] = [4, 5]
    rep["float64"] = [1.1, 2.2]
    rep["complex128"] = [1 + 1j, 2 + 2j]
    rep["bool"] = [True, False]
    rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")]

    for tz in ["UTC", "US/Eastern"]:
        # to test tz => different tz replacement
        key = f"datetime64[ns, {tz}]"
        rep[key] = [
            pd.Timestamp("2011-01-01", tz=tz),
            pd.Timestamp("2011-01-03", tz=tz),
        ]

    rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]

    @pytest.fixture(params=["dict", "series"])
    def how(self, request):
        """Kind of container used to build the replacer."""
        return request.param

    @pytest.fixture(
        params=[
            "object",
            "int64",
            "float64",
            "complex128",
            "bool",
            "datetime64[ns]",
            "datetime64[ns, UTC]",
            "datetime64[ns, US/Eastern]",
            "timedelta64[ns]",
        ]
    )
    def from_key(self, request):
        """Key into ``rep`` for the values being replaced."""
        return request.param

    @pytest.fixture(
        params=[
            "object",
            "int64",
            "float64",
            "complex128",
            "bool",
            "datetime64[ns]",
            "datetime64[ns, UTC]",
            "datetime64[ns, US/Eastern]",
            "timedelta64[ns]",
        ],
        ids=[
            "object",
            "int64",
            "float64",
            "complex128",
            "bool",
            "datetime64",
            "datetime64tz",
            "datetime64tz",
            "timedelta64",
        ],
    )
    def to_key(self, request):
        """Key into ``rep`` for the replacement values."""
        return request.param

    @pytest.fixture
    def replacer(self, how, from_key, to_key):
        """
        Object we will pass to `Series.replace`
        """
        if how == "dict":
            replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
        elif how == "series":
            replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
        else:
            raise ValueError
        return replacer

    # Expected needs adjustment for the infer string option, seems to work as expected
    @pytest.mark.skipif(using_pyarrow_string_dtype(), reason="TODO: test is to complex")
    def test_replace_series(self, how, to_key, from_key, replacer):
        index = pd.Index([3, 4], name="xxx")
        obj = pd.Series(self.rep[from_key], index=index, name="yyy")
        assert obj.dtype == from_key

        if from_key.startswith("datetime") and to_key.startswith("datetime"):
            # tested below
            return
        elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]:
            # tested below
            return

        # ``to_key == "int64"`` replaces ``to_key in ("int64")``: without a
        # trailing comma the parentheses hold a plain string, so ``in`` was a
        # substring test rather than tuple membership.
        if (from_key == "float64" and to_key == "int64") or (
            from_key == "complex128" and to_key in ("int64", "float64")
        ):
            if not IS64 or is_platform_windows():
                pytest.skip(f"32-bit platform buggy: {from_key} -> {to_key}")

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key], index=index, name="yyy", dtype=from_key)

        else:
            exp = pd.Series(self.rep[to_key], index=index, name="yyy")
            assert exp.dtype == to_key

        msg = "Downcasting behavior in `replace`"
        warn = FutureWarning
        if (
            exp.dtype == obj.dtype
            or exp.dtype == object
            or (exp.dtype.kind in "iufc" and obj.dtype.kind in "iufc")
        ):
            # same dtype, object result, or numeric -> numeric: no warning
            warn = None
        with tm.assert_produces_warning(warn, match=msg):
            result = obj.replace(replacer)

        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "to_key",
        ["timedelta64[ns]", "bool", "object", "complex128", "float64", "int64"],
        indirect=True,
    )
    @pytest.mark.parametrize(
        "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
    )
    def test_replace_series_datetime_tz(
        self, how, to_key, from_key, replacer, using_infer_string
    ):
        index = pd.Index([3, 4], name="xyz")
        obj = pd.Series(self.rep[from_key], index=index, name="yyy")
        assert obj.dtype == from_key

        exp = pd.Series(self.rep[to_key], index=index, name="yyy")
        if using_infer_string and to_key == "object":
            assert exp.dtype == "string"
        else:
            assert exp.dtype == to_key

        msg = "Downcasting behavior in `replace`"
        warn = FutureWarning if exp.dtype != object else None
        with tm.assert_produces_warning(warn, match=msg):
            result = obj.replace(replacer)

        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "to_key",
        ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
        indirect=True,
    )
    @pytest.mark.parametrize(
        "from_key",
        ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
        indirect=True,
    )
    def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer):
        index = pd.Index([3, 4], name="xyz")
        obj = pd.Series(self.rep[from_key], index=index, name="yyy")
        assert obj.dtype == from_key

        exp = pd.Series(self.rep[to_key], index=index, name="yyy")
        warn = FutureWarning
        if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance(
            exp.dtype, pd.DatetimeTZDtype
        ):
            # with mismatched tzs, we retain the original dtype as of 2.0
            exp = exp.astype(obj.dtype)
            warn = None
        else:
            assert exp.dtype == to_key
            if to_key == from_key:
                warn = None

        msg = "Downcasting behavior in `replace`"
        with tm.assert_produces_warning(warn, match=msg):
            result = obj.replace(replacer)

        tm.assert_series_equal(result, exp)

    @pytest.mark.xfail(reason="Test not implemented")
    def test_replace_series_period(self):
        raise NotImplementedError

View File

@ -0,0 +1,191 @@
import re
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDatetimeIndex:
    """Indexing checks on objects holding datetime data or a datetime index."""

    def test_get_loc_naive_dti_aware_str_deprecated(self):
        # GH#46903
        base = Timestamp("20130101")._value
        dti = pd.DatetimeIndex([base + 50 + offset for offset in range(100)])
        ser = Series(range(100), index=dti)

        key = "2013-01-01 00:00:00.000000050+0000"
        # the KeyError text must contain the repr of the key
        pattern = re.escape(repr(key))

        with pytest.raises(KeyError, match=pattern):
            ser[key]

        with pytest.raises(KeyError, match=pattern):
            dti.get_loc(key)

    def test_indexing_with_datetime_tz(self):
        # GH#8260
        # support datetime64 with tz
        aware = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
        naive = date_range("20130110", periods=3)

        df = DataFrame({"A": aware, "B": naive})
        df["C"] = aware
        df.iloc[1, 1] = pd.NaT
        df.iloc[1, 2] = pd.NaT

        expected = Series(
            [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT],
            index=list("ABC"),
            dtype="object",
            name=1,
        )

        # indexing: the positional and the label accessor give the same row
        tm.assert_series_equal(df.iloc[1], expected)
        tm.assert_series_equal(df.loc[1], expected)

    def test_indexing_fast_xs(self):
        # indexing - fast_xs
        df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")})

        row = df.iloc[5]
        expected = Series(
            [Timestamp("2014-01-06 00:00:00+0000", tz="UTC")],
            index=["a"],
            name=5,
            dtype="M8[ns, UTC]",
        )
        tm.assert_series_equal(row, expected)

        row = df.loc[5]
        tm.assert_series_equal(row, expected)

        # indexing - boolean
        later = df[df.a > df.a[3]]
        tm.assert_frame_equal(later, df.iloc[4:])

    def test_consistency_with_tz_aware_scalar(self):
        # xref gh-12938
        # various ways of indexing the same tz-aware scalar
        df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame()
        df = pd.concat([df, df]).reset_index(drop=True)

        expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels")

        assert df[0][0] == expected
        assert df.iloc[0, 0] == expected
        assert df.loc[0, 0] == expected
        assert df.iat[0, 0] == expected
        assert df.at[0, 0] == expected
        assert df[0].loc[0] == expected
        assert df[0].at[0] == expected

    def test_indexing_with_datetimeindex_tz(self, indexer_sl):
        # GH 12050
        # indexing on a series with a datetimeindex with tz
        index = date_range("2015-01-01", periods=2, tz="utc")
        ser = Series(range(2), index=index, dtype="int64")

        # list-like indexing
        for sel in (index, list(index)):
            # getitem
            picked = indexer_sl(ser)[sel]
            wanted = ser.copy()
            if sel is not index:
                wanted.index = wanted.index._with_freq(None)
            tm.assert_series_equal(picked, wanted)

            # setitem
            changed = ser.copy()
            indexer_sl(changed)[sel] = 1
            tm.assert_series_equal(changed, Series(1, index=index))

        # single element indexing

        # getitem
        assert indexer_sl(ser)[index[1]] == 1

        # setitem
        changed = ser.copy()
        indexer_sl(changed)[index[1]] = 5
        tm.assert_series_equal(changed, Series([0, 5], index=index))

    def test_nanosecond_getitem_setitem_with_tz(self):
        # GH 11679
        data = ["2016-06-28 08:30:00.123456789"]
        index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]")
        df = DataFrame({"a": [10]}, index=index)

        row = df.loc[df.index[0]]
        tm.assert_series_equal(row, Series(10, index=["a"], name=df.index[0]))

        changed = df.copy()
        changed.loc[df.index[0], "a"] = -1
        tm.assert_frame_equal(changed, DataFrame(-1, index=index, columns=["a"]))

    def test_getitem_str_slice_millisecond_resolution(self, frame_or_series):
        # GH#33589
        keys = [
            "2017-10-25T16:25:04.151",
            "2017-10-25T16:25:04.252",
            "2017-10-25T16:50:05.237",
            "2017-10-25T16:50:05.238",
        ]
        obj = frame_or_series([1, 2, 3, 4], index=list(map(Timestamp, keys)))

        # string slice from the second key to the third key
        sliced = obj[keys[1] : keys[2]]

        wanted = frame_or_series(
            [2, 3],
            index=[Timestamp(stamp) for stamp in keys[1:3]],
        )
        tm.assert_equal(sliced, wanted)

    def test_getitem_pyarrow_index(self, frame_or_series):
        # GH 53644
        pytest.importorskip("pyarrow")

        def arrow_days(periods):
            # daily range starting at "2020", cast to the pyarrow timestamp dtype
            days = date_range("2020", freq="D", periods=periods)
            return days.astype("timestamp[us][pyarrow]")

        obj = frame_or_series(range(5), index=arrow_days(5))

        picked = obj.loc[obj.index[:-3]]

        wanted = frame_or_series(range(2), index=arrow_days(2))
        tm.assert_equal(picked, wanted)

View File

@ -0,0 +1,689 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
RangeIndex,
Series,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
def gen_obj(klass, index):
    """
    Build a test object labelled by ``index``.

    When ``klass`` is Series the values are ``0..len(index)-1``; otherwise a
    square DataFrame of standard-normal draws (generator seeded with 2) is
    returned with ``index`` used for both rows and columns.
    """
    if klass is Series:
        return Series(np.arange(len(index)), index=index)

    values = np.random.default_rng(2).standard_normal((len(index), len(index)))
    return DataFrame(values, index=index, columns=index)
class TestFloatIndexers:
def check(self, result, original, indexer, getitem):
    """
    Compare ``result`` with the positional selection it should equal.

    For a non-Series ``original`` with ``getitem`` set, the expectation is
    taken along the columns (``iloc[:, indexer]``); in every other case it
    is ``iloc[indexer]``.
    """
    column_wise = getitem and not isinstance(original, Series)
    if column_wise:
        expected = original.iloc[:, indexer]
    else:
        expected = original.iloc[indexer]

    tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize(
    "index",
    [
        Index(list("abcde")),
        Index(list("abcde"), dtype="category"),
        date_range("2020-01-01", periods=5),
        timedelta_range("1 day", periods=5),
        period_range("2020-01-01", periods=5),
    ],
)
def test_scalar_non_numeric(self, index, frame_or_series, indexer_sl):
    """Float scalar keys on non-numeric indexes: lookup fails, setting works."""
    # GH 4892
    # float_indexers should raise exceptions
    # on appropriate Index types & accessors

    s = gen_obj(frame_or_series, index)

    # getting
    # (the dot is escaped so that only the literal text "3.0" is accepted)
    with pytest.raises(KeyError, match=r"^3\.0$"):
        indexer_sl(s)[3.0]

    # contains
    assert 3.0 not in s

    s2 = s.copy()
    indexer_sl(s2)[3.0] = 10

    # after setting, check which axis (if any) now contains the new key
    if indexer_sl is tm.setitem:
        assert 3.0 in s2.axes[-1]
    elif indexer_sl is tm.loc:
        assert 3.0 in s2.axes[0]
    else:
        assert 3.0 not in s2.axes[0]
        assert 3.0 not in s2.axes[-1]
@pytest.mark.parametrize(
    "index",
    [
        Index(list("abcde")),
        Index(list("abcde"), dtype="category"),
        date_range("2020-01-01", periods=5),
        timedelta_range("1 day", periods=5),
        period_range("2020-01-01", periods=5),
    ],
)
def test_scalar_non_numeric_series_fallback(self, index):
    """Integer key warns about positional treatment; float key is a KeyError."""
    # fallsback to position selection, series only
    s = Series(np.arange(len(index)), index=index)

    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        s[3]

    # (the dot is escaped so that only the literal text "3.0" is accepted)
    with pytest.raises(KeyError, match=r"^3\.0$"):
        s[3.0]
def test_scalar_with_mixed(self, indexer_sl):
    """Scalar lookups on a pure string index and on a mixed str/float index."""
    s2 = Series([1, 2, 3], index=["a", "b", "c"])
    s3 = Series([1, 2, 3], index=["a", "b", 1.5])

    # lookup in a pure string index with an invalid indexer
    # (one strict check; the dot is escaped so only "1.0" is accepted)
    with pytest.raises(KeyError, match=r"^1\.0$"):
        indexer_sl(s2)[1.0]

    result = indexer_sl(s2)["b"]
    expected = 2
    assert result == expected

    # mixed index so we have label
    # indexing
    with pytest.raises(KeyError, match=r"^1\.0$"):
        indexer_sl(s3)[1.0]

    if indexer_sl is not tm.loc:
        # __getitem__ falls back to positional
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = s3[1]
        expected = 2
        assert result == expected

    result = indexer_sl(s3)[1.5]
    expected = 3
    assert result == expected
@pytest.mark.parametrize(
    "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
)
def test_scalar_integer(self, index, frame_or_series, indexer_sl):
    """Look up and assign with the float key ``3.0`` on an integer index."""
    uses_getitem = indexer_sl is not tm.loc

    # float key on an integer index: compared against position/label 3
    obj = gen_obj(frame_or_series, index)
    looked_up = indexer_sl(obj)[3.0]
    self.check(looked_up, obj, 3, uses_getitem)

    if not isinstance(obj, Series):
        compare = tm.assert_series_equal
        fill_value = 100 if uses_getitem else 100.0
        expected = Series(fill_value, index=range(len(obj)), name=3)
    else:

        def compare(left, right):
            assert left == right

        expected = 100

    modified = obj.copy()
    indexer_sl(modified)[3.0] = 100

    # the assigned value is read back through both the float and int key
    for key in (3.0, 3):
        compare(indexer_sl(modified)[key], expected)
@pytest.mark.parametrize(
    "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
)
def test_scalar_integer_contains_float(self, index, frame_or_series):
    """Assert that ``3.0 in obj`` holds for an object built on an integer index."""
    container = gen_obj(frame_or_series, index)

    # membership with a float equal to an integer label
    is_member = 3.0 in container
    assert is_member
def test_scalar_float(self, frame_or_series):
    """Exercise scalar float keys on a float index via loc, ``[]`` and iloc."""
    # scalar float indexers work on a float index
    index = Index(np.arange(5.0))
    s = gen_obj(frame_or_series, index)

    # assert all operations except for iloc are ok
    indexer = index[3]
    for idxr in [tm.loc, tm.setitem]:
        # flag handed to self.check; False only for the loc accessor
        getitem = idxr is not tm.loc

        # getting
        result = idxr(s)[indexer]
        self.check(result, s, 3, getitem)

        # setting
        # NOTE(review): no assignment happens here; the copy is only read
        # back with the same key and checked against the original object.
        s2 = s.copy()

        result = idxr(s2)[indexer]
        self.check(result, s, 3, getitem)

        # random float is a KeyError
        with pytest.raises(KeyError, match=r"^3\.5$"):
            idxr(s)[3.5]

    # contains
    assert 3.0 in s

    # iloc succeeds with an integer
    expected = s.iloc[3]
    s2 = s.copy()

    # write the value just read back into the same position of the copy
    s2.iloc[3] = expected
    result = s2.iloc[3]
    self.check(result, s, 3, False)
@pytest.mark.parametrize(
    "index",
    [
        Index(list("abcde"), dtype=object),
        date_range("2020-01-01", periods=5),
        timedelta_range("1 day", periods=5),
        period_range("2020-01-01", periods=5),
    ],
)
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
def test_slice_non_numeric(self, index, idx, frame_or_series, indexer_sli):
    """Expect TypeError for float slice bounds on non-numeric indexes (GH 4892)."""
    obj = gen_obj(frame_or_series, index)
    positional = indexer_sli is tm.iloc

    if positional:
        getitem_msg = (
            "cannot do positional indexing "
            rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
            "type float"
        )
        # iloc assignment is expected to fail with a different message
        setitem_msg = (
            "slice indices must be integers or None or have an __index__ method"
        )
    else:
        getitem_msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers "
            r"\[(3|4)(\.0)?\] "
            r"of type (float|int)"
        )
        # label-based accessors keep the same message for assignment
        setitem_msg = getitem_msg

    # getitem
    with pytest.raises(TypeError, match=getitem_msg):
        indexer_sli(obj)[idx]

    # setitem
    with pytest.raises(TypeError, match=setitem_msg):
        indexer_sli(obj)[idx] = 0
def test_slice_integer(self):
    """Slice integer-indexed Series with float bounds via ``.loc`` and ``[]``.

    For every index, ``.loc`` slices are compared (through ``self.check``)
    with an integer slice, while ``[]`` with float bounds must raise
    TypeError.
    """
    # same as above, but for Integer based indexes
    # these coerce to a like integer
    # oob indicates if we are out of bounds
    # of positional indexing
    for index, oob in [
        (Index(np.arange(5, dtype=np.int64)), False),
        (RangeIndex(5), False),
        (Index(np.arange(5, dtype=np.int64) + 10), True),
    ]:
        # s is an in-range index
        s = Series(range(5), index=index)

        # getitem
        for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
            result = s.loc[idx]

            # these are all label indexing
            # except getitem which is positional
            # empty
            if oob:
                # labels start at 10, so an empty selection is expected
                indexer = slice(0, 0)
            else:
                indexer = slice(3, 5)
            self.check(result, s, indexer, False)

        # getitem out-of-bounds
        for idx in [slice(-6, 6), slice(-6.0, 6.0)]:
            result = s.loc[idx]

            # these are all label indexing
            # except getitem which is positional
            # empty
            if oob:
                indexer = slice(0, 0)
            else:
                indexer = slice(-6, 6)
            self.check(result, s, indexer, False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers \[-6\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[slice(-6.0, 6.0)]

        # getitem odd floats
        for idx, res1 in [
            (slice(2.5, 4), slice(3, 5)),
            (slice(2, 3.5), slice(2, 4)),
            (slice(2.5, 3.5), slice(3, 4)),
        ]:
            result = s.loc[idx]
            if oob:
                res = slice(0, 0)
            else:
                res = res1

            self.check(result, s, res, False)

            # positional indexing
            msg = (
                "cannot do slice indexing "
                rf"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[idx]
@pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)])
def test_integer_positional_indexing(self, idx):
    """Float slice bounds must raise for ``[]`` and ``iloc`` on an integer index."""
    ser = Series(range(2, 6), index=range(2, 6))

    # an all-integer slice through [] matches iloc
    tm.assert_series_equal(ser[2:4], ser.iloc[2:4])

    klass = RangeIndex
    msg = (
        "cannot do (slice|positional) indexing "
        rf"on {klass.__name__} with these indexers \[(2|4)\.0\] of "
        "type float"
    )
    # same message pattern for plain [] and for iloc
    for accessor in (ser, ser.iloc):
        with pytest.raises(TypeError, match=msg):
            accessor[idx]
@pytest.mark.parametrize(
    "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
)
def test_slice_integer_frame_getitem(self, index):
    """Slice a DataFrame with float bounds via ``.loc`` and via ``[]``.

    ``.loc`` results are compared with an integer slice through
    ``self.check``; ``[]`` with float bounds must raise TypeError.
    """
    # similar to above, but on the getitem dim (of a DataFrame)
    s = DataFrame(np.random.default_rng(2).standard_normal((5, 2)), index=index)

    # getitem
    for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:
        result = s.loc[idx]
        indexer = slice(0, 2)
        self.check(result, s, indexer, False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers \[(0|1)\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[idx]

    # getitem out-of-bounds
    for idx in [slice(-10, 10), slice(-10.0, 10.0)]:
        result = s.loc[idx]
        self.check(result, s, slice(-10, 10), True)

    # positional indexing
    msg = (
        "cannot do slice indexing "
        rf"on {type(index).__name__} with these indexers \[-10\.0\] of "
        "type float"
    )
    with pytest.raises(TypeError, match=msg):
        s[slice(-10.0, 10.0)]

    # getitem odd floats
    for idx, res in [
        (slice(0.5, 1), slice(1, 2)),
        (slice(0, 0.5), slice(0, 1)),
        (slice(0.5, 1.5), slice(1, 2)),
    ]:
        result = s.loc[idx]
        self.check(result, s, res, False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers \[0\.5\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[idx]
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
@pytest.mark.parametrize(
    "index", [Index(np.arange(5), dtype=np.int64), RangeIndex(5)]
)
def test_float_slice_getitem_with_integer_index_raises(self, idx, index):
    """Float slices assign through ``.loc`` but raise through plain ``[]``."""
    values = np.random.default_rng(2).standard_normal((5, 2))
    frame = DataFrame(values, index=index)

    # assignment through .loc on a copy, then read the same slice back
    zeroed = frame.copy()
    zeroed.loc[idx] = 0
    assert (zeroed.loc[idx].values.ravel() == 0).all()

    # plain [] with float slice bounds raises for both set and get
    msg = (
        "cannot do slice indexing "
        rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
        "type float"
    )
    with pytest.raises(TypeError, match=msg):
        frame[idx] = 0

    with pytest.raises(TypeError, match=msg):
        frame[idx]
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
def test_slice_float(self, idx, frame_or_series, indexer_sl):
    """Slice an object with a float index using float/int slice bounds."""
    float_index = Index(np.arange(5.0)) + 0.1
    obj = gen_obj(frame_or_series, float_index)
    expected = obj.iloc[3:4]

    # reading: same container type and same content as iloc[3:4]
    selected = indexer_sl(obj)[idx]
    assert isinstance(selected, type(obj))
    tm.assert_equal(selected, expected)

    # writing: assign zero on a copy and read the slice back
    mutated = obj.copy()
    indexer_sl(mutated)[idx] = 0
    flat_values = indexer_sl(mutated)[idx].values.ravel()
    assert (flat_values == 0).all()
def test_floating_index_doc_example(self):
index = Index([1.5, 2, 3, 4.5, 5])
s = Series(range(5), index=index)
assert s[3] == 2
assert s.loc[3] == 2
assert s.iloc[3] == 3
def test_floating_misc(self, indexer_sl):
    """Scalar, slice and list-like lookups on a Series with a float index.

    ``indexer_sl`` maps the Series to the accessor under test; it is
    compared against ``tm.setitem`` to decide whether a FutureWarning is
    expected for integer slices.
    """
    # related 236
    # scalar/slicing of a float index
    s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)

    # label based slicing
    result = indexer_sl(s)[1.0:3.0]
    expected = Series(1, index=[2.5])
    tm.assert_series_equal(result, expected)

    # exact indexing when found

    result = indexer_sl(s)[5.0]
    assert result == 2

    result = indexer_sl(s)[5]
    assert result == 2

    # value not found (and no fallbacking at all)

    # scalar integers
    with pytest.raises(KeyError, match=r"^4$"):
        indexer_sl(s)[4]

    # fancy floats/integers create the correct entry (as nan)
    # fancy tests
    expected = Series([2, 0], index=Index([5.0, 0.0], dtype=np.float64))
    for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float
        tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

    expected = Series([2, 0], index=Index([5, 0], dtype="float64"))
    for fancy_idx in [[5, 0], np.array([5, 0])]:
        tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

    # a warning is only expected for the integer slice through tm.setitem
    warn = FutureWarning if indexer_sl is tm.setitem else None
    msg = r"The behavior of obj\[i:j\] with a float-dtype index"

    # all should return the same as we are slicing 'the same'
    with tm.assert_produces_warning(warn, match=msg):
        result1 = indexer_sl(s)[2:5]
    result2 = indexer_sl(s)[2.0:5.0]
    result3 = indexer_sl(s)[2.0:5]
    result4 = indexer_sl(s)[2.1:5]
    tm.assert_series_equal(result1, result2)
    tm.assert_series_equal(result1, result3)
    tm.assert_series_equal(result1, result4)

    expected = Series([1, 2], index=[2.5, 5.0])
    with tm.assert_produces_warning(warn, match=msg):
        result = indexer_sl(s)[2:5]

    tm.assert_series_equal(result, expected)

    # list selection
    result1 = indexer_sl(s)[[0.0, 5, 10]]
    result2 = s.iloc[[0, 2, 4]]
    tm.assert_series_equal(result1, result2)

    # any label missing from the index makes the list lookup raise
    with pytest.raises(KeyError, match="not in index"):
        indexer_sl(s)[[1.6, 5, 10]]

    with pytest.raises(KeyError, match="not in index"):
        indexer_sl(s)[[0, 1, 2]]

    result = indexer_sl(s)[[2.5, 5]]
    tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0]))

    result = indexer_sl(s)[[2.5]]
    tm.assert_series_equal(result, Series([1], index=[2.5]))
def test_floatindex_slicing_bug(self, float_numpy_dtype):
    """Smoke-test ``value_counts`` and its string form on a fixed dataset.

    The Series is built from the literal mapping below using the
    ``float_numpy_dtype`` argument as dtype; the index of the
    ``value_counts`` result must keep that dtype.
    """
    # GH 5557, related to slicing a float index
    dtype = float_numpy_dtype
    # literal integer-key -> float-value fixture data
    ser = {
        256: 2321.0,
        1: 78.0,
        2: 2716.0,
        3: 0.0,
        4: 369.0,
        5: 0.0,
        6: 269.0,
        7: 0.0,
        8: 0.0,
        9: 0.0,
        10: 3536.0,
        11: 0.0,
        12: 24.0,
        13: 0.0,
        14: 931.0,
        15: 0.0,
        16: 101.0,
        17: 78.0,
        18: 9643.0,
        19: 0.0,
        20: 0.0,
        21: 0.0,
        22: 63761.0,
        23: 0.0,
        24: 446.0,
        25: 0.0,
        26: 34773.0,
        27: 0.0,
        28: 729.0,
        29: 78.0,
        30: 0.0,
        31: 0.0,
        32: 3374.0,
        33: 0.0,
        34: 1391.0,
        35: 0.0,
        36: 361.0,
        37: 0.0,
        38: 61808.0,
        39: 0.0,
        40: 0.0,
        41: 0.0,
        42: 6677.0,
        43: 0.0,
        44: 802.0,
        45: 0.0,
        46: 2691.0,
        47: 0.0,
        48: 3582.0,
        49: 0.0,
        50: 734.0,
        51: 0.0,
        52: 627.0,
        53: 70.0,
        54: 2584.0,
        55: 0.0,
        56: 324.0,
        57: 0.0,
        58: 605.0,
        59: 0.0,
        60: 0.0,
        61: 0.0,
        62: 3989.0,
        63: 10.0,
        64: 42.0,
        65: 0.0,
        66: 904.0,
        67: 0.0,
        68: 88.0,
        69: 70.0,
        70: 8172.0,
        71: 0.0,
        72: 0.0,
        73: 0.0,
        74: 64902.0,
        75: 0.0,
        76: 347.0,
        77: 0.0,
        78: 36605.0,
        79: 0.0,
        80: 379.0,
        81: 70.0,
        82: 0.0,
        83: 0.0,
        84: 3001.0,
        85: 0.0,
        86: 1630.0,
        87: 7.0,
        88: 364.0,
        89: 0.0,
        90: 67404.0,
        91: 9.0,
        92: 0.0,
        93: 0.0,
        94: 7685.0,
        95: 0.0,
        96: 1017.0,
        97: 0.0,
        98: 2831.0,
        99: 0.0,
        100: 2963.0,
        101: 0.0,
        102: 854.0,
        103: 0.0,
        104: 0.0,
        105: 0.0,
        106: 0.0,
        107: 0.0,
        108: 0.0,
        109: 0.0,
        110: 0.0,
        111: 0.0,
        112: 0.0,
        113: 0.0,
        114: 0.0,
        115: 0.0,
        116: 0.0,
        117: 0.0,
        118: 0.0,
        119: 0.0,
        120: 0.0,
        121: 0.0,
        122: 0.0,
        123: 0.0,
        124: 0.0,
        125: 0.0,
        126: 67744.0,
        127: 22.0,
        128: 264.0,
        129: 0.0,
        260: 197.0,
        268: 0.0,
        265: 0.0,
        269: 0.0,
        261: 0.0,
        266: 1198.0,
        267: 0.0,
        262: 2629.0,
        258: 775.0,
        257: 0.0,
        263: 0.0,
        259: 0.0,
        264: 163.0,
        250: 10326.0,
        251: 0.0,
        252: 1228.0,
        253: 0.0,
        254: 2769.0,
        255: 0.0,
    }

    # smoke test for the repr
    s = Series(ser, dtype=dtype)
    result = s.value_counts()
    assert result.index.dtype == dtype
    # only checks that building the string form does not raise
    str(result)

View File

@ -0,0 +1,53 @@
import numpy as np
from pandas import (
DataFrame,
Series,
period_range,
)
import pandas._testing as tm
def test_iat(float_frame):
for i, row in enumerate(float_frame.index):
for j, col in enumerate(float_frame.columns):
result = float_frame.iat[i, j]
expected = float_frame.at[row, col]
assert result == expected
def test_iat_duplicate_columns():
    """``iat`` reads the first cell of a frame whose column labels repeat."""
    # https://github.com/pandas-dev/pandas/issues/11754
    frame = DataFrame([[1, 2]], columns=["x", "x"])
    first_cell = frame.iat[0, 0]
    assert first_cell == 1
def test_iat_getitem_series_with_period_index():
    """``iat[0]`` agrees with a label lookup of the first period."""
    # GH#4390, iat incorrectly indexing
    periods = period_range("1/1/2001", periods=10)
    values = np.random.default_rng(2).standard_normal(10)
    ser = Series(values, index=periods)

    by_label = ser[periods[0]]
    by_position = ser.iat[0]
    assert by_label == by_position
def test_iat_setitem_item_cache_cleared(
    indexer_ial, using_copy_on_write, warn_copy_on_write
):
    """Scalar assignment must stay visible through a previously taken column.

    ``indexer_ial`` maps the frame to the scalar accessor under test.
    ``warn_copy_on_write`` is forwarded to ``tm.assert_cow_warning`` and
    ``using_copy_on_write`` disables the check on the earlier column.
    """
    # GH#45684
    data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)}
    df = DataFrame(data).copy()
    # column taken BEFORE the assignments below
    ser = df["y"]

    # previously this iat setting would split the block and fail to clear
    #  the item_cache.
    with tm.assert_cow_warning(warn_copy_on_write):
        indexer_ial(df)[7, 0] = 9999

    with tm.assert_cow_warning(warn_copy_on_write):
        indexer_ial(df)[7, 1] = 1234

    assert df.iat[7, 1] == 1234
    if not using_copy_on_write:
        # the earlier column object is only expected to see the new value
        # when this flag is off
        assert ser.iloc[-1] == 1234
    assert df.iloc[-1, -1] == 1234

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,61 @@
# Tests aimed at pandas.core.indexers
import numpy as np
import pytest
from pandas.core.indexers import (
is_scalar_indexer,
length_of_indexer,
validate_indices,
)
def test_length_of_indexer():
    """A boolean mask with one True entry has indexer length 1."""
    mask = np.zeros(4, dtype=bool)
    mask[0] = 1
    assert length_of_indexer(mask) == 1
def test_is_scalar_indexer():
    """Check ``is_scalar_indexer`` for tuples, arrays, slices and ints."""
    pair = (0, 1)
    assert is_scalar_indexer(pair, 2)
    assert not is_scalar_indexer(pair[0], 2)

    # tuples containing an array are rejected for ndim=2
    array_tuples = [
        (np.array([2]), 1),
        (np.array([2]), np.array([3])),
        (np.array([2]), np.array([3, 4])),
    ]
    for candidate in array_tuples:
        assert not is_scalar_indexer(candidate, 2)

    assert not is_scalar_indexer(slice(None), 1)

    # a bare int and a 1-tuple are both accepted for ndim=1
    for candidate in (0, (0,)):
        assert is_scalar_indexer(candidate, 1)
class TestValidateIndices:
    """Accepted and rejected inputs of ``validate_indices``."""

    def test_validate_indices_ok(self):
        in_bounds = np.asarray([0, 1])
        validate_indices(in_bounds, 2)
        # empty selection against length 0
        validate_indices(in_bounds[:0], 0)
        # -1 entries against length 0
        validate_indices(np.array([-1, -1]), 0)

    def test_validate_indices_low(self):
        too_low = np.asarray([0, -2])
        with pytest.raises(ValueError, match="'indices' contains"):
            validate_indices(too_low, 2)

    def test_validate_indices_high(self):
        too_high = np.asarray([0, 1, 2])
        with pytest.raises(IndexError, match="indices are out"):
            validate_indices(too_high, 2)

    def test_validate_indices_empty(self):
        non_empty = np.array([0, 1])
        with pytest.raises(IndexError, match="indices are out"):
            validate_indices(non_empty, 0)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,75 @@
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize(
    "values, dtype",
    [
        ([], "object"),
        ([1, 2, 3], "int64"),
        ([1.0, 2.0, 3.0], "float64"),
        (["a", "b", "c"], "object"),
        (["a", "b", "c"], "string"),
        ([1, 2, 3], "datetime64[ns]"),
        ([1, 2, 3], "datetime64[ns, CET]"),
        ([1, 2, 3], "timedelta64[ns]"),
        (["2000", "2001", "2002"], "Period[D]"),
        ([1, 0, 3], "Sparse"),
        ([pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(3, 4)], "interval"),
    ],
)
@pytest.mark.parametrize(
    "mask", [[True, False, False], [True, True, True], [False, False, False]]
)
@pytest.mark.parametrize("indexer_class", [list, pd.array, pd.Index, pd.Series])
@pytest.mark.parametrize("frame", [True, False])
def test_series_mask_boolean(values, dtype, mask, indexer_class, frame):
    """Boolean-mask selection must agree across ``[]``, ``iloc`` and ``loc``.

    Parameters
    ----------
    values, dtype
        Data and dtype used to build the Series (or one-column frame).
    mask : list of bool
        Mask, truncated to ``len(values)``.
    indexer_class
        Container the mask is wrapped in before indexing.
    frame : bool
        If True, the Series is converted to a DataFrame first.
    """
    # In case len(values) < 3
    index = ["a", "b", "c"][: len(values)]
    mask = mask[: len(values)]

    obj = pd.Series(values, dtype=dtype, index=index)
    if frame:
        if len(values) == 0:
            # Otherwise obj is an empty DataFrame with shape (0, 1)
            obj = pd.DataFrame(dtype=dtype, index=index)
        else:
            obj = obj.to_frame()

    if indexer_class is pd.array:
        mask = pd.array(mask, dtype="boolean")
    elif indexer_class is pd.Series:
        mask = pd.Series(mask, index=obj.index, dtype="boolean")
    else:
        mask = indexer_class(mask)

    # The plain [] result is the reference for the other accessors.
    # Comparing obj[mask] with a second obj[mask] can never fail, so that
    # self-comparison is intentionally not repeated here.
    expected = obj[mask]

    if indexer_class is pd.Series:
        msg = "iLocation based boolean indexing cannot use an indexable as a mask"
        # Only the raising statement lives inside the context manager;
        # anything after it in the block would be unreachable.
        with pytest.raises(ValueError, match=msg):
            obj.iloc[mask]
    else:
        result = obj.iloc[mask]
        tm.assert_equal(result, expected)

    result = obj.loc[mask]
    tm.assert_equal(result, expected)
def test_na_treated_as_false(frame_or_series, indexer_sli):
# https://github.com/pandas-dev/pandas/issues/31503
obj = frame_or_series([1, 2, 3])
mask = pd.array([True, False, None], dtype="boolean")
result = indexer_sli(obj)[mask]
expected = indexer_sli(obj)[mask.fillna(False)]
tm.assert_equal(result, expected)

View File

@ -0,0 +1,702 @@
"""
test setting *parts* of objects both positionally and label based
TODO: these should be split among the indexer tests
"""
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
Period,
Series,
Timestamp,
date_range,
period_range,
)
import pandas._testing as tm
class TestEmptyFrameSetitemExpansion:
def test_empty_frame_setitem_index_name_retained(self):
# GH#31368 empty frame has non-None index.name -> retained
df = DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
df["series"] = series
expected = DataFrame(
{"series": [1.23] * 4},
index=pd.RangeIndex(4, name="df_index"),
columns=Index(["series"], dtype=object),
)
tm.assert_frame_equal(df, expected)
def test_empty_frame_setitem_index_name_inherited(self):
# GH#36527 empty frame has None index.name -> not retained
df = DataFrame()
series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
df["series"] = series
expected = DataFrame(
{"series": [1.23] * 4},
index=pd.RangeIndex(4, name="series_index"),
columns=Index(["series"], dtype=object),
)
tm.assert_frame_equal(df, expected)
def test_loc_setitem_zerolen_series_columns_align(self):
# columns will align
df = DataFrame(columns=["A", "B"])
df.loc[0] = Series(1, index=range(4))
expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64)
tm.assert_frame_equal(df, expected)
# columns will align
df = DataFrame(columns=["A", "B"])
df.loc[0] = Series(1, index=["B"])
exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
tm.assert_frame_equal(df, exp)
def test_loc_setitem_zerolen_list_length_must_match_columns(self):
    """A list row must have exactly as many items as the frame has columns."""
    # too many items -> rejected
    too_long = DataFrame(columns=["A", "B"])
    with pytest.raises(ValueError, match="cannot set a row with mismatched columns"):
        too_long.loc[0] = [1, 2, 3]

    # length matches len(columns) -> accepted
    frame = DataFrame(columns=["A", "B"])
    frame.loc[3] = [6, 7]

    expected = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64)
    tm.assert_frame_equal(frame, expected)
def test_partial_set_empty_frame(self):
    """Setting rows or columns on a completely empty frame must raise."""
    frame = DataFrame()

    # row assignment, with a scalar and with a Series
    no_columns_msg = "cannot set a frame with no defined columns"
    with pytest.raises(ValueError, match=no_columns_msg):
        frame.loc[1] = 1

    with pytest.raises(ValueError, match=no_columns_msg):
        frame.loc[1] = Series([1], index=["foo"])

    # column assignment with a scalar
    no_index_msg = "cannot set a frame with no defined index and a scalar"
    with pytest.raises(ValueError, match=no_index_msg):
        frame.loc[:, 1] = 1
def test_partial_set_empty_frame2(self):
# these work as they don't really change
# anything but the index
# GH#5632
expected = DataFrame(
columns=Index(["foo"], dtype=object), index=Index([], dtype="object")
)
df = DataFrame(index=Index([], dtype="object"))
df["foo"] = Series([], dtype="object")
tm.assert_frame_equal(df, expected)
df = DataFrame(index=Index([]))
df["foo"] = Series(df.index)
tm.assert_frame_equal(df, expected)
df = DataFrame(index=Index([]))
df["foo"] = df.index
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame3(self):
expected = DataFrame(
columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
)
expected["foo"] = expected["foo"].astype("float64")
df = DataFrame(index=Index([], dtype="int64"))
df["foo"] = []
tm.assert_frame_equal(df, expected)
df = DataFrame(index=Index([], dtype="int64"))
df["foo"] = Series(np.arange(len(df)), dtype="float64")
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame4(self):
df = DataFrame(index=Index([], dtype="int64"))
df["foo"] = range(len(df))
expected = DataFrame(
columns=Index(["foo"], dtype=object), index=Index([], dtype="int64")
)
# range is int-dtype-like, so we get int64 dtype
expected["foo"] = expected["foo"].astype("int64")
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame5(self):
df = DataFrame()
tm.assert_index_equal(df.columns, pd.RangeIndex(0))
df2 = DataFrame()
df2[1] = Series([1], index=["foo"])
df.loc[:, 1] = Series([1], index=["foo"])
tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
tm.assert_frame_equal(df, df2)
def test_partial_set_empty_frame_no_index(self):
# no index to start
expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])
df = DataFrame(columns=["A", "B"])
df[0] = Series(1, index=range(4))
tm.assert_frame_equal(df, expected)
df = DataFrame(columns=["A", "B"])
df.loc[:, 0] = Series(1, index=range(4))
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame_row(self):
    """Adding or reindexing columns on a zero-row frame must not add rows."""
    # GH#5720, GH#5744
    # don't create rows when empty
    expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64"))
    expected["A"] = expected["A"].astype("int64")
    expected["B"] = expected["B"].astype("float64")
    expected["New"] = expected["New"].astype("float64")

    df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
    # no value of A exceeds 5, so the filter keeps zero rows
    y = df[df.A > 5]
    y["New"] = np.nan
    tm.assert_frame_equal(y, expected)

    expected = DataFrame(columns=["a", "b", "c c", "d"])
    expected["d"] = expected["d"].astype("int64")

    # scalar column assignment on a frame that has columns but no rows
    df = DataFrame(columns=["a", "b", "c c"])
    df["d"] = 3
    tm.assert_frame_equal(df, expected)
    tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))

    # reindex columns is ok
    df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
    y = df[df.A > 5]
    result = y.reindex(columns=["A", "B", "C"])
    expected = DataFrame(columns=["A", "B", "C"])
    expected["A"] = expected["A"].astype("int64")
    expected["B"] = expected["B"].astype("float64")
    expected["C"] = expected["C"].astype("float64")
    tm.assert_frame_equal(result, expected)
def test_partial_set_empty_frame_set_series(self):
# GH#5756
# setting with empty Series
df = DataFrame(Series(dtype=object))
expected = DataFrame({0: Series(dtype=object)})
tm.assert_frame_equal(df, expected)
df = DataFrame(Series(name="foo", dtype=object))
expected = DataFrame({"foo": Series(dtype=object)})
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame_empty_copy_assignment(self):
# GH#5932
# copy on empty with assignment fails
df = DataFrame(index=[0])
df = df.copy()
df["a"] = 0
expected = DataFrame(0, index=[0], columns=Index(["a"], dtype=object))
tm.assert_frame_equal(df, expected)
def test_partial_set_empty_frame_empty_consistencies(self, using_infer_string):
# GH#6171
# consistency on empty frames
df = DataFrame(columns=["x", "y"])
df["x"] = [1, 2]
expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]})
tm.assert_frame_equal(df, expected, check_dtype=False)
df = DataFrame(columns=["x", "y"])
df["x"] = ["1", "2"]
expected = DataFrame(
{
"x": Series(
["1", "2"],
dtype=object if not using_infer_string else "string[pyarrow_numpy]",
),
"y": Series([np.nan, np.nan], dtype=object),
}
)
tm.assert_frame_equal(df, expected)
df = DataFrame(columns=["x", "y"])
df.loc[0, "x"] = 1
expected = DataFrame({"x": [1], "y": [np.nan]})
tm.assert_frame_equal(df, expected, check_dtype=False)
class TestPartialSetting:
def test_partial_setting(self):
    """Label-based setting enlarges a Series; ``iloc``/``iat`` refuse to."""
    # GH2578, allow ix and friends to partially set
    original = Series([1, 2, 3])

    # int first, then float; for each value [] first, then .loc
    for new_value in (5, 5.0):
        for use_loc in (False, True):
            ser = original.copy()
            if use_loc:
                ser.loc[5] = new_value
            else:
                ser[5] = new_value
            expected = Series([1, 2, 3, new_value], index=[0, 1, 2, 5])
            tm.assert_series_equal(ser, expected)

    # iloc/iat raise
    ser = original.copy()

    with pytest.raises(IndexError, match="iloc cannot enlarge its target object"):
        ser.iloc[3] = 5.0

    with pytest.raises(
        IndexError, match="index 3 is out of bounds for axis 0 with size 3"
    ):
        ser.iat[3] = 5.0
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_partial_setting_frame(self, using_array_manager):
    """Row/column setting on a small int64 frame, with and without enlargement.

    ``using_array_manager`` only switches the expected ``iat`` error message.
    """
    df_orig = DataFrame(
        np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
    )

    # iloc/iat raise
    df = df_orig.copy()

    msg = "iloc cannot enlarge its target object"
    with pytest.raises(IndexError, match=msg):
        df.iloc[4, 2] = 5.0

    msg = "index 2 is out of bounds for axis 0 with size 2"
    if using_array_manager:
        msg = "list index out of range"
    with pytest.raises(IndexError, match=msg):
        df.iat[4, 2] = 5.0

    # row setting where it exists
    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.iloc[1] = df.iloc[2]
    tm.assert_frame_equal(df, expected)

    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.loc[1] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # like 2578, partial setting with dtype preservation
    expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})
    df = df_orig.copy()
    df.loc[3] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]})
    df = df_orig.copy()
    df.loc[:, "B"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])})
    df = df_orig.copy()
    df["B"] = df["B"].astype(np.float64)
    # as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at
    #  setting inplace
    df.loc[:, "B"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, partial setting
    expected = df_orig.copy()
    # NOTE: `df` here is still the frame left over from the previous stage
    expected["C"] = df["A"]
    df = df_orig.copy()
    df.loc[:, "C"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed frame, partial setting
    # NOTE(review): this stage repeats the statements of the previous one
    # without making the frame mixed-dtype first; confirm that is intended.
    expected = df_orig.copy()
    expected["C"] = df["A"]
    df = df_orig.copy()
    df.loc[:, "C"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)
def test_partial_setting2(self):
# GH 8473
dates = date_range("1/1/2000", periods=8)
df_orig = DataFrame(
np.random.default_rng(2).standard_normal((8, 4)),
index=dates,
columns=["A", "B", "C", "D"],
)
expected = pd.concat(
[df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True
)
df = df_orig.copy()
df.loc[dates[-1] + dates.freq, "A"] = 7
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.at[dates[-1] + dates.freq, "A"] = 7
tm.assert_frame_equal(df, expected)
exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq)
expected = pd.concat([df_orig, exp_other], axis=1)
df = df_orig.copy()
df.loc[dates[-1] + dates.freq, 0] = 7
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.at[dates[-1] + dates.freq, 0] = 7
tm.assert_frame_equal(df, expected)
def test_partial_setting_mixed_dtype(self):
# in a mixed dtype environment, try to preserve dtypes
# by appending
df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])
s = df.loc[1].copy()
s.name = 2
expected = pd.concat([df, DataFrame(s).T.infer_objects()])
df.loc[2] = df.loc[1]
tm.assert_frame_equal(df, expected)
def test_series_partial_set(self):
    """List lookups via ``.loc`` raise on missing labels; ``reindex`` fills NaN.

    Each stage pairs a ``.loc`` call that must raise KeyError with the
    equivalent ``reindex`` call whose result is compared to ``expected``.
    """
    # partial set with new index
    # Regression from GH4825
    ser = Series([0.1, 0.2], index=[1, 2])

    # loc equiv to .reindex
    expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3])
    with pytest.raises(KeyError, match=r"not in index"):
        ser.loc[[3, 2, 3]]

    result = ser.reindex([3, 2, 3])
    tm.assert_series_equal(result, expected, check_index_type=True)

    expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"])
    with pytest.raises(KeyError, match="not in index"):
        ser.loc[[3, 2, 3, "x"]]

    result = ser.reindex([3, 2, 3, "x"])
    tm.assert_series_equal(result, expected, check_index_type=True)

    # all requested labels exist, so .loc succeeds (duplicates allowed)
    expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1])
    with pytest.raises(KeyError, match="not in index"):
        ser.loc[[2, 2, "x", 1]]

    result = ser.reindex([2, 2, "x", 1])
    tm.assert_series_equal(result, expected, check_index_type=True)

    # raises as nothing is in the index
    msg = (
        rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}'\)\] "
        r"are in the \[index\]\""
    )
    with pytest.raises(KeyError, match=msg):
        ser.loc[[3, 3, 3]]

    expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
    with pytest.raises(KeyError, match="not in index"):
        ser.loc[[2, 2, 3]]

    result = ser.reindex([2, 2, 3])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
    expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[3, 4, 4]]

    result = s.reindex([3, 4, 4])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[5, 3, 3]]

    result = s.reindex([5, 3, 3])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[5, 4, 4]]

    result = s.reindex([5, 4, 4])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7])
    expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[7, 2, 2]]

    result = s.reindex([7, 2, 2])
    tm.assert_series_equal(result, expected, check_index_type=True)

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5])
    with pytest.raises(KeyError, match="not in index"):
        s.loc[[4, 5, 5]]

    result = s.reindex([4, 5, 5])
    tm.assert_series_equal(result, expected, check_index_type=True)

    # iloc
    expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
def test_series_partial_set_with_name(self):
    """List lookups on a Series with a named index and a Series name.

    ``.loc`` with any missing label must raise KeyError; successful
    ``.loc``/``.iloc`` results are expected to carry the index name "idx"
    and the Series name "s".
    """
    # GH 11497

    idx = Index([1, 2], dtype="int64", name="idx")
    ser = Series([0.1, 0.2], index=idx, name="s")

    # loc
    with pytest.raises(KeyError, match=r"\[3\] not in index"):
        ser.loc[[3, 2, 3]]

    with pytest.raises(KeyError, match=r"not in index"):
        ser.loc[[3, 2, 3, "x"]]

    exp_idx = Index([2, 2, 1], dtype="int64", name="idx")
    expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s")
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    with pytest.raises(KeyError, match=r"\['x'\] not in index"):
        ser.loc[[2, 2, "x", 1]]

    # raises as nothing is in the index
    msg = (
        rf"\"None of \[Index\(\[3, 3, 3\], dtype='{np.dtype(int)}', "
        r"name='idx'\)\] are in the \[index\]\""
    )
    with pytest.raises(KeyError, match=msg):
        ser.loc[[3, 3, 3]]

    with pytest.raises(KeyError, match="not in index"):
        ser.loc[[2, 2, 3]]

    # the remaining .loc cases each build a fresh Series inline
    idx = Index([1, 2, 3], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]]

    idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]]

    idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]]

    idx = Index([4, 5, 6, 7], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]]

    idx = Index([1, 2, 3, 4], dtype="int64", name="idx")
    with pytest.raises(KeyError, match="not in index"):
        Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]]

    # iloc
    exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx")
    expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s")
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
@pytest.mark.parametrize("key", [100, 100.0])
def test_setitem_with_expansion_numeric_into_datetimeindex(self, key):
    # GH#4940 inserting non-strings
    # Enlarging a frame indexed by dates with a numeric row label: the
    # expected index is object dtype and keeps the label as given.
    rng = np.random.default_rng(2)
    orig = DataFrame(
        rng.standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )

    df = orig.copy()
    df.loc[key, :] = df.iloc[0]

    expected_index = Index([*orig.index, key], dtype=object, name=orig.index.name)
    appended_row = df.iloc[[0]].values
    expected = DataFrame(
        np.concatenate([orig.values, appended_row], axis=0),
        index=expected_index,
        columns=orig.columns,
    )
    tm.assert_frame_equal(df, expected)
def test_partial_set_invalid(self):
# GH 4940
# allow only setting of 'valid' values
orig = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
index=date_range("2000-01-01", periods=10, freq="B"),
)
# allow object conversion here
df = orig.copy()
df.loc["a", :] = df.iloc[0]
ser = Series(df.iloc[0], name="a")
exp = pd.concat([orig, DataFrame(ser).T.infer_objects()])
tm.assert_frame_equal(df, exp)
tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
assert df.index.dtype == "object"
@pytest.mark.parametrize(
    "idx,labels,expected_idx",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Period(day, freq="D")
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Timestamp(day)
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["4D", "8D", "12D"],
            [pd.Timedelta(span) for span in ("4 day", "8 day", "12 day")],
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes(
    self, idx, labels, expected_idx, frame_or_series
):
    # GH 11278
    # String labels are looked up against a datetime-like index; the
    # selected rows sit at positions 3, 7 and 11 of the 20-element object.
    obj = frame_or_series(range(20), index=idx)
    expected = frame_or_series([3, 7, 11], expected_idx)

    tm.assert_equal(expected, obj.loc[labels])
    if frame_or_series is not Series:
        return
    # plain ``[]`` with a list of labels is only checked for Series
    tm.assert_series_equal(expected, obj[labels])
@pytest.mark.parametrize(
    "idx,labels",
    [
        # in every case the second label lies beyond the 20-period index
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_missing_value(
    self, idx, labels
):
    # GH 11278
    ser = Series(range(20), index=idx)
    df = DataFrame(range(20), index=idx)
    msg = r"not in index"

    # same failure expected from Series.loc, Series[] and DataFrame.loc
    lookups = (
        lambda: ser.loc[labels],
        lambda: ser[labels],
        lambda: df.loc[labels],
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match=msg):
            lookup()
@pytest.mark.parametrize(
    "idx,labels,msg",
    [
        # timedelta-looking strings against a period index
        (
            period_range(start="2000", periods=20, freq="D"),
            Index(["4D", "8D"], dtype=object),
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        # timedelta-looking strings against a datetime index
        (
            date_range(start="2000", periods=20, freq="D"),
            Index(["4D", "8D"], dtype=object),
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        # date-looking strings against a timedelta index
        (
            pd.timedelta_range(start="1 day", periods=20),
            Index(["2000-01-04", "2000-01-08"], dtype=object),
            (
                r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
                r"dtype='object'\)\] are in the \[index\]"
            ),
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
    self, idx, labels, msg
):
    # GH 11278
    ser = Series(range(20), index=idx)
    df = DataFrame(range(20), index=idx)

    def via_series_loc():
        return ser.loc[labels]

    def via_series_getitem():
        return ser[labels]

    def via_frame_loc():
        return df.loc[labels]

    for lookup in (via_series_loc, via_series_getitem, via_frame_loc):
        with pytest.raises(KeyError, match=msg):
            lookup()
class TestStringSlicing:
    def test_slice_irregular_datetime_index_with_nan(self):
        # GH36953
        index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None])
        df = DataFrame(range(len(index)), index=index)

        # expected result: the first three rows (the entry built from None
        # is the fourth and is left out)
        valid = index[:3]
        expected = DataFrame(range(len(valid)), index=valid)

        with pytest.raises(KeyError, match="non-existing keys is not allowed"):
            # Upper bound is not in index (which is unordered)
            # GH53983
            # GH37819
            df["2012-01-01":"2012-01-04"]

        # Need this precision for right bound since the right slice
        # bound is "rounded" up to the largest timepoint smaller than
        # the next "resolution"-step of the provided point.
        # e.g. 2012-01-03 is rounded up to 2012-01-04 - 1ns
        lower, upper = "2012-01-01", "2012-01-03 00:00:00.000000000"
        result = df[lower:upper]
        tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,303 @@
""" test scalar indexing, including at and iat """
from datetime import (
datetime,
timedelta,
)
import itertools
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
Timedelta,
Timestamp,
date_range,
)
import pandas._testing as tm
def generate_indices(f, values=False):
    """
    Generate every index tuple that addresses a single element of ``f``.

    Parameters
    ----------
    f : object with an ``axes`` attribute
        ``axes`` must be an iterable of sized iterables, one per dimension.
    values : bool, default False
        If True, use the integer positions ``0 .. len(ax) - 1`` of every
        axis (what ``iat`` expects). If False, use the axis labels
        themselves (what ``at`` expects).

        NOTE: despite its name, a true ``values`` selects *positions*, not
        the axis values; the flag name is kept for backward compatibility.

    Returns
    -------
    itertools.product
        Lazy iterator of tuples, one entry per axis.
    """
    axes = f.axes
    if values:
        # positional mode: replace each axis by its range of positions
        axes = [range(len(ax)) for ax in axes]
    return itertools.product(*axes)
class TestScalar:
    # Scalar setters (``iat`` / ``at``) exercised against the shared
    # ``{kind}_{col}`` fixtures, which are resolved by name at run time.

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["ints", "uints"])
    def test_iat_set_ints(self, kind, col, request):
        f = request.getfixturevalue(f"{kind}_{col}")
        # True -> integer positions, which is what ``iat`` indexes by
        indices = generate_indices(f, True)
        for i in indices:
            f.iat[i] = 1
            result = f.values[i]
            tm.assert_almost_equal(result, 1)

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["labels", "ts", "floats"])
    def test_iat_set_other(self, kind, col, request):
        f = request.getfixturevalue(f"{kind}_{col}")
        msg = "iAt based indexing can only have integer indexers"
        # Build the (label-based) indexer outside the ``raises`` block so
        # that only the ``iat`` assignment itself can satisfy the check.
        idx = next(generate_indices(f, False))
        with pytest.raises(ValueError, match=msg):
            f.iat[idx] = 1

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["ints", "uints", "labels", "ts", "floats"])
    def test_at_set_ints_other(self, kind, col, request):
        f = request.getfixturevalue(f"{kind}_{col}")
        # False -> axis labels, which is what ``at`` / ``loc`` index by
        indices = generate_indices(f, False)
        for i in indices:
            f.at[i] = 1
            result = f.loc[i]
            tm.assert_almost_equal(result, 1)
class TestAtAndiAT:
    # at and iat tests that don't need Base class

    def test_float_index_at_iat(self):
        ser = Series([1, 2, 3], index=[0.1, 0.2, 0.3])

        # label-based access must agree with iteration over items
        for label, value in ser.items():
            assert ser.at[label] == value

        # positional access: the element at position p holds p + 1
        for pos in range(len(ser)):
            assert ser.iat[pos] == pos + 1

    def test_at_iat_coercion(self):
        # as timestamp is not a tuple!
        dates = date_range("1/1/2000", periods=8)
        df = DataFrame(
            np.random.default_rng(2).standard_normal((8, 4)),
            index=dates,
            columns=list("ABCD"),
        )
        column = df["A"]

        result = column.at[dates[5]]
        assert result == column.values[5]

    @pytest.mark.parametrize(
        "ser, expected",
        [
            [
                Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]"),
                Timestamp("2014-02-02"),
            ],
            [
                Series(["1 days", "2 days"], dtype="timedelta64[ns]"),
                Timedelta("2 days"),
            ],
        ],
    )
    def test_iloc_iat_coercion_datelike(self, indexer_ial, ser, expected):
        # GH 7729
        # make sure we are boxing the returns
        accessor = indexer_ial(ser)
        assert accessor[1] == expected

    def test_imethods_with_dups(self):
        # GH6493
        # iat/iloc with dups
        s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64")

        assert s.iloc[2] == 2
        assert s.iat[2] == 2

        # positions outside [-5, 5) are rejected
        out_of_bounds = (
            (10, "index 10 is out of bounds for axis 0 with size 5"),
            (-10, "index -10 is out of bounds for axis 0 with size 5"),
        )
        for pos, msg in out_of_bounds:
            with pytest.raises(IndexError, match=msg):
                s.iat[pos]

        result = s.iloc[[2, 3]]
        expected = Series([2, 3], index=[2, 2], dtype="int64")
        tm.assert_series_equal(result, expected)

        df = s.to_frame()
        result = df.iloc[2]
        expected = Series(2, index=[0], name=2)
        tm.assert_series_equal(result, expected)

        assert df.iat[2, 0] == 2

    def test_frame_at_with_duplicate_axes(self):
        # GH#33041
        data = np.random.default_rng(2).standard_normal(6).reshape(3, 2)
        df = DataFrame(data, columns=["A", "A"])

        # getter: a duplicated column label yields a Series, not a scalar
        result = df.at[0, "A"]
        expected = df.iloc[0].copy()
        tm.assert_series_equal(result, expected)

        transposed_result = df.T.at["A", 0]
        tm.assert_series_equal(transposed_result, expected)

        # setter
        df.at[1, "A"] = 2
        expected = Series([2.0, 2.0], index=["A", "A"], name=1)
        tm.assert_series_equal(df.iloc[1], expected)

    def test_at_getitem_dt64tz_values(self):
        # gh-15822
        naive_dates = [
            Timestamp(2017, 3, 13, 13, 32, 56),
            Timestamp(2017, 2, 16, 12, 10, 3),
        ]
        df = DataFrame({"name": ["John", "Anderson"], "date": naive_dates})
        df["date"] = df["date"].dt.tz_localize("Asia/Shanghai")

        expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai")

        assert df.loc[0, "date"] == expected
        assert df.at[0, "date"] == expected

    def test_mixed_index_at_iat_loc_iloc_series(self):
        # GH 19860
        s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])

        for label, value in s.items():
            assert s.at[label] == s.loc[label] == value

        for pos in range(len(s)):
            assert s.iat[pos] == s.iloc[pos] == pos + 1

        # 4 is not a label of the index, so label-based lookups must fail
        with pytest.raises(KeyError, match="^4$"):
            s.at[4]
        with pytest.raises(KeyError, match="^4$"):
            s.loc[4]

    def test_mixed_index_at_iat_loc_iloc_dataframe(self):
        # GH 19860
        df = DataFrame(
            [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2]
        )

        for row_label, row in df.iterrows():
            for col_label, value in row.items():
                assert (
                    df.at[row_label, col_label]
                    == df.loc[row_label, col_label]
                    == value
                )

        # the frame was filled row-major with 0..9
        for row_pos in range(2):
            for col_pos in range(5):
                assert (
                    df.iat[row_pos, col_pos]
                    == df.iloc[row_pos, col_pos]
                    == row_pos * 5 + col_pos
                )

        # 3 is not a column label, so label-based lookups must fail
        with pytest.raises(KeyError, match="^3$"):
            df.at[0, 3]
        with pytest.raises(KeyError, match="^3$"):
            df.loc[0, 3]

    def test_iat_setter_incompatible_assignment(self):
        # GH 23236
        result = DataFrame({"a": [0.0, 1.0], "b": [4, 5]})
        result.iat[0, 0] = None

        expected = DataFrame({"a": [None, 1], "b": [4, 5]})
        tm.assert_frame_equal(result, expected)
def test_iat_dont_wrap_object_datetimelike():
    # GH#32809 .iat calls go through DataFrame._get_value, should not
    # call maybe_box_datetimelike
    dti = date_range("2016-01-01", periods=3)
    tdi = dti - dti

    # object-dtype columns holding stdlib datetime / timedelta objects
    ser = Series(dti.to_pydatetime(), dtype=object)
    ser2 = Series(tdi.to_pytimedelta(), dtype=object)
    df = DataFrame({"A": ser, "B": ser2})
    assert (df.dtypes == object).all()

    # every scalar accessor must hand back the stored object itself
    datetime_results = (df.at[0, "A"], df.iat[0, 0], df.loc[0, "A"], df.iloc[0, 0])
    for value in datetime_results:
        assert value is ser[0]
        assert isinstance(value, datetime)
        assert not isinstance(value, Timestamp)

    timedelta_results = (df.at[1, "B"], df.iat[1, 1], df.loc[1, "B"], df.iloc[1, 1])
    for value in timedelta_results:
        assert value is ser2[1]
        assert isinstance(value, timedelta)
        assert not isinstance(value, Timedelta)
def test_at_with_tuple_index_get():
    # GH 26989
    # DataFrame.at getter works with Index of tuples
    tuple_labels = [(1, 2), (3, 4)]
    df = DataFrame({"a": [1, 2]}, index=tuple_labels)
    # the tuples are plain labels of a single-level index
    assert df.index.nlevels == 1
    assert df.at[(1, 2), "a"] == 1

    # Series.at getter works with Index of tuples
    series = df["a"]
    assert series.index.nlevels == 1
    assert series.at[(1, 2)] == 1
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_at_with_tuple_index_set():
    # GH 26989
    # DataFrame.at setter works with Index of tuples
    tuple_labels = [(1, 2), (3, 4)]
    df = DataFrame({"a": [1, 2]}, index=tuple_labels)
    assert df.index.nlevels == 1

    df.at[(1, 2), "a"] = 2
    assert df.at[(1, 2), "a"] == 2

    # Series.at setter works with Index of tuples
    series = df["a"]
    assert series.index.nlevels == 1

    # ``series.at[1, 2]`` and ``series.at[(1, 2)]`` pass the same tuple key
    series.at[(1, 2)] = 3
    assert series.at[(1, 2)] == 3
class TestMultiIndexScalar:
def test_multiindex_at_get(self):
# GH 26989
# DataFrame.at and DataFrame.loc getter works with MultiIndex
df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]])
assert df.index.nlevels == 2
assert df.at[(1, 3), "a"] == 1
assert df.loc[(1, 3), "a"] == 1
# Series.at and Series.loc getter works with MultiIndex
series = df["a"]
assert series.index.nlevels == 2
assert series.at[1, 3] == 1
assert series.loc[1, 3] == 1
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_multiindex_at_set(self):
    # GH 26989
    # DataFrame.at and DataFrame.loc setter works with MultiIndex
    level_arrays = [[1, 2], [3, 4]]
    df = DataFrame({"a": [1, 2]}, index=level_arrays)
    assert df.index.nlevels == 2

    frame_key = ((1, 3), "a")
    df.at[frame_key] = 3
    assert df.at[frame_key] == 3
    df.loc[frame_key] = 4
    assert df.loc[frame_key] == 4

    # Series.at and Series.loc setter works with MultiIndex
    series = df["a"]
    assert series.index.nlevels == 2

    series_key = (1, 3)
    series.at[series_key] = 5
    assert series.at[series_key] == 5
    series.loc[series_key] = 6
    assert series.loc[series_key] == 6
def test_multiindex_at_get_one_level(self):
# GH#38053
s2 = Series((0, 1), index=[[False, True]])
result = s2.at[False]
assert result == 0