Updated script that can be controlled by a Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@ -0,0 +1,87 @@
import numpy as np
import pytest
from pandas._libs import index as libindex
from pandas.errors import SettingWithCopyError
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
MultiIndex,
Series,
)
import pandas._testing as tm
def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write):
    """Check how a chained in-place ``fillna`` on MultiIndex columns is reported.

    The expected outcome depends on the two mode flags passed in: the
    chained-assignment error context, no warning at all, or a
    ``SettingWithCopyError``.
    """
    # Inplace ops, originally from:
    # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    a = [12, 23]
    b = [123, None]
    c = [1234, 2345]
    d = [12345, 23456]
    tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")]
    events = {
        ("eyes", "left"): a,
        ("eyes", "right"): b,
        ("ears", "left"): c,
        ("ears", "right"): d,
    }
    multiind = MultiIndex.from_tuples(tuples, names=["part", "side"])
    zed = DataFrame(events, index=["a", "b"], columns=multiind)

    # The chained expression is repeated verbatim in every branch on purpose:
    # it must stay a single chained statement inside each context manager.
    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            zed["eyes"]["right"].fillna(value=555, inplace=True)
    elif warn_copy_on_write:
        with tm.assert_produces_warning(None):
            zed["eyes"]["right"].fillna(value=555, inplace=True)
    else:
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(SettingWithCopyError, match=msg):
            with tm.assert_produces_warning(None):
                zed["eyes"]["right"].fillna(value=555, inplace=True)
@td.skip_array_manager_invalid_test  # with ArrayManager df.loc[0] is not a view
def test_cache_updating(using_copy_on_write, warn_copy_on_write):
    """Chained setting through ``df.loc[0]["z"]`` must not write to a dead cache.

    After the chained write, the value seen through ``df.loc`` depends on
    ``using_copy_on_write``; a direct ``df.loc[...] = 2`` must always stick.
    """
    # 5216
    # make sure that we don't try to set a dead cache
    a = np.random.default_rng(2).random((10, 3))
    df = DataFrame(a, columns=["x", "y", "z"])
    df_original = df.copy()
    tuples = [(i, j) for i in range(5) for j in range(2)]
    index = MultiIndex.from_tuples(tuples)
    df.index = index

    # setting via chained assignment
    # but actually works, since everything is a view
    with tm.raises_chained_assignment_error():
        df.loc[0]["z"].iloc[0] = 1.0

    if using_copy_on_write:
        # the parent frame must be unchanged
        assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
    else:
        result = df.loc[(0, 0), "z"]
        assert result == 1

    # correct setting
    df.loc[(0, 0), "z"] = 2
    result = df.loc[(0, 0), "z"]
    assert result == 2
def test_indexer_caching(monkeypatch):
    """Boolean setitem on a MultiIndex-ed Series works with a lowered size cutoff.

    ``libindex._SIZE_CUTOFF`` is patched to the length of the index for the
    duration of the ``with`` block only.
    """
    # GH5727
    # make sure that indexers are in the _internal_names_set
    size_cutoff = 20
    # Patch through the MonkeyPatch object yielded by the context manager so
    # the override is undone when the block exits, not at test teardown.
    with monkeypatch.context() as patch:
        patch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        index = MultiIndex.from_arrays([np.arange(size_cutoff), np.arange(size_cutoff)])
        s = Series(np.zeros(size_cutoff), index=index)

        # setitem
        s[s == 0] = 1

    expected = Series(np.ones(size_cutoff), index=index)
    tm.assert_series_equal(s, expected)

View File

@ -0,0 +1,50 @@
from datetime import datetime
import numpy as np
from pandas import (
DataFrame,
Index,
MultiIndex,
Period,
Series,
period_range,
to_datetime,
)
import pandas._testing as tm
def test_multiindex_period_datetime():
    """A Period level of a MultiIndex can be looked up by Period or by datetime."""
    # GH4861, using datetime in period of multiindex raises exception
    idx1 = Index(["a", "a", "a", "b", "b"])
    idx2 = period_range("2012-01", periods=len(idx1), freq="M")
    s = Series(np.random.default_rng(2).standard_normal(len(idx1)), [idx1, idx2])

    # try Period as index
    expected = s.iloc[0]
    result = s.loc["a", Period("2012-01")]
    assert result == expected

    # try datetime as index
    result = s.loc["a", datetime(2012, 1, 1)]
    assert result == expected
def test_multiindex_datetime_columns():
    """An empty frame accepts MultiIndex columns built from datetime tuples."""
    # GH35015, using datetime as column indices raises exception
    mi = MultiIndex.from_tuples(
        [(to_datetime("02/29/2020"), to_datetime("03/01/2020"))], names=["a", "b"]
    )
    df = DataFrame([], columns=mi)

    expected_df = DataFrame(
        [],
        columns=MultiIndex.from_arrays(
            [[to_datetime("02/29/2020")], [to_datetime("03/01/2020")]], names=["a", "b"]
        ),
    )
    tm.assert_frame_equal(df, expected_df)

View File

@ -0,0 +1,410 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
from pandas.core.indexing import IndexingError
# ----------------------------------------------------------------------------
# test indexing of Series with multi-level Index
# ----------------------------------------------------------------------------
@pytest.mark.parametrize(
    "access_method",
    [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)],
)
@pytest.mark.parametrize(
    "level1_value, expected",
    [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))],
)
def test_series_getitem_multiindex(access_method, level1_value, expected):
    """Selecting on the second level via ``[]``, ``.loc`` or ``.xs`` agrees.

    ``access_method`` is called as ``access_method(series, level1_value)`` and
    its result is compared with ``expected`` after naming the index "A".
    """
    # GH 6018
    # series regression getitem with a multi-index
    mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)], names=["A", "B"])
    ser = Series([1, 2, 3], index=mi)
    # rename_axis returns a new Series, so the parametrized object shared
    # between test invocations is not mutated.
    expected = expected.rename_axis("A")

    result = access_method(ser, level1_value)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("level0_value", ["D", "A"])
def test_series_getitem_duplicates_multiindex(level0_value):
    """Missing first-level keys raise ``KeyError``; present ones return the sub-series."""
    # GH 5725 the 'A' happens to be a valid Timestamp so it doesn't raise
    # the appropriate error, only in PY3 of course!
    index = MultiIndex(
        levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
        codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
        names=["tag", "day"],
    )
    arr = np.random.default_rng(2).standard_normal((len(index), 1))
    df = DataFrame(arr, index=index, columns=["val"])

    # confirm indexing on missing value raises KeyError
    if level0_value != "A":
        with pytest.raises(KeyError, match=r"^'A'$"):
            df.val["A"]

    with pytest.raises(KeyError, match=r"^'X'$"):
        df.val["X"]

    result = df.val[level0_value]
    expected = Series(
        arr.ravel()[0:3], name="val", index=Index([26, 37, 57], name="day")
    )
    tm.assert_series_equal(result, expected)
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl):
    """A two-key partial lookup ``[2000, 3]`` returns rows 42:65 with two levels dropped."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    expected = s.reindex(s.index[42:65])
    # drop the two leading levels that the partial key consumed
    expected.index = expected.index.droplevel(0).droplevel(0)

    result = indexer_sl(s)[2000, 3]
    tm.assert_series_equal(result, expected)
def test_series_getitem_returns_scalar(
    multiindex_year_month_day_dataframe_random_data, indexer_sl
):
    """A full three-key lookup returns the scalar found at position 49."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    expected = s.iloc[49]

    result = indexer_sl(s)[2000, 3, 10]
    assert result == expected
@pytest.mark.parametrize(
    "indexer,expected_error,expected_error_msg",
    [
        (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
        (lambda s: s.__getitem__(len(s)), KeyError, ""),  # match should include len(s)
        (lambda s: s[len(s)], KeyError, ""),  # match should include len(s)
        (
            lambda s: s.iloc[len(s)],
            IndexError,
            "single positional indexer is out-of-bounds",
        ),
    ],
)
def test_series_getitem_indexing_errors(
    multiindex_year_month_day_dataframe_random_data,
    indexer,
    expected_error,
    expected_error_msg,
):
    """Each invalid indexer raises ``expected_error`` matching ``expected_error_msg``."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    with pytest.raises(expected_error, match=expected_error_msg):
        indexer(s)
def test_series_getitem_corner_generator(
    multiindex_year_month_day_dataframe_random_data,
):
    """Indexing with a generator of booleans matches indexing with ``s > 0``."""
    s = multiindex_year_month_day_dataframe_random_data["A"]
    result = s[(x > 0 for x in s)]
    expected = s[s > 0]
    tm.assert_series_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index
# ----------------------------------------------------------------------------
def test_getitem_simple(multiindex_dataframe_random_data):
    """``df["foo", "one"]`` on the transposed frame returns its first column."""
    df = multiindex_dataframe_random_data.T
    expected = df.values[:, 0]
    result = df["foo", "one"].values
    tm.assert_almost_equal(result, expected)
@pytest.mark.parametrize(
    "indexer,expected_error_msg",
    [
        (lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"),
        (lambda df: df["foobar"], r"^'foobar'$"),
    ],
)
def test_frame_getitem_simple_key_error(
    multiindex_dataframe_random_data, indexer, expected_error_msg
):
    """Missing column keys raise ``KeyError`` whose message is exactly the key."""
    df = multiindex_dataframe_random_data.T
    with pytest.raises(KeyError, match=expected_error_msg):
        indexer(df)
def test_tuple_string_column_names():
    """A flat index mixing tuple and string labels can be selected with a list."""
    # GH#50372
    mi = MultiIndex.from_tuples([("a", "aa"), ("a", "ab"), ("b", "ba"), ("b", "bb")])
    df = DataFrame([range(4), range(1, 5), range(2, 6)], columns=mi)
    df["single_index"] = 0

    df_flat = df.copy()
    df_flat.columns = df_flat.columns.to_flat_index()
    df_flat["new_single_index"] = 0

    result = df_flat[[("a", "aa"), "new_single_index"]]
    expected = DataFrame(
        [[0, 0], [1, 0], [2, 0]], columns=Index([("a", "aa"), "new_single_index"])
    )
    tm.assert_frame_equal(result, expected)
def test_frame_getitem_multicolumn_empty_level():
    """Selecting a top-level label whose second level is "" keeps only the third level."""
    df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
    df.columns = [
        ["level1 item1", "level1 item2"],
        ["", "level2 item2"],
        ["level3 item1", "level3 item2"],
    ]

    result = df["level1 item1"]
    expected = DataFrame(
        [["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"]
    )
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "indexer,expected_slice",
    [
        (lambda df: df["foo"], slice(3)),
        (lambda df: df["bar"], slice(3, 5)),
        (lambda df: df.loc[:, "bar"], slice(3, 5)),
    ],
)
def test_frame_getitem_toplevel(
    multiindex_dataframe_random_data, indexer, expected_slice
):
    """Selecting a top-level column label returns that column slice with level 0 dropped."""
    df = multiindex_dataframe_random_data.T
    expected = df.reindex(columns=df.columns[expected_slice])
    expected.columns = expected.columns.droplevel(0)

    result = indexer(df)
    tm.assert_frame_equal(result, expected)
def test_frame_mixed_depth_get():
    """Partial column keys resolve to the full key padded with empty-string levels."""
    arrays = [
        ["a", "top", "top", "routine1", "routine1", "routine2"],
        ["", "OD", "OD", "result1", "result2", "result1"],
        ["", "wx", "wy", "", "", ""],
    ]
    tuples = sorted(zip(*arrays))
    index = MultiIndex.from_tuples(tuples)
    df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index)

    result = df["a"]
    expected = df["a", "", ""].rename("a")
    tm.assert_series_equal(result, expected)

    result = df["routine1", "result1"]
    expected = df["routine1", "result1", ""]
    expected = expected.rename(("routine1", "result1"))
    tm.assert_series_equal(result, expected)
def test_frame_getitem_nan_multiindex(nulls_fixture):
# GH#29751
# loc on a multiindex containing nan values
n = nulls_fixture # for code readability
cols = ["a", "b", "c"]
df = DataFrame(
[[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]],
columns=cols,
).set_index(["a", "b"])
df["c"] = df["c"].astype("int64")
idx = (21, n)
result = df.loc[:idx]
expected = DataFrame([[11, n, 13], [21, n, 23]], columns=cols).set_index(["a", "b"])
expected["c"] = expected["c"].astype("int64")
tm.assert_frame_equal(result, expected)
result = df.loc[idx:]
expected = DataFrame(
[[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols
).set_index(["a", "b"])
expected["c"] = expected["c"].astype("int64")
tm.assert_frame_equal(result, expected)
idx1, idx2 = (21, n), (31, n)
result = df.loc[idx1:idx2]
expected = DataFrame([[21, n, 23], [31, n, 33]], columns=cols).set_index(["a", "b"])
expected["c"] = expected["c"].astype("int64")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"indexer,expected",
[
(
(["b"], ["bar", np.nan]),
(
DataFrame(
[[2, 3], [5, 6]],
columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
dtype="int64",
)
),
),
(
(["a", "b"]),
(
DataFrame(
[[1, 2, 3], [4, 5, 6]],
columns=MultiIndex.from_tuples(
[("a", "foo"), ("b", "bar"), ("b", np.nan)]
),
dtype="int64",
)
),
),
(
(["b"]),
(
DataFrame(
[[2, 3], [5, 6]],
columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
dtype="int64",
)
),
),
(
(["b"], ["bar"]),
(
DataFrame(
[[2], [5]],
columns=MultiIndex.from_tuples([("b", "bar")]),
dtype="int64",
)
),
),
(
(["b"], [np.nan]),
(
DataFrame(
[[3], [6]],
columns=MultiIndex(
codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]]
),
dtype="int64",
)
),
),
(("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))),
],
)
def test_frame_getitem_nan_cols_multiindex(
indexer,
expected,
nulls_fixture,
):
# Slicing MultiIndex including levels with nan values, for more information
# see GH#25154
df = DataFrame(
[[1, 2, 3], [4, 5, 6]],
columns=MultiIndex.from_tuples(
[("a", "foo"), ("b", "bar"), ("b", nulls_fixture)]
),
dtype="int64",
)
result = df.loc[:, indexer]
tm.assert_equal(result, expected)
# ----------------------------------------------------------------------------
# test indexing of DataFrame with multi-level Index with duplicates
# ----------------------------------------------------------------------------
@pytest.fixture
def dataframe_with_duplicate_index():
    """Fixture for DataFrame used in tests for gh-4145 and gh-4146.

    The column MultiIndex (levels "main"/"sub") contains the pair
    ("A", "B2") twice, so it is not unique.
    """
    data = [["a", "d", "e", "c", "f", "b"], [1, 4, 5, 3, 6, 2], [1, 4, 5, 3, 6, 2]]
    index = ["h1", "h3", "h5"]
    columns = MultiIndex(
        levels=[["A", "B"], ["A1", "A2", "B1", "B2"]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
        names=["main", "sub"],
    )
    return DataFrame(data, index=index, columns=columns)
@pytest.mark.parametrize(
    "indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]]
)
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
    """Selecting the column ("A", "A1") from the duplicate-column frame returns a DataFrame."""
    # GH 4145
    df = dataframe_with_duplicate_index
    index = Index(["h1", "h3", "h5"])
    columns = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"])
    # built transposed so the single object-dtype row becomes the one column
    expected = DataFrame([["a", 1, 1]], index=columns, columns=index).T

    result = indexer(df)
    tm.assert_frame_equal(result, expected)
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
    """``df["A"]["A1"]`` returns a Series named "A1"."""
    # GH 4146, not returning a block manager when selecting a unique index
    # from a duplicate index
    # as of 4879, this returns a Series (which is similar to what happens
    # with a non-unique)
    df = dataframe_with_duplicate_index
    expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1")
    result = df["A"]["A1"]
    tm.assert_series_equal(result, expected)
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
    """``df["A"]["B2"]`` returns a two-column DataFrame because "B2" is duplicated."""
    # selecting a non_unique from the 2nd level
    df = dataframe_with_duplicate_index
    expected = DataFrame(
        [["d", 4, 4], ["e", 5, 5]],
        index=Index(["B2", "B2"], name="sub"),
        columns=["h1", "h3", "h5"],
    ).T
    result = df["A"]["B2"]
    tm.assert_frame_equal(result, expected)
def test_frame_mi_empty_slice():
    """``df[[]]`` on MultiIndex columns returns an empty selection that keeps the levels."""
    # GH 15454
    df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]]))
    result = df[[]]
    expected = DataFrame(
        index=[0, 1], columns=MultiIndex(levels=[[1], [2]], codes=[[], []])
    )
    tm.assert_frame_equal(result, expected)
def test_loc_empty_multiindex():
    """``.loc`` with an empty MultiIndex selects nothing and assigns nothing."""
    # GH#36936
    arrays = [["a", "a", "b", "a"], ["a", "a", "b", "b"]]
    index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
    df = DataFrame([1, 2, 3, 4], index=index, columns=["value"])

    # loc on empty multiindex == loc with False mask
    empty_multiindex = df.loc[df.loc[:, "value"] == 0, :].index
    result = df.loc[empty_multiindex, :]
    expected = df.loc[[False] * len(df.index), :]
    tm.assert_frame_equal(result, expected)

    # replacing value with loc on empty multiindex
    df.loc[df.loc[df.loc[:, "value"] == 0].index, "value"] = 5
    result = df
    expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
    tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,171 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.fixture
def simple_multiindex_dataframe():
    """
    Fixture returning a simple 3 x 3 dataframe with a two-level
    MultiIndex on both rows and columns, filled with standard-normal
    values drawn from a generator seeded with 2.
    """
    data = np.random.default_rng(2).standard_normal((3, 3))
    return DataFrame(
        data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]]
    )
@pytest.mark.parametrize(
    "indexer, expected",
    [
        (
            lambda df: df.iloc[0],
            lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)),
        ),
        (
            lambda df: df.iloc[2],
            lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)),
        ),
        (
            lambda df: df.iloc[:, 2],
            lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)),
        ),
    ],
)
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
    """A single positional row or column selection returns a Series.

    ``expected`` is a callable that builds the expected Series from the
    frame's underlying values.
    """
    df = simple_multiindex_dataframe
    arr = df.values
    result = indexer(df)
    expected = expected(arr)
    tm.assert_series_equal(result, expected)
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
    """``iloc`` with a list of positions equals ``xs(4, drop_level=False)``."""
    df = simple_multiindex_dataframe
    result = df.iloc[[0, 1]]
    expected = df.xs(4, drop_level=False)
    tm.assert_frame_equal(result, expected)
def test_iloc_returns_scalar(simple_multiindex_dataframe):
    """``df.iloc[2, 2]`` equals element ``[2, 2]`` of the underlying values."""
    df = simple_multiindex_dataframe
    arr = df.values
    result = df.iloc[2, 2]
    expected = arr[2, 2]
    assert result == expected
def test_iloc_getitem_multiple_items():
    """``iloc`` with positions 2 and 3 equals ``xs("b", drop_level=False)``."""
    # GH 5528
    # zip the two level sequences directly instead of unpacking a nested list
    tup = zip(["a", "a", "b", "b"], ["x", "y", "x", "y"])
    index = MultiIndex.from_tuples(tup)
    df = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), index=index)
    result = df.iloc[[2, 3]]
    expected = df.xs("b", drop_level=False)
    tm.assert_frame_equal(result, expected)
def test_iloc_getitem_labels():
    """Scalar ``iloc`` access works on a frame whose row and column labels repeat."""
    # this is basically regular indexing
    arr = np.random.default_rng(2).standard_normal((4, 3))
    df = DataFrame(
        arr,
        columns=[["i", "i", "j"], ["A", "A", "B"]],
        index=[["i", "i", "j", "k"], ["X", "X", "Y", "Y"]],
    )
    result = df.iloc[2, 2]
    expected = arr[2, 2]
    assert result == expected
def test_frame_getitem_slice(multiindex_dataframe_random_data):
    """``df.iloc[:4]`` and ``df[:4]`` select the same rows."""
    df = multiindex_dataframe_random_data
    result = df.iloc[:4]
    expected = df[:4]
    tm.assert_frame_equal(result, expected)
def test_frame_setitem_slice(multiindex_dataframe_random_data):
    """Assigning 0 to ``df.iloc[:4]`` zeroes the first four rows and no others."""
    df = multiindex_dataframe_random_data
    df.iloc[:4] = 0

    assert (df.values[:4] == 0).all()
    assert (df.values[4:] != 0).all()
def test_indexing_ambiguity_bug_1678():
    """Positional column 1 is the ("Ohio", "Red") column."""
    # GH 1678
    columns = MultiIndex.from_tuples(
        [("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")]
    )
    index = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])

    df = DataFrame(np.arange(12).reshape((4, 3)), index=index, columns=columns)

    result = df.iloc[:, 1]
    expected = df.loc[:, ("Ohio", "Red")]
    tm.assert_series_equal(result, expected)
def test_iloc_integer_locations():
    """Scalar ``iloc[r, c]`` reads every cell of a frame with a non-unique MultiIndex."""
    # GH 13797
    data = [
        ["str00", "str01"],
        ["str10", "str11"],
        ["str20", "srt21"],
        ["str30", "str31"],
        ["str40", "str41"],
    ]

    index = MultiIndex.from_tuples(
        [("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
    )

    expected = DataFrame(data)
    df = DataFrame(data, index=index)

    # rebuild the frame cell by cell through positional access
    result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)])

    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "data, indexes, values, expected_k",
    [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]),
    ],
)
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
    """In-place ``series.iloc[i] += v`` on an integer MultiIndex is purely positional.

    ``indexes`` and ``values`` are applied pairwise; ``expected_k`` is the
    resulting content of column "k".
    """
    # GH17148
    df = DataFrame(data=data, columns=["i", "j", "k"])
    df = df.set_index(["i", "j"])

    series = df.k.copy()
    for i, v in zip(indexes, values):
        series.iloc[i] += v

    df["k"] = expected_k
    expected = df.k
    tm.assert_series_equal(series, expected)
def test_getitem_iloc(multiindex_dataframe_random_data):
    """``df.iloc[2]`` equals the cross-section at the third index label."""
    df = multiindex_dataframe_random_data
    result = df.iloc[2]
    expected = df.xs(df.index[2])
    tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,118 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
@pytest.fixture
def m():
    """Return 5; the ``keys`` fixture uses it as the number of random keys."""
    return 5
@pytest.fixture
def n():
    """Return 100; the ``vals`` fixture uses it as the number of rows."""
    return 100
@pytest.fixture
def cols():
    """Return the five column names used by the test frame."""
    return ["jim", "joe", "jolie", "joline", "jolia"]
@pytest.fixture
def vals(n):
    """Return ``n`` row tuples with five fields each.

    Every field is drawn from its own generator seeded with 2, so the rows
    are the same on every call.
    """
    columns = [
        np.random.default_rng(2).integers(0, 10, n),
        np.random.default_rng(2).choice(list("abcdefghij"), n),
        np.random.default_rng(2).choice(
            pd.date_range("20141009", periods=10).tolist(), n
        ),
        np.random.default_rng(2).choice(list("ZYXWVUTSRQ"), n),
        np.random.default_rng(2).standard_normal(n),
    ]
    # zip already yields tuples, so no extra tuple() conversion is needed
    return list(zip(*columns))
@pytest.fixture
def keys(n, m, vals):
    """Return lookup keys: ``m`` random four-field keys plus keys cut from ``vals``.

    The random keys draw from ranges one element wider than those used by
    ``vals``. The keys taken from ``vals`` are every ``n // m``-th row with
    its last field removed.
    """
    # bunch of keys for testing
    columns = [
        np.random.default_rng(2).integers(0, 11, m),
        np.random.default_rng(2).choice(list("abcdefghijk"), m),
        np.random.default_rng(2).choice(
            pd.date_range("20141009", periods=11).tolist(), m
        ),
        np.random.default_rng(2).choice(list("ZYXWVUTSRQP"), m),
    ]
    random_keys = list(zip(*columns))
    keys_from_vals = [row[:-1] for row in vals[:: n // m]]
    return random_keys + keys_from_vals
# covers both unique index and non-unique index
@pytest.fixture
def df(vals, cols):
    """Return a DataFrame built from the ``vals`` rows with ``cols`` as column names."""
    return DataFrame(vals, columns=cols)
@pytest.fixture
def a(df):
    """Return ``df`` concatenated with itself, so every row appears at least twice."""
    return pd.concat([df, df])
@pytest.fixture
def b(df, cols):
    """Return ``df`` with duplicates over all but the last column removed."""
    return df.drop_duplicates(subset=cols[:-1])
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("frame_fixture", ["a", "b"])
def test_multiindex_get_loc(request, lexsort_depth, keys, frame_fixture, cols):
    """Compare ``mi.loc[...]`` against boolean masking for every prefix of every key.

    ``frame_fixture`` names the fixture ("a" or "b") that supplies the frame.
    ``lexsort_depth`` is how many leading columns the frame is sorted by
    before the index is set.
    """
    # GH7724, GH2646

    frame = request.getfixturevalue(frame_fixture)
    if lexsort_depth == 0:
        df = frame.copy(deep=False)
    else:
        df = frame.sort_values(by=cols[:lexsort_depth])

    mi = df.set_index(cols[:-1])
    assert not mi.index._lexsort_depth < lexsort_depth
    for key in keys:
        mask = np.ones(len(df), dtype=bool)

        # test for all partials of this key
        for i, k in enumerate(key):
            # narrow the mask one key field at a time
            mask &= df.iloc[:, i] == k

            if not mask.any():
                assert key[: i + 1] not in mi.index
                continue

            assert key[: i + 1] in mi.index
            right = df[mask].copy(deep=False)

            if i + 1 != len(key):  # partial key
                return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
                assert return_value is None
                return_value = right.set_index(cols[i + 1 : -1], inplace=True)
                assert return_value is None
                tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

            else:  # full key
                return_value = right.set_index(cols[:-1], inplace=True)
                assert return_value is None
                if len(right) == 1:  # single hit
                    right = Series(
                        right["jolia"].values, name=right.index[0], index=["jolia"]
                    )
                    tm.assert_series_equal(mi.loc[key[: i + 1]], right)
                else:  # multi hit
                    tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

View File

@ -0,0 +1,992 @@
import numpy as np
import pytest
from pandas.errors import (
IndexingError,
PerformanceWarning,
)
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.fixture
def single_level_multiindex():
    """Return a MultiIndex with one level named "first" and four labels."""
    return MultiIndex(
        levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
    )
@pytest.fixture
def frame_random_data_integer_multi_index():
    """Return a 6 x 2 random frame indexed by a two-level integer MultiIndex."""
    levels = [[0, 1], [0, 1, 2]]
    codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
    index = MultiIndex(levels=levels, codes=codes)
    return DataFrame(np.random.default_rng(2).standard_normal((6, 2)), index=index)
class TestMultiIndexLoc:
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
    """Scalar ``.loc`` assignment with a tuple row key works for string and integer columns."""
    frame = multiindex_dataframe_random_data
    frame.loc[("bar", "two"), "B"] = 5
    assert frame.loc[("bar", "two"), "B"] == 5

    # with integer labels
    df = frame.copy()
    df.columns = list(range(3))
    df.loc[("bar", "two"), 1] = 7
    assert df.loc[("bar", "two"), 1] == 7
def test_loc_getitem_general(self, any_real_numpy_dtype):
# GH#2817
dtype = any_real_numpy_dtype
data = {
"amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
"col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
"num": {0: 12, 1: 11, 2: 12, 3: 12, 4: 12},
}
df = DataFrame(data)
df = df.astype({"col": dtype, "num": dtype})
df = df.set_index(keys=["col", "num"])
key = 4.0, 12
# emits a PerformanceWarning, ok
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
# this is ok
return_value = df.sort_index(inplace=True)
assert return_value is None
res = df.loc[key]
# col has float dtype, result should be float64 Index
col_arr = np.array([4.0] * 3, dtype=dtype)
year_arr = np.array([12] * 3, dtype=dtype)
index = MultiIndex.from_arrays([col_arr, year_arr], names=["col", "num"])
expected = DataFrame({"amount": [222, 333, 444]}, index=index)
tm.assert_frame_equal(res, expected)
def test_loc_getitem_multiindex_missing_label_raises(self):
# GH#21593
df = DataFrame(
np.random.default_rng(2).standard_normal((3, 3)),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)
with pytest.raises(KeyError, match=r"^2$"):
df.loc[2]
def test_loc_getitem_list_of_tuples_with_multiindex(
    self, multiindex_year_month_day_dataframe_random_data
):
    """``.loc`` with a list of full tuple keys returns rows 49:51."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]
    expected = ser.reindex(ser.index[49:51])
    result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]]
    tm.assert_series_equal(result, expected)
def test_loc_getitem_series(self):
# GH14730
# passing a series as a key with a MultiIndex
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
x = Series(index=index, data=range(9), dtype=np.float64)
y = Series([1, 3])
expected = Series(
data=[0, 1, 2, 6, 7, 8],
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
dtype=np.float64,
)
result = x.loc[y]
tm.assert_series_equal(result, expected)
result = x.loc[[1, 3]]
tm.assert_series_equal(result, expected)
# GH15424
y1 = Series([1, 3], index=[1, 2])
result = x.loc[y1]
tm.assert_series_equal(result, expected)
empty = Series(data=[], dtype=np.float64)
expected = Series(
[],
index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
dtype=np.float64,
)
result = x.loc[empty]
tm.assert_series_equal(result, expected)
def test_loc_getitem_array(self):
# GH15434
# passing an array as a key with a MultiIndex
index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
x = Series(index=index, data=range(9), dtype=np.float64)
y = np.array([1, 3])
expected = Series(
data=[0, 1, 2, 6, 7, 8],
index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
dtype=np.float64,
)
result = x.loc[y]
tm.assert_series_equal(result, expected)
# empty array:
empty = np.array([])
expected = Series(
[],
index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
dtype="float64",
)
result = x.loc[empty]
tm.assert_series_equal(result, expected)
# 0-dim array (scalar):
scalar = np.int64(1)
expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64)
result = x.loc[scalar]
tm.assert_series_equal(result, expected)
def test_loc_multiindex_labels(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((3, 3)),
columns=[["i", "i", "j"], ["A", "A", "B"]],
index=[["i", "i", "j"], ["X", "X", "Y"]],
)
# the first 2 rows
expected = df.iloc[[0, 1]].droplevel(0)
result = df.loc["i"]
tm.assert_frame_equal(result, expected)
# 2nd (last) column
expected = df.iloc[:, [2]].droplevel(0, axis=1)
result = df.loc[:, "j"]
tm.assert_frame_equal(result, expected)
# bottom right corner
expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1)
result = df.loc["j"].loc[:, "j"]
tm.assert_frame_equal(result, expected)
# with a tuple
expected = df.iloc[[0, 1]]
result = df.loc[("i", "X")]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_ints(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((3, 3)),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)
expected = df.iloc[[0, 1]].droplevel(0)
result = df.loc[4]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_missing_label_raises(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((3, 3)),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)
with pytest.raises(KeyError, match=r"^2$"):
df.loc[2]
@pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
def test_loc_multiindex_list_missing_label(self, key, pos):
# GH 27148 - lists with missing labels _do_ raise
df = DataFrame(
np.random.default_rng(2).standard_normal((3, 3)),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)
with pytest.raises(KeyError, match="not in index"):
df.loc[key]
def test_loc_multiindex_too_many_dims_raises(self):
# GH 14885
s = Series(
range(8),
index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
)
with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"):
s.loc["a", "b"]
with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"):
s.loc["a", "d", "g"]
with pytest.raises(IndexingError, match="Too many indexers"):
s.loc["a", "d", "g", "j"]
def test_loc_multiindex_indexer_none(self):
# GH6788
# multi-index indexer is None (meaning take all)
attributes = ["Attribute" + str(i) for i in range(1)]
attribute_values = ["Value" + str(i) for i in range(5)]
index = MultiIndex.from_product([attributes, attribute_values])
df = 0.1 * np.random.default_rng(2).standard_normal((10, 1 * 5)) + 0.5
df = DataFrame(df, columns=index)
result = df[attributes]
tm.assert_frame_equal(result, df)
# GH 7349
# loc with a multi-index seems to be doing fallback
df = DataFrame(
np.arange(12).reshape(-1, 1),
index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]),
)
expected = df.loc[([1, 2],), :]
result = df.loc[[1, 2]]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_incomplete(self):
# GH 7399
# incomplete indexers
s = Series(
np.arange(15, dtype="int64"),
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
)
expected = s.loc[:, "a":"c"]
result = s.loc[0:4, "a":"c"]
tm.assert_series_equal(result, expected)
result = s.loc[:4, "a":"c"]
tm.assert_series_equal(result, expected)
result = s.loc[0:, "a":"c"]
tm.assert_series_equal(result, expected)
# GH 7400
# multiindexer getitem with list of indexers skips wrong element
s = Series(
np.arange(15, dtype="int64"),
MultiIndex.from_product([range(5), ["a", "b", "c"]]),
)
expected = s.iloc[[6, 7, 8, 12, 13, 14]]
result = s.loc[2:4:2, "a":"c"]
tm.assert_series_equal(result, expected)
def test_get_loc_single_level(self, single_level_multiindex):
single_level = single_level_multiindex
s = Series(
np.random.default_rng(2).standard_normal(len(single_level)),
index=single_level,
)
for k in single_level.values:
s[k]
def test_loc_getitem_int_slice(self):
# GH 3053
# loc should treat integer slices like label slices
index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]])
df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
result = df.loc[6:8, :]
expected = df
tm.assert_frame_equal(result, expected)
index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]])
df = DataFrame(np.random.default_rng(2).standard_normal((6, 6)), index, index)
result = df.loc[20:30, :]
expected = df.iloc[2:]
tm.assert_frame_equal(result, expected)
# doc examples
result = df.loc[10, :]
expected = df.iloc[0:2]
expected.index = ["a", "b"]
tm.assert_frame_equal(result, expected)
result = df.loc[:, 10]
expected = df[10]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index)
)
@pytest.mark.parametrize(
"indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index)
)
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
# GH #19686
# .loc should work with nested indexers which can be
# any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices
def convert_nested_indexer(indexer_type, keys):
if indexer_type == np.ndarray:
return np.array(keys)
if indexer_type == slice:
return slice(*keys)
return indexer_type(keys)
a = [10, 20, 30]
b = [1, 2, 3]
index = MultiIndex.from_product([a, b])
df = DataFrame(
np.arange(len(index), dtype="int64"), index=index, columns=["Data"]
)
keys = ([10, 20], [2, 3])
types = (indexer_type_1, indexer_type_2)
# check indexers with all the combinations of nested objects
# of all the valid types
indexer = tuple(
convert_nested_indexer(indexer_type, k)
for indexer_type, k in zip(types, keys)
)
if indexer_type_1 is set or indexer_type_2 is set:
with pytest.raises(TypeError, match="as an indexer is not supported"):
df.loc[indexer, "Data"]
return
else:
result = df.loc[indexer, "Data"]
expected = Series(
[1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
)
tm.assert_series_equal(result, expected)
def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series):
# GH#37711
mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
obj = frame_or_series([1, 2], index=mi)
obj.loc[("a",)] = 0
expected = frame_or_series([0, 2], index=mi)
tm.assert_equal(obj, expected)
@pytest.mark.parametrize("indexer", [("a",), ("a")])
def test_multiindex_one_dimensional_tuple_columns(self, indexer):
    # GH#37711
    # both spellings of the row key must set the same rows
    midx = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
    frame = DataFrame([1, 2], index=midx)
    frame.loc[indexer, :] = 0

    expected = DataFrame([0, 2], index=midx)
    tm.assert_frame_equal(frame, expected)
@pytest.mark.parametrize(
    "indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)]
)
def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value):
    # GH#39147
    # setting columns that do not exist yet adds them to the frame
    row_index = MultiIndex.from_tuples([(1, 2), (3, 4)])
    frame = DataFrame([[1, 2], [3, 4]], index=row_index, columns=["a", "b"])
    frame.loc[indexer, ["c", "d"]] = 1.0

    # the first row is selected by both parametrized indexers; the second
    # row's new cells hold ``exp_value``
    first_row = [1, 2, 1.0, 1.0]
    second_row = [3, 4, exp_value, exp_value]
    expected = DataFrame(
        [first_row, second_row],
        index=row_index,
        columns=["a", "b", "c", "d"],
    )
    tm.assert_frame_equal(frame, expected)
def test_sorted_multiindex_after_union(self):
# GH#44752
midx = MultiIndex.from_product(
[pd.date_range("20110101", periods=2), Index(["a", "b"])]
)
ser1 = Series(1, index=midx)
ser2 = Series(1, index=midx[:2])
df = pd.concat([ser1, ser2], axis=1)
expected = df.copy()
result = df.loc["2011-01-01":"2011-01-02"]
tm.assert_frame_equal(result, expected)
df = DataFrame({0: ser1, 1: ser2})
result = df.loc["2011-01-01":"2011-01-02"]
tm.assert_frame_equal(result, expected)
df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1)
result = df.loc["2011-01-01":"2011-01-02"]
tm.assert_frame_equal(result, expected)
def test_loc_no_second_level_index(self):
# GH#43599
df = DataFrame(
index=MultiIndex.from_product([list("ab"), list("cd"), list("e")]),
columns=["Val"],
)
res = df.loc[np.s_[:, "c", :]]
expected = DataFrame(
index=MultiIndex.from_product([list("ab"), list("e")]), columns=["Val"]
)
tm.assert_frame_equal(res, expected)
def test_loc_multi_index_key_error(self):
    # GH 51892
    # looking up a column tuple that is not present must raise a KeyError
    # that names the whole tuple
    df = DataFrame(
        {
            (1, 2): ["a", "b", "c"],
            (1, 3): ["d", "e", "f"],
            (2, 2): ["g", "h", "i"],
            (2, 4): ["j", "k", "l"],
        }
    )
    # The parentheses are escaped on purpose: unescaped they form a regex
    # group, so the pattern would only require the text "1, 4" instead of
    # the tuple repr "(1, 4)".
    with pytest.raises(KeyError, match=r"\(1, 4\)"):
        df.loc[0, (1, 4)]
@pytest.mark.parametrize(
    "indexer, pos",
    [
        ([], []),  # empty ok
        (["A"], slice(3)),
        (["A", "D"], []),  # "D" isn't present -> raise
        (["D", "E"], []),  # no values found -> raise
        (["D"], []),  # same, with single item list: GH 27148
        (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
        (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
    ],
)
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
    # GH 7866
    # multi-index slicing with missing indexers
    idx = MultiIndex.from_product(
        [["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
    )
    ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
    expected = ser.iloc[pos]

    nothing_expected = expected.size == 0
    if nothing_expected and indexer != []:
        # a non-empty indexer that selects nothing must raise
        with pytest.raises(KeyError, match=str(indexer)):
            ser.loc[indexer]
        return

    if indexer == (slice(None), ["foo", "bah"]):
        # "bah" is not in idx.levels[1], raising KeyError enforced in 2.0
        with pytest.raises(KeyError, match="'bah'"):
            ser.loc[indexer]
        return

    result = ser.loc[indexer]
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
    # GH 8737
    # empty indexer
    multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"]))
    values = np.random.default_rng(2).standard_normal((5, 6))
    df = DataFrame(values, index=range(5), columns=multi_index)
    df = df.sort_index(level=0, axis=1)

    # reindex returns (new_index, indexer); only the empty index is needed
    empty_columns, _ = multi_index.reindex([])
    expected = DataFrame(index=range(5), columns=empty_columns)

    result = df.loc[:, columns_indexer]
    tm.assert_frame_equal(result, expected)
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
    # regression from < 0.14.0
    # GH 7914
    # a cell holding a function object must come back unchanged
    columns = MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")])
    df = DataFrame(
        [[np.mean, np.median], ["mean", "median"]],
        columns=columns,
        index=["function", "name"],
    )
    result = df.loc["function", ("functs", "mean")]
    assert result == np.mean
def test_loc_getitem_tuple_plus_slice():
    # GH 671
    # a full row tuple plus a column slice equals the plain two-key lookup
    data = {
        "a": np.arange(10),
        "b": np.arange(10),
        "c": np.random.default_rng(2).standard_normal(10),
        "d": np.random.default_rng(2).standard_normal(10),
    }
    df = DataFrame(data).set_index(["a", "b"])

    expected = df.loc[0, 0]
    result = df.loc[(0, 0), :]
    tm.assert_series_equal(result, expected)
def test_loc_getitem_int(frame_random_data_integer_multi_index):
df = frame_random_data_integer_multi_index
result = df.loc[1]
expected = df[-3:]
expected.index = expected.index.droplevel(0)
tm.assert_frame_equal(result, expected)
def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index):
    # looking up the label 3 must raise a KeyError whose message is exactly "3"
    frame = frame_random_data_integer_multi_index
    with pytest.raises(KeyError, match=r"^3$"):
        frame.loc[3]
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
    frame = multiindex_dataframe_random_data
    new_row = ("bar", "three")

    # test setup - check key not in dataframe
    with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"):
        frame.loc[new_row, "B"]

    # in theory should be inserting in a sorted space????
    frame.loc[new_row, "B"] = 0

    # after the assignment the cell can be read back from the sorted frame
    result = frame.sort_index().loc[new_row, "B"]
    assert result == 0
def test_loc_setitem_single_column_slice():
    # case from https://github.com/pandas-dev/pandas/issues/27841
    object_columns = MultiIndex.from_product([["Main"], ("another", "one")])
    df = DataFrame("string", index=list("abcd"), columns=object_columns)
    df["labels"] = "a"
    df.loc[:, "labels"] = df.index
    tm.assert_numpy_array_equal(np.asarray(df["labels"]), np.asarray(df.index))

    # test with non-object block
    float_columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
    df = DataFrame(np.nan, index=range(4), columns=float_columns)
    expected = df.copy()

    # setting the "B" group by label must match setting column 2 by position
    df.loc[:, "B"] = np.arange(4)
    expected.iloc[:, 2] = np.arange(4)
    tm.assert_frame_equal(df, expected)
def test_loc_nan_multiindex(using_infer_string):
# GH 5286
tups = [
("Good Things", "C", np.nan),
("Good Things", "R", np.nan),
("Bad Things", "C", np.nan),
("Bad Things", "T", np.nan),
("Okay Things", "N", "B"),
("Okay Things", "N", "D"),
("Okay Things", "B", np.nan),
("Okay Things", "D", np.nan),
]
df = DataFrame(
np.ones((8, 4)),
columns=Index(["d1", "d2", "d3", "d4"]),
index=MultiIndex.from_tuples(tups, names=["u1", "u2", "u3"]),
)
result = df.loc["Good Things"].loc["C"]
expected = DataFrame(
np.ones((1, 4)),
index=Index(
[np.nan],
dtype="object" if not using_infer_string else "string[pyarrow_numpy]",
name="u3",
),
columns=Index(["d1", "d2", "d3", "d4"]),
)
tm.assert_frame_equal(result, expected)
def test_loc_period_string_indexing():
    # GH 9892
    quarters = pd.period_range("2013Q1", "2013Q4", freq="Q")
    cvr_numbers = (1111, 2222, 3333)
    idx = MultiIndex.from_product((quarters, cvr_numbers), names=("Period", "CVR"))
    column_names = (
        "OMS",
        "OMK",
        "RES",
        "DRIFT_IND",
        "OEVRIG_IND",
        "FIN_IND",
        "VARE_UD",
        "LOEN_UD",
        "FIN_UD",
    )
    df = DataFrame(index=idx, columns=column_names)

    result = df.loc[("2013Q1", 1111), "OMS"]

    alt = df.loc[(quarters[0], 1111), "OMS"]
    assert np.isnan(alt)

    # Because the resolution of the string matches, it is an exact lookup,
    # not a slice
    assert np.isnan(result)

    alt = df.loc[("2013Q1", 1111), "OMS"]
    assert np.isnan(alt)
def test_loc_datetime_mask_slicing():
    # GH 16699
    # scalar key on the first level combined with a boolean mask on the second
    dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"])
    m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"])
    df = DataFrame(
        data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"]
    )

    second_level_mask = df.index.get_level_values(1) > "2017-05-04"
    result = df.loc[(dt_idx[0], second_level_mask), "C1"]

    expected_index = MultiIndex.from_tuples(
        [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))],
        names=["Idx1", "Idx2"],
    )
    expected = Series([3], name="C1", index=expected_index)
    tm.assert_series_equal(result, expected)
def test_loc_datetime_series_tuple_slicing():
    # https://github.com/pandas-dev/pandas/issues/35858
    # selecting the only timestamp on the second level returns the series as is
    stamp = pd.Timestamp("2000")
    midx = MultiIndex.from_tuples([("a", stamp)], names=["a", "b"])
    ser = Series(1, index=midx, name="c")

    result = ser.loc[:, [stamp]]
    tm.assert_series_equal(result, ser)
def test_loc_with_mi_indexer():
    # https://github.com/pandas-dev/pandas/issues/35351
    # indexing rows with a MultiIndex keeps every duplicate of a matched key
    source_index = MultiIndex.from_tuples(
        [(0, 1), (1, 0), (1, 1), (1, 1)], names=["index", "date"]
    )
    df = DataFrame(
        data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]],
        index=source_index,
        columns=["author", "price"],
    )

    wanted = MultiIndex.from_tuples([(0, 1), (1, 1)], names=["index", "date"])
    result = df.loc[wanted, :]

    expected_index = MultiIndex.from_tuples(
        [(0, 1), (1, 1), (1, 1)], names=["index", "date"]
    )
    expected = DataFrame(
        [["a", 1], ["b", 1], ["c", 2]],
        index=expected_index,
        columns=["author", "price"],
    )
    tm.assert_frame_equal(result, expected)
def test_loc_mi_with_level1_named_0():
    # GH#37194
    dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
    df = Series(range(3), index=dti).to_frame()
    df[1] = dti

    # column 0 becomes a second index level that is literally named 0
    df2 = df.set_index(0, append=True)
    assert df2.index.names == (None, 0)
    df2.index.get_loc(dti[0])  # smoke test

    first_stamp = dti[0]

    result = df2.loc[first_stamp]
    expected = df2.iloc[[0]].droplevel(None)
    tm.assert_frame_equal(result, expected)

    # same lookup on a single column of the frame
    ser2 = df2[1]
    assert ser2.index.names == (None, 0)

    result = ser2.loc[first_stamp]
    expected = ser2.iloc[[0]].droplevel(None)
    tm.assert_series_equal(result, expected)
def test_getitem_str_slice():
    # GH#15928
    rows = [
        ["20160525 13:30:00.023", "MSFT", "51.95", "51.95"],
        ["20160525 13:30:00.048", "GOOG", "720.50", "720.93"],
        ["20160525 13:30:00.076", "AAPL", "98.55", "98.56"],
        ["20160525 13:30:00.131", "AAPL", "98.61", "98.62"],
        ["20160525 13:30:00.135", "MSFT", "51.92", "51.95"],
        ["20160525 13:30:00.135", "AAPL", "98.61", "98.62"],
    ]
    df = DataFrame(rows, columns="time,ticker,bid,ask".split(","))
    df2 = df.set_index(["ticker", "time"]).sort_index()

    upper_bound = slice("2016-05-25 13:30:00")

    # slicing inside the tuple must agree with slicing after selecting "AAPL"
    res = df2.loc[("AAPL", upper_bound), :].droplevel(0)
    expected = df2.loc["AAPL"].loc[upper_bound, :]
    tm.assert_frame_equal(res, expected)
def test_3levels_leading_period_index():
    # GH#24091
    # full-depth tuple lookup where the first level is a PeriodIndex
    pi = pd.PeriodIndex(
        ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"],
        name="datetime",
        freq="D",
    )
    second = ["A", "A", "Z", "W"]
    third = ["B", "C", "Q", "F"]
    mi = MultiIndex.from_arrays([pi, second, third])

    ser = Series(range(4), index=mi, dtype=np.float64)
    value = ser.loc[(pi[0], "A", "B")]
    assert value == 0.0
class TestKeyErrorsWithMultiIndex:
    # Each test checks that a missing key surfaces as a KeyError.

    def test_missing_keys_raises_keyerror(self):
        # GH#27420 KeyError, not TypeError
        frame = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"])
        indexed = frame.set_index(["A", "B"])

        with pytest.raises(KeyError, match="1"):
            indexed.loc[(1, 6)]

    def test_missing_key_raises_keyerror2(self):
        # GH#21168 KeyError, not "IndexingError: Too many indexers"
        midx = MultiIndex.from_product([[0, 1]] * 2)
        ser = Series(-1, index=midx)

        with pytest.raises(KeyError, match=r"\(0, 3\)"):
            ser.loc[0, 3]

    def test_missing_key_combination(self):
        # GH: 19556
        level_arrays = [
            np.array(["a", "a", "b", "b"]),
            np.array(["1", "2", "2", "3"]),
            np.array(["c", "d", "c", "d"]),
        ]
        mi = MultiIndex.from_arrays(level_arrays, names=["one", "two", "three"])
        df = DataFrame(np.random.default_rng(2).random((4, 3)), index=mi)

        # "b" and "1" both exist on their levels, but never in the same row
        sliced_key = ("b", "1", slice(None))
        msg = r"\('b', '1', slice\(None, None, None\)\)"
        with pytest.raises(KeyError, match=msg):
            df.loc[sliced_key, :]
        with pytest.raises(KeyError, match=msg):
            df.index.get_locs(sliced_key)
        with pytest.raises(KeyError, match=r"\('b', '1'\)"):
            df.loc[("b", "1"), :]
def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data):
    # selecting column "A" and then (2000, 5) must equal selecting the
    # (2000, 5) rows with .loc and then column "A"
    frame = multiindex_year_month_day_dataframe_random_data
    column_first = frame["A"][2000, 5]
    rows_first = frame.loc[2000, 5]["A"]
    tm.assert_series_equal(column_first, rows_first)
def test_loc_with_nan():
    # GH: 27104
    # label lookup on an index whose first level also contains NaN
    raw = DataFrame(
        {"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]}
    )
    df = raw.set_index(["ind1", "ind2"])

    # list key: both index levels are kept
    result = df.loc[["a"]]
    kept_levels = MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"])
    expected = DataFrame({"col": [1]}, index=kept_levels)
    tm.assert_frame_equal(result, expected)

    # scalar key: the first level is dropped
    result = df.loc["a"]
    expected = DataFrame({"col": [1]}, index=Index([1], name="ind2"))
    tm.assert_frame_equal(result, expected)
def test_getitem_non_found_tuple():
    # GH: 25236
    # a full-depth tuple that is absent must be reported in the KeyError
    raw = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"])
    df = raw.set_index(["a", "b", "c"])
    with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"):
        df.loc[(2.0, 2.0, 3.0)]
def test_get_loc_datetime_index():
    # GH#24263
    index = pd.date_range("2001-01-01", periods=100)
    mi = MultiIndex.from_arrays([index])
    month = "2001-01"

    # Check if get_loc matches for Index and MultiIndex
    assert mi.get_loc(month) == slice(0, 31, None)
    assert index.get_loc(month) == slice(0, 31, None)

    # strided and repeated variants must agree with the flat index too
    loc = mi[::2].get_loc(month)
    expected = index[::2].get_loc(month)
    assert loc == expected

    loc = mi.repeat(2).get_loc(month)
    expected = index.repeat(2).get_loc(month)
    assert loc == expected

    loc = mi.append(mi).get_loc(month)
    expected = index.append(index).get_loc(month)
    # TODO: standardize return type for MultiIndex.get_loc
    tm.assert_numpy_array_equal(loc.nonzero()[0], expected)
def test_loc_setitem_indexer_differently_ordered():
    # GH#34603
    # values are assigned in the order given by the indexer ([1, 0]), not in
    # the order of the index
    mi = MultiIndex.from_product([["a", "b"], [0, 1]])
    df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi)

    new_values = np.array([[9, 10], [11, 12]])
    df.loc[("a", [1, 0]), :] = new_values

    expected = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi)
    tm.assert_frame_equal(df, expected)
def test_loc_getitem_index_differently_ordered_slice_none():
    # GH#31330
    # the result follows the order of the list key on the second level
    df = DataFrame(
        [[1, 2], [3, 4], [5, 6], [7, 8]],
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=["a", "b"],
    )
    key = (slice(None), [2, 1])
    result = df.loc[key, :]

    expected = DataFrame(
        [[3, 4], [7, 8], [1, 2], [5, 6]],
        index=[["a", "b", "a", "b"], [2, 2, 1, 1]],
        columns=["a", "b"],
    )
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]])
def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer):
    # GH#40978
    source_index = MultiIndex.from_tuples(
        [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)]
    )
    df = DataFrame([1] * 8, index=source_index, columns=["a"])

    # list key on the second level
    result = df.loc[(slice(None), indexer), :]
    expected = DataFrame(
        [1] * 8,
        index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]],
        columns=["a"],
    )
    tm.assert_frame_equal(result, expected)

    # boolean mask built from the same labels leaves the frame unchanged
    mask = df.index.isin(indexer, level=1)
    result = df.loc[mask, :]
    tm.assert_frame_equal(result, df)
def test_loc_getitem_drops_levels_for_one_row_dataframe():
    # GH#10521 "x" and "z" are both scalar indexing, so those levels are dropped
    mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])

    # DataFrame case
    df = DataFrame({"d": [0]}, index=mi)
    frame_result = df.loc["x", :, "z"]
    tm.assert_frame_equal(frame_result, df.droplevel([0, 2]))

    # Series case
    ser = Series([0], index=mi)
    series_result = ser.loc["x", :, "z"]
    series_expected = Series([0], index=Index(["y"], name="b"))
    tm.assert_series_equal(series_result, series_expected)
def test_mi_columns_loc_list_label_order():
    # GH 10710
    # the selected column groups come back in the order of the list key
    cols = MultiIndex.from_product([["A", "B", "C"], [1, 2]])
    df = DataFrame(np.zeros((5, 6)), columns=cols)

    result = df.loc[:, ["B", "A"]]

    reordered = MultiIndex.from_tuples([("B", 1), ("B", 2), ("A", 1), ("A", 2)])
    expected = DataFrame(np.zeros((5, 4)), columns=reordered)
    tm.assert_frame_equal(result, expected)
def test_mi_partial_indexing_list_raises():
    # GH 13501
    # the list ["b", 2] is looked up against the first level, where 2 is
    # not a label
    frame = DataFrame(
        np.arange(12).reshape((4, 3)),
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]],
    )
    frame.index.names = ["key1", "key2"]
    frame.columns.names = ["state", "color"]

    with pytest.raises(KeyError, match="\\[2\\] not in index"):
        frame.loc[["b", 2], "Colorado"]
def test_mi_indexing_list_nonexistent_raises():
    # GH 15452
    # a list of labels that are all absent must raise and name them
    midx = MultiIndex.from_product([[1, 2], ["a", "b"]])
    ser = Series(range(4), index=midx)
    with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"):
        ser.loc[["not", "found"]]
def test_mi_add_cell_missing_row_non_unique():
    # GH 16018
    # rows are added by label to a frame whose row index is non-unique
    result = DataFrame(
        [[1, 2, 5, 6], [3, 4, 7, 8]],
        index=["a", "a"],
        columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
    )
    cell = (1, "A")
    result.loc["c"] = -1
    result.loc["c", cell] = 3
    # row "d" does not exist yet; only one of its cells is given a value
    result.loc["d", cell] = 3

    expected_rows = [
        [1.0, 2.0, 5.0, 6.0],
        [3.0, 4.0, 7.0, 8.0],
        [3.0, -1.0, -1, -1],
        [3.0, np.nan, np.nan, np.nan],
    ]
    expected = DataFrame(
        expected_rows,
        index=["a", "a", "c", "d"],
        columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
    )
    tm.assert_frame_equal(result, expected)
def test_loc_get_scalar_casting_to_float():
    # GH#41369
    # the integer column must not be returned as a float even though the
    # frame also holds a float column
    midx = MultiIndex.from_arrays([[3], [4]], names=["c", "d"])
    df = DataFrame({"a": 1.0, "b": 2}, index=midx)

    scalar = df.loc[(3, 4), "b"]
    assert scalar == 2
    assert isinstance(scalar, np.int64)

    from_list_key = df.loc[[(3, 4)], "b"].iloc[0]
    assert from_list_key == 2
    assert isinstance(from_list_key, np.int64)
def test_loc_empty_single_selector_with_names():
    # GH 19517
    # the levels are named with the integers 1 and 0; the remaining level
    # must keep its name 0
    idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0])
    ser = Series(index=idx, dtype=np.float64)

    result = ser.loc["a"]
    expected = Series([np.nan, np.nan], index=Index(["A", "B"], name=0))
    tm.assert_series_equal(result, expected)
def test_loc_keyerror_rightmost_key_missing():
    # GH 20951
    # only the last element of the key is missing; the KeyError message must
    # be exactly that element
    raw = DataFrame(
        {
            "A": [100, 100, 200, 200, 300, 300],
            "B": [10, 10, 20, 21, 31, 33],
            "C": range(6),
        }
    )
    df = raw.set_index(["A", "B"])
    with pytest.raises(KeyError, match="^1$"):
        df.loc[(100, 1)]
def test_multindex_series_loc_with_tuple_label():
    # GH#43908
    # one of the second-level labels is itself a tuple
    midx = MultiIndex.from_tuples([(1, 2), (3, (4, 5))])
    ser = Series([1, 2], index=midx)

    value = ser.loc[(3, (4, 5))]
    assert value == 2

View File

@ -0,0 +1,235 @@
import numpy as np
import pytest
import pandas._libs.index as libindex
from pandas.errors import PerformanceWarning
import pandas as pd
from pandas import (
CategoricalDtype,
DataFrame,
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
from pandas.core.arrays.boolean import BooleanDtype
class TestMultiIndexBasic:
    # Assorted construction, lookup and alignment checks on MultiIndex objects.

    def test_multiindex_perf_warn(self):
        raw = DataFrame(
            {
                "jim": [0, 0, 1, 1],
                "joe": ["x", "x", "z", "y"],
                "jolie": np.random.default_rng(2).random(4),
            }
        )
        df = raw.set_index(["jim", "joe"])

        with tm.assert_produces_warning(PerformanceWarning):
            df.loc[(1, "z")]

        # reorder the rows and look up a partial key
        df = df.iloc[[2, 1, 3, 0]]
        with tm.assert_produces_warning(PerformanceWarning):
            df.loc[(0,)]

    @pytest.mark.parametrize("offset", [-5, 5])
    def test_indexing_over_hashtable_size_cutoff(self, monkeypatch, offset):
        size_cutoff = 20
        # one series shorter and one longer than the patched cutoff
        n = size_cutoff + offset

        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            midx = MultiIndex.from_arrays((["a"] * n, np.arange(n)))
            s = Series(np.arange(n), midx)

            # lookups must succeed on either side of the cutoff
            for label in (5, 6, 7):
                assert s[("a", label)] == label

    def test_multi_nan_indexing(self):
        # GH 3588
        data = {
            "a": ["R1", "R2", np.nan, "R4"],
            "b": ["C1", "C2", "C3", "C4"],
            "c": [10, 15, np.nan, 20],
        }
        result = DataFrame(data).set_index(["a", "b"], drop=False)

        expected = DataFrame(
            data,
            index=[
                Index(["R1", "R2", np.nan, "R4"], name="a"),
                Index(["C1", "C2", "C3", "C4"], name="b"),
            ],
        )
        tm.assert_frame_equal(result, expected)

    def test_exclusive_nat_column_indexing(self):
        # GH 38025
        # test multi indexing when one column exclusively contains NaT values
        raw = DataFrame(
            {
                "a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            }
        )
        df = raw.set_index(["a", "b"])

        expected = DataFrame(
            {"c": [10, 15, np.nan, 20]},
            index=[
                Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"),
                Index(["C1", "C2", "C3", "C4"], name="b"),
            ],
        )
        tm.assert_frame_equal(df, expected)

    def test_nested_tuples_duplicates(self):
        # GH#30892
        dti = pd.to_datetime(["20190101", "20190101", "20190102"])
        idx = Index(["a", "a", "c"])
        mi = MultiIndex.from_arrays([dti, idx], names=["index1", "index2"])

        df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi)
        expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi)

        duplicated_key = (dti[0], "a")

        # the key as a plain tuple
        df2 = df.copy(deep=True)
        df2.loc[duplicated_key, "c2"] = 1.0
        tm.assert_frame_equal(df2, expected)

        # the same key wrapped in a list
        df3 = df.copy(deep=True)
        df3.loc[[duplicated_key], "c2"] = 1.0
        tm.assert_frame_equal(df3, expected)

    def test_multiindex_with_datatime_level_preserves_freq(self):
        # https://github.com/pandas-dev/pandas/issues/35563
        idx = Index(range(2), name="A")
        dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
        mi = MultiIndex.from_product([idx, dti])
        df = DataFrame(np.random.default_rng(2).standard_normal((14, 2)), index=mi)

        remaining_level = df.loc[0].index
        tm.assert_index_equal(remaining_level, dti)
        assert remaining_level.freq == dti.freq

    def test_multiindex_complex(self):
        # GH#42145
        complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
        non_complex_data = [3, 4, 5]
        result = DataFrame(
            {
                "x": complex_data,
                "y": non_complex_data,
                "z": non_complex_data,
            }
        )
        result.set_index(["x", "y"], inplace=True)

        expected_index = MultiIndex.from_arrays(
            [complex_data, non_complex_data],
            names=("x", "y"),
        )
        expected = DataFrame({"z": non_complex_data}, index=expected_index)
        tm.assert_frame_equal(result, expected)

    def test_rename_multiindex_with_duplicates(self):
        # GH 38015
        mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
        renamed = DataFrame(index=mi).rename(index={"A": "Apple"}, level=0)

        mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")])
        expected = DataFrame(index=mi2)
        tm.assert_frame_equal(renamed, expected)

    def test_series_align_multiindex_with_nan_overlap_only(self):
        # GH 38439
        # the two indexes share only the all-NaN entry
        mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
        mi2 = MultiIndex.from_arrays([[np.nan, 82.0], [np.nan, np.nan]])
        ser1 = Series([1, 2], index=mi1)
        ser2 = Series([1, 2], index=mi2)

        result1, result2 = ser1.align(ser2)

        mi = MultiIndex.from_arrays([[81.0, 82.0, np.nan], [np.nan, np.nan, np.nan]])
        expected1 = Series([1.0, np.nan, 2.0], index=mi)
        expected2 = Series([np.nan, 2.0, 1.0], index=mi)
        tm.assert_series_equal(result1, expected1)
        tm.assert_series_equal(result2, expected2)

    def test_series_align_multiindex_with_nan(self):
        # GH 38439
        # the two indexes hold the same entries in opposite order
        mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
        mi2 = MultiIndex.from_arrays([[np.nan, 81.0], [np.nan, np.nan]])
        ser1 = Series([1, 2], index=mi1)
        ser2 = Series([1, 2], index=mi2)

        result1, result2 = ser1.align(ser2)

        mi = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]])
        expected1 = Series([1, 2], index=mi)
        expected2 = Series([2, 1], index=mi)
        tm.assert_series_equal(result1, expected1)
        tm.assert_series_equal(result2, expected2)

    def test_nunique_smoke(self):
        # GH 34019
        frame = DataFrame([[1, 2], [1, 2]]).set_index([0, 1])
        n = frame.index.nunique()
        assert n == 1

    def test_multiindex_repeated_keys(self):
        # GH19414
        # repeating a label in the list key repeats the matching row
        ser = Series([1, 2], MultiIndex.from_arrays([["a", "b"]]))
        result = ser.loc[["a", "a", "b", "b"]]
        expected = Series([1, 1, 2, 2], MultiIndex.from_arrays([["a", "a", "b", "b"]]))
        tm.assert_series_equal(result, expected)

    def test_multiindex_with_na_missing_key(self):
        # GH46173
        df = DataFrame.from_dict(
            {
                ("foo",): [1, 2, 3],
                ("bar",): [5, 6, 7],
                (None,): [8, 9, 0],
            }
        )
        with pytest.raises(KeyError, match="missing_key"):
            df[[("missing_key",)]]

    def test_multiindex_dtype_preservation(self):
        # GH51261
        columns = MultiIndex.from_tuples([("A", "B")], names=["lvl1", "lvl2"])
        df = DataFrame(["value"], columns=columns).astype("category")
        df_no_multiindex = df["A"]
        assert isinstance(df_no_multiindex["B"].dtype, CategoricalDtype)

        # geopandas 1763 analogue
        three_level_columns = [
            ["foo", "foo"],
            ["location", "location"],
            ["x", "y"],
        ]
        df = DataFrame([[1, 0], [0, 1]], columns=three_level_columns).assign(
            bools=Series([True, False], dtype="boolean")
        )
        assert isinstance(df["bools"].dtype, BooleanDtype)

    def test_multiindex_from_tuples_with_nan(self):
        # GH#23578
        # a bare NaN entry is expected to equal a tuple of NaNs
        result = MultiIndex.from_tuples([("a", "b", "c"), np.nan, ("d", "", "")])
        expected = MultiIndex.from_tuples(
            [("a", "b", "c"), (np.nan, np.nan, np.nan), ("d", "", "")]
        )
        tm.assert_index_equal(result, expected)

View File

@ -0,0 +1,269 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
DataFrame,
DatetimeIndex,
MultiIndex,
date_range,
)
import pandas._testing as tm
class TestMultiIndexPartial:
    # Partial-key selection and assignment on MultiIndex rows and columns.

    def test_getitem_partial_int(self):
        # GH 12416
        outer = [10, 20]
        inner = ["a", "b"]
        df = DataFrame(index=range(2), columns=MultiIndex.from_product([outer, inner]))

        # with single item
        expected = DataFrame(index=range(2), columns=inner)
        result = df[20]
        tm.assert_frame_equal(result, expected)

        # with list
        expected = DataFrame(
            index=range(2), columns=MultiIndex.from_product([outer[1:], inner])
        )
        result = df[[20]]
        tm.assert_frame_equal(result, expected)

        # missing item:
        with pytest.raises(KeyError, match="1"):
            df[1]
        with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
            df[[1]]

    def test_series_slice_partial(self):
        # intentionally empty placeholder
        pass

    def test_xs_partial(
        self,
        multiindex_dataframe_random_data,
        multiindex_year_month_day_dataframe_random_data,
    ):
        frame = multiindex_dataframe_random_data
        ymd = multiindex_year_month_day_dataframe_random_data

        # xs, .loc and a transpose round trip must all agree
        via_xs = frame.xs("foo")
        via_loc = frame.loc["foo"]
        expected = frame.T["foo"].T
        tm.assert_frame_equal(via_xs, expected)
        tm.assert_frame_equal(via_xs, via_loc)

        result = ymd.xs((2000, 4))
        expected = ymd.loc[2000, 4]
        tm.assert_frame_equal(result, expected)

        # ex from #1796
        index = MultiIndex(
            levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
            codes=[
                [0, 0, 0, 0, 1, 1, 1, 1],
                [0, 0, 1, 1, 0, 0, 1, 1],
                [0, 1, 0, 1, 0, 1, 0, 1],
            ],
        )
        df = DataFrame(
            np.random.default_rng(2).standard_normal((8, 4)),
            index=index,
            columns=list("abcd"),
        )

        result = df.xs(("foo", "one"))
        expected = df.loc["foo", "one"]
        tm.assert_frame_equal(result, expected)

    def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
        # transpose so the multi-level index becomes the columns
        ymd = multiindex_year_month_day_dataframe_random_data.T
        result = ymd[2000, 2]

        second_level_mask = ymd.columns.codes[1] == 1
        expected = ymd.reindex(columns=ymd.columns[second_level_mask])
        expected.columns = expected.columns.droplevel(0).droplevel(0)
        tm.assert_frame_equal(result, expected)

    def test_fancy_slice_partial(
        self,
        multiindex_dataframe_random_data,
        multiindex_year_month_day_dataframe_random_data,
    ):
        frame = multiindex_dataframe_random_data
        result = frame.loc["bar":"baz"]
        expected = frame[3:7]
        tm.assert_frame_equal(result, expected)

        ymd = multiindex_year_month_day_dataframe_random_data
        result = ymd.loc[(2000, 2):(2000, 4)]
        # the expected rows are picked by the codes of the second level
        second_codes = ymd.index.codes[1]
        expected = ymd[(second_codes >= 1) & (second_codes <= 3)]
        tm.assert_frame_equal(result, expected)

    def test_getitem_partial_column_select(self):
        idx = MultiIndex(
            codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
            levels=[["a", "b"], ["x", "y"], ["p", "q"]],
        )
        df = DataFrame(np.random.default_rng(2).random((3, 2)), index=idx)
        partial_key = ("a", "y")

        result = df.loc[partial_key, :]
        expected = df.loc[partial_key]
        tm.assert_frame_equal(result, expected)

        result = df.loc[partial_key, [1, 0]]
        expected = df.loc[partial_key][[1, 0]]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
            df.loc[("a", "foo"), :]

    # TODO(ArrayManager) rewrite test to not use .values
    # exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
    @td.skip_array_manager_invalid_test
    def test_partial_set(
        self,
        multiindex_year_month_day_dataframe_random_data,
        using_copy_on_write,
        warn_copy_on_write,
    ):
        # GH #397
        ymd = multiindex_year_month_day_dataframe_random_data
        df = ymd.copy()
        exp = ymd.copy()

        df.loc[2000, 4] = 0
        exp.iloc[65:85] = 0
        tm.assert_frame_equal(df, exp)

        # the chained assignment is attempted in both modes
        with tm.raises_chained_assignment_error():
            df["A"].loc[2000, 4] = 1
        if using_copy_on_write:
            # follow up with a direct .loc assignment in this mode
            df.loc[(2000, 4), "A"] = 1
        exp.iloc[65:85, 0] = 1
        tm.assert_frame_equal(df, exp)

        df.loc[2000] = 5
        exp.iloc[:100] = 5
        tm.assert_frame_equal(df, exp)

        # this works...for now
        with tm.raises_chained_assignment_error():
            df["A"].iloc[14] = 5
        if using_copy_on_write:
            assert df["A"].iloc[14] == exp["A"].iloc[14]
        else:
            assert df["A"].iloc[14] == 5

    @pytest.mark.parametrize("dtype", [int, float])
    def test_getitem_intkey_leading_level(
        self, multiindex_year_month_day_dataframe_random_data, dtype
    ):
        # GH#33355 dont fall-back to positional when leading level is int
        ymd = multiindex_year_month_day_dataframe_random_data
        levels = ymd.index.levels
        ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:])
        ser = ymd["A"]
        mi = ser.index
        assert isinstance(mi, MultiIndex)

        expected_dtype = np.dtype(int) if dtype is int else np.float64
        assert mi.levels[0].dtype == expected_dtype

        # 14 is not a label, so the lookup below can only succeed positionally
        assert 14 not in mi.levels[0]
        assert not mi.levels[0]._should_fallback_to_positional
        assert not mi._should_fallback_to_positional

        with pytest.raises(KeyError, match="14"):
            ser[14]

    # ---------------------------------------------------------------------

    def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
        frame = multiindex_dataframe_random_data

        # setting "foo" and "bar" together, via a list or a label slice, must
        # match setting them one at a time, for the frame and for one column
        cases = (
            (frame, tm.assert_frame_equal),
            (frame["A"], tm.assert_series_equal),
        )
        for source, assert_same in cases:
            for key in (["foo", "bar"], slice("foo", "bar")):
                expected = source.copy()
                result = source.copy()
                result.loc[key] = 0
                expected.loc["foo"] = 0
                expected.loc["bar"] = 0
                assert_same(result, expected)

    @pytest.mark.parametrize(
        "indexer, exp_idx, exp_values",
        [
            (
                slice("2019-2", None),
                DatetimeIndex(["2019-02-01"], dtype="M8[ns]"),
                [2, 3],
            ),
            (
                slice(None, "2019-2"),
                date_range("2019", periods=2, freq="MS"),
                [0, 1, 2, 3],
            ),
        ],
    )
    def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
        # GH: 25165
        date_idx = date_range("2019", periods=2, freq="MS")
        df = DataFrame(
            list(range(4)),
            index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]),
        )
        expected = DataFrame(
            exp_values,
            index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]),
        )

        # every spelling of the row selection must agree
        result = df[indexer]
        tm.assert_frame_equal(result, expected)
        result = df.loc[indexer]
        tm.assert_frame_equal(result, expected)

        result = df.loc(axis=0)[indexer]
        tm.assert_frame_equal(result, expected)

        result = df.loc[indexer, :]
        tm.assert_frame_equal(result, expected)

        # same selection with the date level moved to second position
        df2 = df.swaplevel(0, 1).sort_index()
        expected = expected.swaplevel(0, 1).sort_index()

        result = df2.loc[:, indexer, :]
        tm.assert_frame_equal(result, expected)
def test_loc_getitem_partial_both_axis():
    # gh-12660
    # a scalar key on each axis drops the leading level of both rows and columns
    iterables = [["a", "b"], [2, 1]]
    rows = MultiIndex.from_product(iterables, names=["row1", "row2"])
    columns = MultiIndex.from_product(iterables, names=["col1", "col2"])
    values = np.random.default_rng(2).standard_normal((4, 4))
    df = DataFrame(values, index=rows, columns=columns)

    result = df.loc["a", "b"]
    expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1)
    tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,589 @@
import numpy as np
import pytest
from pandas.errors import SettingWithCopyError
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
Series,
date_range,
isna,
notna,
)
import pandas._testing as tm
def assert_equal(a, b):
    # Plain equality assertion; raises AssertionError when ``a == b`` is falsy.
    assert a == b
class TestMultiIndexSetItem:
def check(self, target, indexers, value, compare_fn=assert_equal, expected=None):
    # Assign ``value`` at ``target.loc[indexers]``, read the same location
    # back and compare it with ``expected`` (or with ``value`` itself when no
    # ``expected`` is given) using ``compare_fn``.
    target.loc[indexers] = value
    readback = target.loc[indexers]
    wanted = value if expected is None else expected
    compare_fn(readback, wanted)
def test_setitem_multiindex(self):
    # GH#7190
    cols = ["A", "w", "l", "a", "x", "X", "d", "profit"]
    index = MultiIndex.from_product(
        [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
    )
    t, n = 0, 2
    cell = ((t, n), "X")

    # frame pre-filled with NaN
    df = DataFrame(np.nan, columns=cols, index=index)
    self.check(target=df, indexers=cell, value=0)

    # frame pre-filled with an integer
    df = DataFrame(-999, columns=cols, index=index)
    self.check(target=df, indexers=cell, value=1)

    # frame created without data
    df = DataFrame(columns=cols, index=index)
    self.check(target=df, indexers=cell, value=2)

    # gh-7218: assigning with 0-dim arrays
    df = DataFrame(-999, columns=cols, index=index)
    self.check(
        target=df,
        indexers=cell,
        value=np.array(3),
        expected=3,
    )
def test_setitem_multiindex2(self):
    # GH#5206
    df = DataFrame(
        np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float
    )
    df["F"] = 99

    # rows where column "A" is even, and two target columns
    row_selection = df["A"] % 2 == 0
    col_selection = ["B", "C"]
    selection = (row_selection, col_selection)

    df.loc[row_selection, col_selection] = df["F"]
    output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
    tm.assert_frame_equal(df.loc[row_selection, col_selection], output)

    # repeat the same assignment through the shared helper
    self.check(
        target=df,
        indexers=selection,
        value=df["F"],
        compare_fn=tm.assert_frame_equal,
        expected=output,
    )
def test_setitem_multiindex3(self):
    """Assign frames through MultiIndex row and column keys and read them back."""
    # GH#11372
    row_index = MultiIndex.from_product(
        [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
    )
    col_index = MultiIndex.from_product(
        [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")]
    )
    frame = DataFrame(
        np.random.default_rng(2).random((12, 4)), index=row_index, columns=col_index
    )

    sub_rows = MultiIndex.from_arrays(
        [["A", "A"], date_range("2015-01-01", "2015-02-01", freq="MS")]
    )
    sub_cols = MultiIndex.from_arrays(
        [["foo", "foo"], date_range("2016-01-01", "2016-02-01", freq="MS")]
    )

    # set a 2x2 sub-frame
    patch = DataFrame(
        np.random.default_rng(2).random((2, 2)), index=sub_rows, columns=sub_cols
    )
    self.check(
        target=frame,
        indexers=(sub_rows, sub_cols),
        value=patch,
        compare_fn=tm.assert_frame_equal,
    )

    # set all columns
    patch = DataFrame(
        np.random.default_rng(2).random((2, 4)), index=sub_rows, columns=col_index
    )
    self.check(
        target=frame,
        indexers=(sub_rows, slice(None, None, None)),
        value=patch,
        compare_fn=tm.assert_frame_equal,
    )

    # identity
    snapshot = frame.copy()
    self.check(
        target=frame,
        indexers=(frame.index, frame.columns),
        value=frame,
        compare_fn=tm.assert_frame_equal,
        expected=snapshot,
    )
# TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
# all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
@td.skip_array_manager_not_yet_implemented
def test_multiindex_setitem(self):
# GH 3738
# setting with a multi-index right hand side
arrays = [
np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
np.array(["one", "two", "one", "one", "two", "one"]),
np.arange(0, 6, 1),
]
df_orig = DataFrame(
np.random.default_rng(2).standard_normal((6, 3)),
index=arrays,
columns=["A", "B", "C"],
).sort_index()
expected = df_orig.loc[["bar"]] * 2
df = df_orig.copy()
df.loc[["bar"]] *= 2
tm.assert_frame_equal(df.loc[["bar"]], expected)
# raise because these have differing levels
msg = "cannot align on a multi-index with out specifying the join levels"
with pytest.raises(TypeError, match=msg):
df.loc["bar"] *= 2
def test_multiindex_setitem2(self):
# from SO
# https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
df_orig = DataFrame.from_dict(
{
"price": {
("DE", "Coal", "Stock"): 2,
("DE", "Gas", "Stock"): 4,
("DE", "Elec", "Demand"): 1,
("FR", "Gas", "Stock"): 5,
("FR", "Solar", "SupIm"): 0,
("FR", "Wind", "SupIm"): 0,
}
}
)
df_orig.index = MultiIndex.from_tuples(
df_orig.index, names=["Sit", "Com", "Type"]
)
expected = df_orig.copy()
expected.iloc[[0, 1, 3]] *= 2
idx = pd.IndexSlice
df = df_orig.copy()
df.loc[idx[:, :, "Stock"], :] *= 2
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[idx[:, :, "Stock"], "price"] *= 2
tm.assert_frame_equal(df, expected)
def test_multiindex_assignment(self):
# GH3777 part 2
# mixed dtype
df = DataFrame(
np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
columns=list("abc"),
index=[[4, 4, 8], [8, 10, 12]],
)
df["d"] = np.nan
arr = np.array([0.0, 1.0])
df.loc[4, "d"] = arr
tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))
def test_multiindex_assignment_single_dtype(
self, using_copy_on_write, warn_copy_on_write
):
# GH3777 part 2b
# single dtype
arr = np.array([0.0, 1.0])
df = DataFrame(
np.random.default_rng(2).integers(5, 10, size=9).reshape(3, 3),
columns=list("abc"),
index=[[4, 4, 8], [8, 10, 12]],
dtype=np.int64,
)
view = df["c"].iloc[:2].values
# arr can be losslessly cast to int, so this setitem is inplace
# INFO(CoW-warn) this does not warn because we directly took .values
# above, so no reference to a pandas object is alive for `view`
df.loc[4, "c"] = arr
exp = Series(arr, index=[8, 10], name="c", dtype="int64")
result = df.loc[4, "c"]
tm.assert_series_equal(result, exp)
# extra check for inplace-ness
if not using_copy_on_write:
tm.assert_numpy_array_equal(view, exp.values)
# arr + 0.5 cannot be cast losslessly to int, so we upcast
with tm.assert_produces_warning(
FutureWarning, match="item of incompatible dtype"
):
df.loc[4, "c"] = arr + 0.5
result = df.loc[4, "c"]
exp = exp + 0.5
tm.assert_series_equal(result, exp)
# scalar ok
with tm.assert_cow_warning(warn_copy_on_write):
df.loc[4, "c"] = 10
exp = Series(10, index=[8, 10], name="c", dtype="float64")
tm.assert_series_equal(df.loc[4, "c"], exp)
# invalid assignments
msg = "Must have equal len keys and value when setting with an iterable"
with pytest.raises(ValueError, match=msg):
df.loc[4, "c"] = [0, 1, 2, 3]
with pytest.raises(ValueError, match=msg):
df.loc[4, "c"] = [0]
# But with a length-1 listlike column indexer this behaves like
# `df.loc[4, "c"] = 0
with tm.assert_cow_warning(warn_copy_on_write):
df.loc[4, ["c"]] = [0]
assert (df.loc[4, "c"] == 0).all()
def test_groupby_example(self):
# groupby example
NUM_ROWS = 100
NUM_COLS = 10
col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())]
index_cols = col_names[:5]
df = DataFrame(
np.random.default_rng(2).integers(5, size=(NUM_ROWS, NUM_COLS)),
dtype=np.int64,
columns=col_names,
)
df = df.set_index(index_cols).sort_index()
grp = df.groupby(level=index_cols[:4])
df["new_col"] = np.nan
# we are actually operating on a copy here
# but in this case, that's ok
for name, df2 in grp:
new_vals = np.arange(df2.shape[0])
df.loc[name, "new_col"] = new_vals
def test_series_setitem(
self, multiindex_year_month_day_dataframe_random_data, warn_copy_on_write
):
ymd = multiindex_year_month_day_dataframe_random_data
s = ymd["A"]
with tm.assert_cow_warning(warn_copy_on_write):
s[2000, 3] = np.nan
assert isna(s.values[42:65]).all()
assert notna(s.values[:42]).all()
assert notna(s.values[65:]).all()
with tm.assert_cow_warning(warn_copy_on_write):
s[2000, 3, 10] = np.nan
assert isna(s.iloc[49])
with pytest.raises(KeyError, match="49"):
# GH#33355 dont fall-back to positional when leading level is int
s[49]
def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T.copy()
values = df.values.copy()
result = df[df > 0]
expected = df.where(df > 0)
tm.assert_frame_equal(result, expected)
df[df > 0] = 5
values[values > 0] = 5
tm.assert_almost_equal(df.values, values)
df[df == 5] = 0
values[values == 5] = 0
tm.assert_almost_equal(df.values, values)
# a df that needs alignment first
df[df[:-1] < 0] = 2
np.putmask(values[:-1], values[:-1] < 0, 2)
tm.assert_almost_equal(df.values, values)
with pytest.raises(TypeError, match="boolean values only"):
df[df * 0] = 2
def test_frame_getitem_setitem_multislice(self):
levels = [["t1", "t2"], ["a", "b", "c"]]
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)
result = df.loc[:, "value"]
tm.assert_series_equal(df["value"], result)
result = df.loc[df.index[1:3], "value"]
tm.assert_series_equal(df["value"][1:3], result)
result = df.loc[:, :]
tm.assert_frame_equal(df, result)
result = df
df.loc[:, "value"] = 10
result["value"] = 10
tm.assert_frame_equal(df, result)
df.loc[:, :] = 10
tm.assert_frame_equal(df, result)
def test_frame_setitem_multi_column(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=[["a", "a", "b", "b"], [0, 1, 0, 1]],
)
cp = df.copy()
cp["a"] = cp["b"]
tm.assert_frame_equal(cp["a"], cp["b"])
# set with ndarray
cp = df.copy()
cp["a"] = cp["b"].values
tm.assert_frame_equal(cp["a"], cp["b"])
def test_frame_setitem_multi_column2(self):
# ---------------------------------------
# GH#1803
columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
df = DataFrame(index=[1, 3, 5], columns=columns)
# Works, but adds a column instead of updating the two existing ones
df["A"] = 0.0 # Doesn't work
assert (df["A"].values == 0).all()
# it broadcasts
df["B", "1"] = [1, 2, 3]
df["A"] = df["B", "1"]
sliced_a1 = df["A", "1"]
sliced_a2 = df["A", "2"]
sliced_b1 = df["B", "1"]
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
assert sliced_a1.name == ("A", "1")
assert sliced_a2.name == ("A", "2")
assert sliced_b1.name == ("B", "1")
def test_loc_getitem_tuple_plus_columns(
self, multiindex_year_month_day_dataframe_random_data
):
# GH #1013
ymd = multiindex_year_month_day_dataframe_random_data
df = ymd[:5]
result = df.loc[(2000, 1, 6), ["A", "B", "C"]]
expected = df.loc[2000, 1, 6][["A", "B", "C"]]
tm.assert_series_equal(result, expected)
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
def test_loc_getitem_setitem_slice_integers(self, frame_or_series):
index = MultiIndex(
levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
)
obj = DataFrame(
np.random.default_rng(2).standard_normal((len(index), 4)),
index=index,
columns=["a", "b", "c", "d"],
)
obj = tm.get_obj(obj, frame_or_series)
res = obj.loc[1:2]
exp = obj.reindex(obj.index[2:])
tm.assert_equal(res, exp)
obj.loc[1:2] = 7
assert (obj.loc[1:2] == 7).values.all()
def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
dft = frame.T
s = dft["foo", "two"]
dft["foo", "two"] = s > s.median()
tm.assert_series_equal(dft["foo", "two"], s > s.median())
# assert isinstance(dft._data.blocks[1].items, MultiIndex)
reindexed = dft.reindex(columns=[("foo", "two")])
tm.assert_series_equal(reindexed["foo", "two"], s > s.median())
def test_set_column_scalar_with_loc(
self, multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
frame = multiindex_dataframe_random_data
subset = frame.index[[1, 4, 5]]
frame.loc[subset] = 99
assert (frame.loc[subset].values == 99).all()
frame_original = frame.copy()
col = frame["B"]
with tm.assert_cow_warning(warn_copy_on_write):
col[subset] = 97
if using_copy_on_write:
# chained setitem doesn't work with CoW
tm.assert_frame_equal(frame, frame_original)
else:
assert (frame.loc[subset, "B"] == 97).all()
def test_nonunique_assignment_1750(self):
df = DataFrame(
[[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
)
df = df.set_index(["A", "B"])
mi = MultiIndex.from_tuples([(1, 1)])
df.loc[mi, "C"] = "_"
assert (df.xs((1, 1))["C"] == "_").all()
def test_astype_assignment_with_dups(self):
# GH 4686
# assignment with dups that has a dtype change
cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
index = df.index.copy()
df["A"] = df["A"].astype(np.float64)
tm.assert_index_equal(df.index, index)
def test_setitem_nonmonotonic(self):
# https://github.com/pandas-dev/pandas/issues/31449
index = MultiIndex.from_tuples(
[("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"]
)
df = DataFrame(data=[0, 1, 2], index=index, columns=["e"])
df.loc["a", "e"] = np.arange(99, 101, dtype="int64")
expected = DataFrame({"e": [99, 1, 100]}, index=index)
tm.assert_frame_equal(df, expected)
class TestSetitemWithExpansionMultiIndex:
def test_setitem_new_column_mixed_depth(self):
arrays = [
["a", "top", "top", "routine1", "routine1", "routine2"],
["", "OD", "OD", "result1", "result2", "result1"],
["", "wx", "wy", "", "", ""],
]
tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)
df = DataFrame(np.random.default_rng(2).standard_normal((4, 6)), columns=index)
result = df.copy()
expected = df.copy()
result["b"] = [1, 2, 3, 4]
expected["b", "", ""] = [1, 2, 3, 4]
tm.assert_frame_equal(result, expected)
def test_setitem_new_column_all_na(self):
# GH#1534
mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")])
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
s = Series({(1, 1): 1, (1, 2): 2})
df["new"] = s
assert df["new"].isna().all()
def test_setitem_enlargement_keep_index_names(self):
# GH#53053
mi = MultiIndex.from_tuples([(1, 2, 3)], names=["i1", "i2", "i3"])
df = DataFrame(data=[[10, 20, 30]], index=mi, columns=["A", "B", "C"])
df.loc[(0, 0, 0)] = df.loc[(1, 2, 3)]
mi_expected = MultiIndex.from_tuples(
[(1, 2, 3), (0, 0, 0)], names=["i1", "i2", "i3"]
)
expected = DataFrame(
data=[[10, 20, 30], [10, 20, 30]],
index=mi_expected,
columns=["A", "B", "C"],
)
tm.assert_frame_equal(df, expected)
@td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values
# is not a view
def test_frame_setitem_view_direct(
multiindex_dataframe_random_data, using_copy_on_write
):
# this works because we are modifying the underlying array
# really a no-no
df = multiindex_dataframe_random_data.T
if using_copy_on_write:
with pytest.raises(ValueError, match="read-only"):
df["foo"].values[:] = 0
assert (df["foo"].values != 0).all()
else:
df["foo"].values[:] = 0
assert (df["foo"].values == 0).all()
def test_frame_setitem_copy_raises(
multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
# will raise/warn as its chained assignment
df = multiindex_dataframe_random_data.T
if using_copy_on_write or warn_copy_on_write:
with tm.raises_chained_assignment_error():
df["foo"]["one"] = 2
else:
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
with tm.raises_chained_assignment_error():
df["foo"]["one"] = 2
def test_frame_setitem_copy_no_write(
multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write
):
frame = multiindex_dataframe_random_data.T
expected = frame
df = frame.copy()
if using_copy_on_write or warn_copy_on_write:
with tm.raises_chained_assignment_error():
df["foo"]["one"] = 2
else:
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
with tm.raises_chained_assignment_error():
df["foo"]["one"] = 2
result = df
tm.assert_frame_equal(result, expected)
def test_frame_setitem_partial_multiindex():
    """Assign a Series indexed by a subset of the frame's index levels."""
    # GH 54875
    raw = DataFrame(
        {
            "a": [1, 2, 3],
            "b": [3, 4, 5],
            "c": 6,
            "d": 7,
        }
    )
    df = raw.set_index(["a", "b", "c"])
    # same rows, but without the trailing "c" level
    partial = Series(8, index=df.index.droplevel("c"))

    expected = df.copy()
    expected["d"] = 8

    result = df.copy()
    result["d"] = partial

    tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,796 @@
from datetime import (
datetime,
timedelta,
)
import numpy as np
import pytest
from pandas.errors import UnsortedIndexError
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
Timestamp,
)
import pandas._testing as tm
from pandas.tests.indexing.common import _mklbl
class TestMultiIndexSlicers:
def test_per_axis_per_level_getitem(self):
# GH6134
# example test case
ix = MultiIndex.from_product(
[_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
)
df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
expected = df.loc[
[
(
a,
b,
c,
d,
)
for a, b, c, d in df.index.values
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
]
]
tm.assert_frame_equal(result, expected)
expected = df.loc[
[
(
a,
b,
c,
d,
)
for a, b, c, d in df.index.values
if a in ("A1", "A2", "A3") and c in ("C1", "C2", "C3")
]
]
result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :]
tm.assert_frame_equal(result, expected)
# test multi-index slicing with per axis and per index controls
index = MultiIndex.from_tuples(
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
)
columns = MultiIndex.from_tuples(
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
names=["lvl0", "lvl1"],
)
df = DataFrame(
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
)
df = df.sort_index(axis=0).sort_index(axis=1)
# identity
result = df.loc[(slice(None), slice(None)), :]
tm.assert_frame_equal(result, df)
result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))]
tm.assert_frame_equal(result, df)
result = df.loc[:, (slice(None), slice(None))]
tm.assert_frame_equal(result, df)
# index
result = df.loc[(slice(None), [1]), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), 1), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
# columns
result = df.loc[:, (slice(None), ["foo"])]
expected = df.iloc[:, [1, 3]]
tm.assert_frame_equal(result, expected)
# both
result = df.loc[(slice(None), 1), (slice(None), ["foo"])]
expected = df.iloc[[0, 3], [1, 3]]
tm.assert_frame_equal(result, expected)
result = df.loc["A", "a"]
expected = DataFrame(
{"bar": [1, 5, 9], "foo": [0, 4, 8]},
index=Index([1, 2, 3], name="two"),
columns=Index(["bar", "foo"], name="lvl1"),
)
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), [1, 2]), :]
expected = df.iloc[[0, 1, 3]]
tm.assert_frame_equal(result, expected)
# multi-level series
s = Series(np.arange(len(ix.to_numpy())), index=ix)
result = s.loc["A1":"A3", :, ["C1", "C3"]]
expected = s.loc[
[
(
a,
b,
c,
d,
)
for a, b, c, d in s.index.values
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
]
]
tm.assert_series_equal(result, expected)
# boolean indexers
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
expected = df.iloc[[2, 3]]
tm.assert_frame_equal(result, expected)
msg = (
"cannot index with a boolean indexer "
"that is not the same length as the index"
)
with pytest.raises(ValueError, match=msg):
df.loc[(slice(None), np.array([True, False])), :]
with pytest.raises(KeyError, match=r"\[1\] not in index"):
# slice(None) is on the index, [1] is on the columns, but 1 is
# not in the columns, so we raise
# This used to treat [1] as positional GH#16396
df.loc[slice(None), [1]]
# not lexsorted
assert df.index._lexsort_depth == 2
df = df.sort_index(level=1, axis=0)
assert df.index._lexsort_depth == 0
msg = (
"MultiIndex slicing requires the index to be "
r"lexsorted: slicing on levels \[1\], lexsort depth 0"
)
with pytest.raises(UnsortedIndexError, match=msg):
df.loc[(slice(None), slice("bar")), :]
# GH 16734: not sorted, but no real slicing
result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :]
tm.assert_frame_equal(result, df.iloc[[1, 3], :])
def test_multiindex_slicers_non_unique(self):
# GH 7106
# non-unique mi index support
df = (
DataFrame(
{
"A": ["foo", "foo", "foo", "foo"],
"B": ["a", "a", "a", "a"],
"C": [1, 2, 1, 3],
"D": [1, 2, 3, 4],
}
)
.set_index(["A", "B", "C"])
.sort_index()
)
assert not df.index.is_unique
expected = (
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
.set_index(["A", "B", "C"])
.sort_index()
)
result = df.loc[(slice(None), slice(None), 1), :]
tm.assert_frame_equal(result, expected)
# this is equivalent of an xs expression
result = df.xs(1, level=2, drop_level=False)
tm.assert_frame_equal(result, expected)
df = (
DataFrame(
{
"A": ["foo", "foo", "foo", "foo"],
"B": ["a", "a", "a", "a"],
"C": [1, 2, 1, 2],
"D": [1, 2, 3, 4],
}
)
.set_index(["A", "B", "C"])
.sort_index()
)
assert not df.index.is_unique
expected = (
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
.set_index(["A", "B", "C"])
.sort_index()
)
result = df.loc[(slice(None), slice(None), 1), :]
assert not result.index.is_unique
tm.assert_frame_equal(result, expected)
# GH12896
# numpy-implementation dependent bug
ints = [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
12,
13,
14,
14,
16,
17,
18,
19,
200000,
200000,
]
n = len(ints)
idx = MultiIndex.from_arrays([["a"] * n, ints])
result = Series([1] * n, index=idx)
result = result.sort_index()
result = result.loc[(slice(None), slice(100000))]
expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
tm.assert_series_equal(result, expected)
def test_multiindex_slicers_datetimelike(self):
# GH 7429
# buggy/inconsistent behavior when slicing with datetime-like
dates = [datetime(2012, 1, 1, 12, 12, 12) + timedelta(days=i) for i in range(6)]
freq = [1, 2]
index = MultiIndex.from_product([dates, freq], names=["date", "frequency"])
df = DataFrame(
np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4),
index=index,
columns=list("ABCD"),
)
# multi-axis slicing
idx = pd.IndexSlice
expected = df.iloc[[0, 2, 4], [0, 1]]
result = df.loc[
(
slice(
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
),
slice(1, 1),
),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
result = df.loc[
(
idx[
Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12")
],
idx[1:1],
),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
result = df.loc[
(
slice(
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
),
1,
),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
# with strings
result = df.loc[
(slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)),
slice("A", "B"),
]
tm.assert_frame_equal(result, expected)
result = df.loc[
(idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"]
]
tm.assert_frame_equal(result, expected)
def test_multiindex_slicers_edges(self):
# GH 8132
# various edge cases
df = DataFrame(
{
"A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5,
"B": ["B0", "B0", "B1", "B1", "B2"] * 3,
"DATE": [
"2013-06-11",
"2013-07-02",
"2013-07-09",
"2013-07-30",
"2013-08-06",
"2013-06-11",
"2013-07-02",
"2013-07-09",
"2013-07-30",
"2013-08-06",
"2013-09-03",
"2013-10-01",
"2013-07-09",
"2013-08-06",
"2013-09-03",
],
"VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2],
}
)
df["DATE"] = pd.to_datetime(df["DATE"])
df1 = df.set_index(["A", "B", "DATE"])
df1 = df1.sort_index()
# A1 - Get all values under "A0" and "A1"
result = df1.loc[(slice("A1")), :]
expected = df1.iloc[0:10]
tm.assert_frame_equal(result, expected)
# A2 - Get all values from the start to "A2"
result = df1.loc[(slice("A2")), :]
expected = df1
tm.assert_frame_equal(result, expected)
# A3 - Get all values under "B1" or "B2"
result = df1.loc[(slice(None), slice("B1", "B2")), :]
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
tm.assert_frame_equal(result, expected)
# A4 - Get all values between 2013-07-02 and 2013-07-09
result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :]
expected = df1.iloc[[1, 2, 6, 7, 12]]
tm.assert_frame_equal(result, expected)
# B1 - Get all values in B0 that are also under A0, A1 and A2
result = df1.loc[(slice("A2"), slice("B0")), :]
expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
tm.assert_frame_equal(result, expected)
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
# the As)
result = df1.loc[(slice(None), slice("B2")), :]
expected = df1
tm.assert_frame_equal(result, expected)
# B3 - Get all values from B1 to B2 and up to 2013-08-06
result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :]
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
tm.assert_frame_equal(result, expected)
# B4 - Same as A4 but the start of the date slice is not a key.
# shows indexing on a partial selection slice
result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :]
expected = df1.iloc[[1, 2, 6, 7, 12]]
tm.assert_frame_equal(result, expected)
def test_per_axis_per_level_doc_examples(self):
# test index maker
idx = pd.IndexSlice
# from indexing.rst / advanced
index = MultiIndex.from_product(
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
)
columns = MultiIndex.from_tuples(
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
names=["lvl0", "lvl1"],
)
df = DataFrame(
np.arange(len(index) * len(columns), dtype="int64").reshape(
(len(index), len(columns))
),
index=index,
columns=columns,
)
result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
expected = df.loc[
[
(
a,
b,
c,
d,
)
for a, b, c, d in df.index.values
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
]
]
tm.assert_frame_equal(result, expected)
result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :]
tm.assert_frame_equal(result, expected)
result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :]
expected = df.loc[
[
(
a,
b,
c,
d,
)
for a, b, c, d in df.index.values
if c in ("C1", "C3")
]
]
tm.assert_frame_equal(result, expected)
result = df.loc[idx[:, :, ["C1", "C3"]], :]
tm.assert_frame_equal(result, expected)
# not sorted
msg = (
"MultiIndex slicing requires the index to be lexsorted: "
r"slicing on levels \[1\], lexsort depth 1"
)
with pytest.raises(UnsortedIndexError, match=msg):
df.loc["A1", ("a", slice("foo"))]
# GH 16734: not sorted, but no real slicing
tm.assert_frame_equal(
df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]]
)
df = df.sort_index(axis=1)
# slicing
df.loc["A1", (slice(None), "foo")]
df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")]
# setitem
df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
def test_loc_axis_arguments(self):
index = MultiIndex.from_product(
[_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
)
columns = MultiIndex.from_tuples(
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
names=["lvl0", "lvl1"],
)
df = (
DataFrame(
np.arange(len(index) * len(columns), dtype="int64").reshape(
(len(index), len(columns))
),
index=index,
columns=columns,
)
.sort_index()
.sort_index(axis=1)
)
# axis 0
result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
expected = df.loc[
[
(
a,
b,
c,
d,
)
for a, b, c, d in df.index.values
if a in ("A1", "A2", "A3") and c in ("C1", "C3")
]
]
tm.assert_frame_equal(result, expected)
result = df.loc(axis="index")[:, :, ["C1", "C3"]]
expected = df.loc[
[
(
a,
b,
c,
d,
)
for a, b, c, d in df.index.values
if c in ("C1", "C3")
]
]
tm.assert_frame_equal(result, expected)
# axis 1
result = df.loc(axis=1)[:, "foo"]
expected = df.loc[:, (slice(None), "foo")]
tm.assert_frame_equal(result, expected)
result = df.loc(axis="columns")[:, "foo"]
expected = df.loc[:, (slice(None), "foo")]
tm.assert_frame_equal(result, expected)
# invalid axis
for i in [-1, 2, "foo"]:
msg = f"No axis named {i} for object type DataFrame"
with pytest.raises(ValueError, match=msg):
df.loc(axis=i)[:, :, ["C1", "C3"]]
def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self):
# GH29519
df = DataFrame(
np.arange(27).reshape(3, 9),
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
)
result = df.loc(axis=1)["a1":"a2"]
expected = df.iloc[:, :-3]
tm.assert_frame_equal(result, expected)
def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self):
# GH29519
df = DataFrame(
np.arange(27).reshape(3, 9),
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
)
result = df.loc(axis=1)["a1"]
expected = df.iloc[:, :3]
expected.columns = ["b1", "b2", "b3"]
tm.assert_frame_equal(result, expected)
def test_loc_ax_single_level_indexer_simple_df(self):
# GH29519
# test single level indexing on single index column data frame
df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"])
result = df.loc(axis=1)["a"]
expected = Series(np.array([0, 3, 6]), name="a")
tm.assert_series_equal(result, expected)
def test_per_axis_per_level_setitem(self):
# test index maker
idx = pd.IndexSlice
# test multi-index slicing with per axis and per index controls
index = MultiIndex.from_tuples(
[("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
)
columns = MultiIndex.from_tuples(
[("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
names=["lvl0", "lvl1"],
)
df_orig = DataFrame(
np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
)
df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)
# identity
df = df_orig.copy()
df.loc[(slice(None), slice(None)), :] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc(axis=0)[:, :] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[:, (slice(None), slice(None))] = 100
expected = df_orig.copy()
expected.iloc[:, :] = 100
tm.assert_frame_equal(df, expected)
# index
df = df_orig.copy()
df.loc[(slice(None), [1]), :] = 100
expected = df_orig.copy()
expected.iloc[[0, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[(slice(None), 1), :] = 100
expected = df_orig.copy()
expected.iloc[[0, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc(axis=0)[:, 1] = 100
expected = df_orig.copy()
expected.iloc[[0, 3]] = 100
tm.assert_frame_equal(df, expected)
# columns
df = df_orig.copy()
df.loc[:, (slice(None), ["foo"])] = 100
expected = df_orig.copy()
expected.iloc[:, [1, 3]] = 100
tm.assert_frame_equal(df, expected)
# both
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] = 100
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[idx[:, 1], idx[:, ["foo"]]] = 100
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = 100
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc["A", "a"] = 100
expected = df_orig.copy()
expected.iloc[0:3, 0:2] = 100
tm.assert_frame_equal(df, expected)
# setting with a list-like
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
[[100, 100], [100, 100]], dtype="int64"
)
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = 100
tm.assert_frame_equal(df, expected)
# not enough values
df = df_orig.copy()
msg = "setting an array element with a sequence."
with pytest.raises(ValueError, match=msg):
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
[[100], [100, 100]], dtype="int64"
)
msg = "Must have equal len keys and value when setting with an iterable"
with pytest.raises(ValueError, match=msg):
df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array(
[100, 100, 100, 100], dtype="int64"
)
# with an alignable rhs
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] = (
df.loc[(slice(None), 1), (slice(None), ["foo"])] * 5
)
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
tm.assert_frame_equal(df, expected)
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= df.loc[
(slice(None), 1), (slice(None), ["foo"])
]
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
tm.assert_frame_equal(df, expected)
rhs = df_orig.loc[(slice(None), 1), (slice(None), ["foo"])].copy()
rhs.loc[:, ("c", "bah")] = 10
df = df_orig.copy()
df.loc[(slice(None), 1), (slice(None), ["foo"])] *= rhs
expected = df_orig.copy()
expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
tm.assert_frame_equal(df, expected)
def test_multiindex_label_slicing_with_negative_step(self):
ser = Series(
np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)])
)
SLC = pd.IndexSlice
tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1])
tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1])
tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1])
tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1])
tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1])
tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1])
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1])
tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1])
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1])
tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0])
tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1])
tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1])
tm.assert_indexing_slices_equivalent(
ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1]
)
def test_multiindex_slice_first_level(self):
# GH 12697
freq = ["a", "b", "c", "d"]
idx = MultiIndex.from_product([freq, range(500)])
df = DataFrame(list(range(2000)), index=idx, columns=["Test"])
df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
result = df_slice.loc["a"]
expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71))
tm.assert_frame_equal(result, expected)
result = df_slice.loc["d"]
expected = DataFrame(
list(range(1530, 1571)), columns=["Test"], index=range(30, 71)
)
tm.assert_frame_equal(result, expected)
def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data):
ymd = multiindex_year_month_day_dataframe_random_data
s = ymd["A"]
result = s[5:]
expected = s.reindex(s.index[5:])
tm.assert_series_equal(result, expected)
s = ymd["A"].copy()
exp = ymd["A"].copy()
s[5:] = 0
exp.iloc[5:] = 0
tm.assert_numpy_array_equal(s.values, exp.values)
result = ymd[5:]
expected = ymd.reindex(s.index[5:])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"dtype, loc, iloc",
[
# dtype = int, step = -1
("int", slice(None, None, -1), slice(None, None, -1)),
("int", slice(3, None, -1), slice(3, None, -1)),
("int", slice(None, 1, -1), slice(None, 0, -1)),
("int", slice(3, 1, -1), slice(3, 0, -1)),
# dtype = int, step = -2
("int", slice(None, None, -2), slice(None, None, -2)),
("int", slice(3, None, -2), slice(3, None, -2)),
("int", slice(None, 1, -2), slice(None, 0, -2)),
("int", slice(3, 1, -2), slice(3, 0, -2)),
# dtype = str, step = -1
("str", slice(None, None, -1), slice(None, None, -1)),
("str", slice("d", None, -1), slice(3, None, -1)),
("str", slice(None, "b", -1), slice(None, 0, -1)),
("str", slice("d", "b", -1), slice(3, 0, -1)),
# dtype = str, step = -2
("str", slice(None, None, -2), slice(None, None, -2)),
("str", slice("d", None, -2), slice(3, None, -2)),
("str", slice(None, "b", -2), slice(None, 0, -2)),
("str", slice("d", "b", -2), slice(3, 0, -2)),
],
)
def test_loc_slice_negative_stepsize(self, dtype, loc, iloc):
# GH#38071
labels = {
"str": list("abcde"),
"int": range(5),
}[dtype]
mi = MultiIndex.from_arrays([labels] * 2)
df = DataFrame(1.0, index=mi, columns=["A"])
SLC = pd.IndexSlice
expected = df.iloc[iloc, :]
result_get_loc = df.loc[SLC[loc], :]
result_get_locs_level_0 = df.loc[SLC[loc, :], :]
result_get_locs_level_1 = df.loc[SLC[:, loc], :]
tm.assert_frame_equal(result_get_loc, expected)
tm.assert_frame_equal(result_get_locs_level_0, expected)
tm.assert_frame_equal(result_get_locs_level_1, expected)

View File

@ -0,0 +1,153 @@
import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
MultiIndex,
Series,
array,
)
import pandas._testing as tm
class TestMultiIndexSorted:
def test_getitem_multilevel_index_tuple_not_sorted(self):
index_columns = list("abc")
df = DataFrame(
[[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"]
)
df = df.set_index(index_columns)
query_index = df.index[:1]
rs = df.loc[query_index, "data"]
xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"])
xp = Series(["x"], index=xp_idx, name="data")
tm.assert_series_equal(rs, xp)
def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.sort_index(level=1).T
# buglet with int typechecking
result = df.iloc[:, : np.int32(3)]
expected = df.reindex(columns=df.columns[:3])
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("key", [None, lambda x: x])
def test_frame_getitem_not_sorted2(self, key):
# 13431
df = DataFrame(
{
"col1": ["b", "d", "b", "a"],
"col2": [3, 1, 1, 2],
"data": ["one", "two", "three", "four"],
}
)
df2 = df.set_index(["col1", "col2"])
df2_original = df2.copy()
df2.index = df2.index.set_levels(["b", "d", "a"], level="col1")
df2.index = df2.index.set_codes([0, 1, 0, 2], level="col1")
assert not df2.index.is_monotonic_increasing
assert df2_original.index.equals(df2.index)
expected = df2.sort_index(key=key)
assert expected.index.is_monotonic_increasing
result = df2.sort_index(level=0, key=key)
assert result.index.is_monotonic_increasing
tm.assert_frame_equal(result, expected)
def test_sort_values_key(self):
arrays = [
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = zip(*arrays)
index = MultiIndex.from_tuples(tuples)
index = index.sort_values( # sort by third letter
key=lambda x: x.map(lambda entry: entry[2])
)
result = DataFrame(range(8), index=index)
arrays = [
["foo", "foo", "bar", "bar", "qux", "qux", "baz", "baz"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = zip(*arrays)
index = MultiIndex.from_tuples(tuples)
expected = DataFrame(range(8), index=index)
tm.assert_frame_equal(result, expected)
def test_argsort_with_na(self):
# GH48495
arrays = [
array([2, NA, 1], dtype="Int64"),
array([1, 2, 3], dtype="Int64"),
]
index = MultiIndex.from_arrays(arrays)
result = index.argsort()
expected = np.array([2, 0, 1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
def test_sort_values_with_na(self):
# GH48495
arrays = [
array([2, NA, 1], dtype="Int64"),
array([1, 2, 3], dtype="Int64"),
]
index = MultiIndex.from_arrays(arrays)
index = index.sort_values()
result = DataFrame(range(3), index=index)
arrays = [
array([1, 2, NA], dtype="Int64"),
array([3, 1, 2], dtype="Int64"),
]
index = MultiIndex.from_arrays(arrays)
expected = DataFrame(range(3), index=index)
tm.assert_frame_equal(result, expected)
def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T
df["foo", "four"] = "foo"
arrays = [np.array(x) for x in zip(*df.columns.values)]
result = df["foo"]
result2 = df.loc[:, "foo"]
expected = df.reindex(columns=df.columns[arrays[0] == "foo"])
expected.columns = expected.columns.droplevel(0)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
df = df.T
result = df.xs("foo")
result2 = df.loc["foo"]
expected = df.reindex(df.index[arrays[0] == "foo"])
expected.index = expected.index.droplevel(0)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
def test_series_getitem_not_sorted(self):
arrays = [
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = zip(*arrays)
index = MultiIndex.from_tuples(tuples)
s = Series(np.random.default_rng(2).standard_normal(8), index=index)
arrays = [np.array(x) for x in zip(*index.values)]
result = s["qux"]
result2 = s.loc["qux"]
expected = s[arrays[0] == "qux"]
expected.index = expected.index.droplevel(0)
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result2, expected)