Updated script that can be controled by Nodejs web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@ -0,0 +1,199 @@
"""
Tests for values coercion in setitem-like operations on DataFrame.
For the most part, these should be multi-column DataFrames, otherwise
we would share the tests with Series.
"""
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
NaT,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestDataFrameSetitemCoercion:
@pytest.mark.parametrize("consolidate", [True, False])
def test_loc_setitem_multiindex_columns(self, consolidate):
# GH#18415 Setting values in a single column preserves dtype,
# while setting them in multiple columns did unwanted cast.
# Note that A here has 2 blocks, below we do the same thing
# with a consolidated frame.
A = DataFrame(np.zeros((6, 5), dtype=np.float32))
A = pd.concat([A, A], axis=1, keys=[1, 2])
if consolidate:
A = A._consolidate()
A.loc[2:3, (1, slice(2, 3))] = np.ones((2, 2), dtype=np.float32)
assert (A.dtypes == np.float32).all()
A.loc[0:5, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
assert (A.dtypes == np.float32).all()
A.loc[:, (1, slice(2, 3))] = np.ones((6, 2), dtype=np.float32)
assert (A.dtypes == np.float32).all()
# TODO: i think this isn't about MultiIndex and could be done with iloc?
def test_37477():
# fixed by GH#45121
orig = DataFrame({"A": [1, 2, 3], "B": [3, 4, 5]})
expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]})
df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
df.at[1, "B"] = 1.2
tm.assert_frame_equal(df, expected)
df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
df.loc[1, "B"] = 1.2
tm.assert_frame_equal(df, expected)
df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
df.iat[1, 1] = 1.2
tm.assert_frame_equal(df, expected)
df = orig.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
df.iloc[1, 1] = 1.2
tm.assert_frame_equal(df, expected)
def test_6942(indexer_al):
# check that the .at __setitem__ after setting "Live" actually sets the data
start = Timestamp("2014-04-01")
t1 = Timestamp("2014-04-23 12:42:38.883082")
t2 = Timestamp("2014-04-24 01:33:30.040039")
dti = date_range(start, periods=1)
orig = DataFrame(index=dti, columns=["timenow", "Live"])
df = orig.copy()
indexer_al(df)[start, "timenow"] = t1
df["Live"] = True
df.at[start, "timenow"] = t2
assert df.iloc[0, 0] == t2
def test_26395(indexer_al):
# .at case fixed by GH#45121 (best guess)
df = DataFrame(index=["A", "B", "C"])
df["D"] = 0
indexer_al(df)["C", "D"] = 2
expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64)
tm.assert_frame_equal(df, expected)
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
indexer_al(df)["C", "D"] = 44.5
expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64)
tm.assert_frame_equal(df, expected)
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
indexer_al(df)["C", "D"] = "hello"
expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object)
tm.assert_frame_equal(df, expected)
@pytest.mark.xfail(reason="unwanted upcast")
def test_15231():
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
df.loc[2] = Series({"a": 5, "b": 6})
assert (df.dtypes == np.int64).all()
df.loc[3] = Series({"a": 7})
# df["a"] doesn't have any NaNs, should not have been cast
exp_dtypes = Series([np.int64, np.float64], dtype=object, index=["a", "b"])
tm.assert_series_equal(df.dtypes, exp_dtypes)
def test_iloc_setitem_unnecesssary_float_upcasting():
# GH#12255
df = DataFrame(
{
0: np.array([1, 3], dtype=np.float32),
1: np.array([2, 4], dtype=np.float32),
2: ["a", "b"],
}
)
orig = df.copy()
values = df[0].values.reshape(2, 1)
df.iloc[:, 0:1] = values
tm.assert_frame_equal(df, orig)
@pytest.mark.xfail(reason="unwanted casting to dt64")
def test_12499():
# TODO: OP in GH#12499 used np.datetim64("NaT") instead of pd.NaT,
# which has consequences for the expected df["two"] (though i think at
# the time it might not have because of a separate bug). See if it makes
# a difference which one we use here.
ts = Timestamp("2016-03-01 03:13:22.98986", tz="UTC")
data = [{"one": 0, "two": ts}]
orig = DataFrame(data)
df = orig.copy()
df.loc[1] = [np.nan, NaT]
expected = DataFrame(
{"one": [0, np.nan], "two": Series([ts, NaT], dtype="datetime64[ns, UTC]")}
)
tm.assert_frame_equal(df, expected)
data = [{"one": 0, "two": ts}]
df = orig.copy()
df.loc[1, :] = [np.nan, NaT]
tm.assert_frame_equal(df, expected)
def test_20476():
mi = MultiIndex.from_product([["A", "B"], ["a", "b", "c"]])
df = DataFrame(-1, index=range(3), columns=mi)
filler = DataFrame([[1, 2, 3.0]] * 3, index=range(3), columns=["a", "b", "c"])
df["A"] = filler
expected = DataFrame(
{
0: [1, 1, 1],
1: [2, 2, 2],
2: [3.0, 3.0, 3.0],
3: [-1, -1, -1],
4: [-1, -1, -1],
5: [-1, -1, -1],
}
)
expected.columns = mi
exp_dtypes = Series(
[np.dtype(np.int64)] * 2 + [np.dtype(np.float64)] + [np.dtype(np.int64)] * 3,
index=mi,
)
tm.assert_series_equal(df.dtypes, exp_dtypes)

View File

@ -0,0 +1,60 @@
import re
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
)
class TestDataFrameDelItem:
def test_delitem(self, float_frame):
del float_frame["A"]
assert "A" not in float_frame
def test_delitem_multiindex(self):
midx = MultiIndex.from_product([["A", "B"], [1, 2]])
df = DataFrame(np.random.default_rng(2).standard_normal((4, 4)), columns=midx)
assert len(df.columns) == 4
assert ("A",) in df.columns
assert "A" in df.columns
result = df["A"]
assert isinstance(result, DataFrame)
del df["A"]
assert len(df.columns) == 2
# A still in the levels, BUT get a KeyError if trying
# to delete
assert ("A",) not in df.columns
with pytest.raises(KeyError, match=re.escape("('A',)")):
del df[("A",)]
# behavior of dropped/deleted MultiIndex levels changed from
# GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
# levels which are dropped/deleted
assert "A" not in df.columns
with pytest.raises(KeyError, match=re.escape("('A',)")):
del df["A"]
def test_delitem_corner(self, float_frame):
f = float_frame.copy()
del f["D"]
assert len(f.columns) == 3
with pytest.raises(KeyError, match=r"^'D'$"):
del f["D"]
del f["B"]
assert len(f.columns) == 2
def test_delitem_col_still_multiindex(self):
arrays = [["a", "b", "c", "top"], ["", "", "", "OD"], ["", "", "", "wx"]]
tuples = sorted(zip(*arrays))
index = MultiIndex.from_tuples(tuples)
df = DataFrame(np.random.default_rng(2).standard_normal((3, 4)), columns=index)
del df[("a", "", "")]
assert isinstance(df.columns, MultiIndex)

View File

@ -0,0 +1,27 @@
import pytest
from pandas import DataFrame
import pandas._testing as tm
class TestGet:
def test_get(self, float_frame):
b = float_frame.get("B")
tm.assert_series_equal(b, float_frame["B"])
assert float_frame.get("foo") is None
tm.assert_series_equal(
float_frame.get("foo", float_frame["B"]), float_frame["B"]
)
@pytest.mark.parametrize(
"df",
[
DataFrame(),
DataFrame(columns=list("AB")),
DataFrame(columns=list("AB"), index=range(3)),
],
)
def test_get_none(self, df):
# see gh-5652
assert df.get(None) is None

View File

@ -0,0 +1,22 @@
import pytest
from pandas import (
DataFrame,
MultiIndex,
)
class TestGetValue:
def test_get_set_value_no_partial_indexing(self):
# partial w/ MultiIndex raise exception
index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)])
df = DataFrame(index=index, columns=range(4))
with pytest.raises(KeyError, match=r"^0$"):
df._get_value(0, 1)
def test_get_value(self, float_frame):
for idx in float_frame.index:
for col in float_frame.columns:
result = float_frame._get_value(idx, col)
expected = float_frame[col][idx]
assert result == expected

View File

@ -0,0 +1,472 @@
import re
import numpy as np
import pytest
from pandas import (
Categorical,
CategoricalDtype,
CategoricalIndex,
DataFrame,
DateOffset,
DatetimeIndex,
Index,
MultiIndex,
Series,
Timestamp,
concat,
date_range,
get_dummies,
period_range,
)
import pandas._testing as tm
from pandas.core.arrays import SparseArray
class TestGetitem:
def test_getitem_unused_level_raises(self):
# GH#20410
mi = MultiIndex(
levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]],
codes=[[1, 0], [1, 0]],
)
df = DataFrame(-1, index=range(3), columns=mi)
with pytest.raises(KeyError, match="notevenone"):
df["notevenone"]
def test_getitem_periodindex(self):
rng = period_range("1/1/2000", periods=5)
df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)), columns=rng)
ts = df[rng[0]]
tm.assert_series_equal(ts, df.iloc[:, 0])
ts = df["1/1/2000"]
tm.assert_series_equal(ts, df.iloc[:, 0])
def test_getitem_list_of_labels_categoricalindex_cols(self):
# GH#16115
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
expected = DataFrame([[1, 0], [0, 1]], dtype="bool", index=[0, 1], columns=cats)
dummies = get_dummies(cats)
result = dummies[list(dummies.columns)]
tm.assert_frame_equal(result, expected)
def test_getitem_sparse_column_return_type_and_dtype(self):
# https://github.com/pandas-dev/pandas/issues/23559
data = SparseArray([0, 1])
df = DataFrame({"A": data})
expected = Series(data, name="A")
result = df["A"]
tm.assert_series_equal(result, expected)
# Also check iloc and loc while we're here
result = df.iloc[:, 0]
tm.assert_series_equal(result, expected)
result = df.loc[:, "A"]
tm.assert_series_equal(result, expected)
def test_getitem_string_columns(self):
# GH#46185
df = DataFrame([[1, 2]], columns=Index(["A", "B"], dtype="string"))
result = df.A
expected = df["A"]
tm.assert_series_equal(result, expected)
class TestGetitemListLike:
def test_getitem_list_missing_key(self):
# GH#13822, incorrect error string with non-unique columns when missing
# column is accessed
df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]})
df.columns = ["x", "x", "z"]
# Check that we get the correct value in the KeyError
with pytest.raises(KeyError, match=r"\['y'\] not in index"):
df[["x", "y", "z"]]
def test_getitem_list_duplicates(self):
# GH#1943
df = DataFrame(
np.random.default_rng(2).standard_normal((4, 4)), columns=list("AABC")
)
df.columns.name = "foo"
result = df[["B", "C"]]
assert result.columns.name == "foo"
expected = df.iloc[:, 2:]
tm.assert_frame_equal(result, expected)
def test_getitem_dupe_cols(self):
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
msg = "\"None of [Index(['baf'], dtype="
with pytest.raises(KeyError, match=re.escape(msg)):
df[["baf"]]
@pytest.mark.parametrize(
"idx_type",
[
list,
iter,
Index,
set,
lambda keys: dict(zip(keys, range(len(keys)))),
lambda keys: dict(zip(keys, range(len(keys)))).keys(),
],
ids=["list", "iter", "Index", "set", "dict", "dict_keys"],
)
@pytest.mark.parametrize("levels", [1, 2])
def test_getitem_listlike(self, idx_type, levels, float_frame):
# GH#21294
if levels == 1:
frame, missing = float_frame, "food"
else:
# MultiIndex columns
frame = DataFrame(
np.random.default_rng(2).standard_normal((8, 3)),
columns=Index(
[("foo", "bar"), ("baz", "qux"), ("peek", "aboo")],
name=("sth", "sth2"),
),
)
missing = ("good", "food")
keys = [frame.columns[1], frame.columns[0]]
idx = idx_type(keys)
idx_check = list(idx_type(keys))
if isinstance(idx, (set, dict)):
with pytest.raises(TypeError, match="as an indexer is not supported"):
frame[idx]
return
else:
result = frame[idx]
expected = frame.loc[:, idx_check]
expected.columns.names = frame.columns.names
tm.assert_frame_equal(result, expected)
idx = idx_type(keys + [missing])
with pytest.raises(KeyError, match="not in index"):
frame[idx]
def test_getitem_iloc_generator(self):
# GH#39614
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
indexer = (x for x in [1, 2])
result = df.iloc[indexer]
expected = DataFrame({"a": [2, 3], "b": [5, 6]}, index=[1, 2])
tm.assert_frame_equal(result, expected)
def test_getitem_iloc_two_dimensional_generator(self):
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
indexer = (x for x in [1, 2])
result = df.iloc[indexer, 1]
expected = Series([5, 6], name="b", index=[1, 2])
tm.assert_series_equal(result, expected)
def test_getitem_iloc_dateoffset_days(self):
# GH 46671
df = DataFrame(
list(range(10)),
index=date_range("01-01-2022", periods=10, freq=DateOffset(days=1)),
)
result = df.loc["2022-01-01":"2022-01-03"]
expected = DataFrame(
[0, 1, 2],
index=DatetimeIndex(
["2022-01-01", "2022-01-02", "2022-01-03"],
dtype="datetime64[ns]",
freq=DateOffset(days=1),
),
)
tm.assert_frame_equal(result, expected)
df = DataFrame(
list(range(10)),
index=date_range(
"01-01-2022", periods=10, freq=DateOffset(days=1, hours=2)
),
)
result = df.loc["2022-01-01":"2022-01-03"]
expected = DataFrame(
[0, 1, 2],
index=DatetimeIndex(
["2022-01-01 00:00:00", "2022-01-02 02:00:00", "2022-01-03 04:00:00"],
dtype="datetime64[ns]",
freq=DateOffset(days=1, hours=2),
),
)
tm.assert_frame_equal(result, expected)
df = DataFrame(
list(range(10)),
index=date_range("01-01-2022", periods=10, freq=DateOffset(minutes=3)),
)
result = df.loc["2022-01-01":"2022-01-03"]
tm.assert_frame_equal(result, df)
class TestGetitemCallable:
def test_getitem_callable(self, float_frame):
# GH#12533
result = float_frame[lambda x: "A"]
expected = float_frame.loc[:, "A"]
tm.assert_series_equal(result, expected)
result = float_frame[lambda x: ["A", "B"]]
expected = float_frame.loc[:, ["A", "B"]]
tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])
df = float_frame[:3]
result = df[lambda x: [True, False, True]]
expected = float_frame.iloc[[0, 2], :]
tm.assert_frame_equal(result, expected)
def test_loc_multiindex_columns_one_level(self):
# GH#29749
df = DataFrame([[1, 2]], columns=[["a", "b"]])
expected = DataFrame([1], columns=[["a"]])
result = df["a"]
tm.assert_frame_equal(result, expected)
result = df.loc[:, "a"]
tm.assert_frame_equal(result, expected)
class TestGetitemBooleanMask:
def test_getitem_bool_mask_categorical_index(self):
df3 = DataFrame(
{
"A": np.arange(6, dtype="int64"),
},
index=CategoricalIndex(
[1, 1, 2, 1, 3, 2],
dtype=CategoricalDtype([3, 2, 1], ordered=True),
name="B",
),
)
df4 = DataFrame(
{
"A": np.arange(6, dtype="int64"),
},
index=CategoricalIndex(
[1, 1, 2, 1, 3, 2],
dtype=CategoricalDtype([3, 2, 1], ordered=False),
name="B",
),
)
result = df3[df3.index == "a"]
expected = df3.iloc[[]]
tm.assert_frame_equal(result, expected)
result = df4[df4.index == "a"]
expected = df4.iloc[[]]
tm.assert_frame_equal(result, expected)
result = df3[df3.index == 1]
expected = df3.iloc[[0, 1, 3]]
tm.assert_frame_equal(result, expected)
result = df4[df4.index == 1]
expected = df4.iloc[[0, 1, 3]]
tm.assert_frame_equal(result, expected)
# since we have an ordered categorical
# CategoricalIndex([1, 1, 2, 1, 3, 2],
# categories=[3, 2, 1],
# ordered=True,
# name='B')
result = df3[df3.index < 2]
expected = df3.iloc[[4]]
tm.assert_frame_equal(result, expected)
result = df3[df3.index > 1]
expected = df3.iloc[[]]
tm.assert_frame_equal(result, expected)
# unordered
# cannot be compared
# CategoricalIndex([1, 1, 2, 1, 3, 2],
# categories=[3, 2, 1],
# ordered=False,
# name='B')
msg = "Unordered Categoricals can only compare equality or not"
with pytest.raises(TypeError, match=msg):
df4[df4.index < 2]
with pytest.raises(TypeError, match=msg):
df4[df4.index > 1]
@pytest.mark.parametrize(
"data1,data2,expected_data",
(
(
[[1, 2], [3, 4]],
[[0.5, 6], [7, 8]],
[[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]],
),
(
[[1, 2], [3, 4]],
[[5, 6], [7, 8]],
[[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]],
),
),
)
def test_getitem_bool_mask_duplicate_columns_mixed_dtypes(
self,
data1,
data2,
expected_data,
):
# GH#31954
df1 = DataFrame(np.array(data1))
df2 = DataFrame(np.array(data2))
df = concat([df1, df2], axis=1)
result = df[df > 2]
exdict = {i: np.array(col) for i, col in enumerate(expected_data)}
expected = DataFrame(exdict).rename(columns={2: 0, 3: 1})
tm.assert_frame_equal(result, expected)
@pytest.fixture
def df_dup_cols(self):
dups = ["A", "A", "C", "D"]
df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
return df
def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols):
# `df.A > 6` is a DataFrame with a different shape from df
# boolean with the duplicate raises
df = df_dup_cols
msg = "cannot reindex on an axis with duplicate labels"
with pytest.raises(ValueError, match=msg):
df[df.A > 6]
def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
# boolean indexing
# GH#4879
df = DataFrame(
np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
)
expected = df[df.C > 6]
expected.columns = df_dup_cols.columns
df = df_dup_cols
result = df[df.C > 6]
tm.assert_frame_equal(result, expected)
def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols):
# where
df = DataFrame(
np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
)
# `df > 6` is a DataFrame with the same shape+alignment as df
expected = df[df > 6]
expected.columns = df_dup_cols.columns
df = df_dup_cols
result = df[df > 6]
tm.assert_frame_equal(result, expected)
def test_getitem_empty_frame_with_boolean(self):
# Test for issue GH#11859
df = DataFrame()
df2 = df[df > 0]
tm.assert_frame_equal(df, df2)
def test_getitem_returns_view_when_column_is_unique_in_df(
self, using_copy_on_write, warn_copy_on_write
):
# GH#45316
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
df_orig = df.copy()
view = df["b"]
with tm.assert_cow_warning(warn_copy_on_write):
view.loc[:] = 100
if using_copy_on_write:
expected = df_orig
else:
expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"])
tm.assert_frame_equal(df, expected)
def test_getitem_frozenset_unique_in_column(self):
# GH#41062
df = DataFrame([[1, 2, 3, 4]], columns=[frozenset(["KEY"]), "B", "C", "C"])
result = df[frozenset(["KEY"])]
expected = Series([1], name=frozenset(["KEY"]))
tm.assert_series_equal(result, expected)
class TestGetitemSlice:
def test_getitem_slice_float64(self, frame_or_series):
values = np.arange(10.0, 50.0, 2)
index = Index(values)
start, end = values[[5, 15]]
data = np.random.default_rng(2).standard_normal((20, 3))
if frame_or_series is not DataFrame:
data = data[:, 0]
obj = frame_or_series(data, index=index)
result = obj[start:end]
expected = obj.iloc[5:16]
tm.assert_equal(result, expected)
result = obj.loc[start:end]
tm.assert_equal(result, expected)
def test_getitem_datetime_slice(self):
# GH#43223
df = DataFrame(
{"a": 0},
index=DatetimeIndex(
[
"11.01.2011 22:00",
"11.01.2011 23:00",
"12.01.2011 00:00",
"2011-01-13 00:00",
]
),
)
with pytest.raises(
KeyError, match="Value based partial slicing on non-monotonic"
):
df["2011-01-01":"2011-11-01"]
def test_getitem_slice_same_dim_only_one_axis(self):
# GH#54622
df = DataFrame(np.random.default_rng(2).standard_normal((10, 8)))
result = df.iloc[(slice(None, None, 2),)]
assert result.shape == (5, 8)
expected = df.iloc[slice(None, None, 2), slice(None)]
tm.assert_frame_equal(result, expected)
class TestGetitemDeprecatedIndexers:
@pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}])
def test_getitem_dict_and_set_deprecated(self, key):
# GH#42825 enforced in 2.0
df = DataFrame(
[[1, 2], [3, 4]], columns=MultiIndex.from_tuples([("a", 1), ("b", 2)])
)
with pytest.raises(TypeError, match="as an indexer is not supported"):
df[key]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,120 @@
"""
test_insert is specifically for the DataFrame.insert method; not to be
confused with tests with "insert" in their names that are really testing
__setitem__.
"""
import numpy as np
import pytest
from pandas.errors import PerformanceWarning
from pandas import (
DataFrame,
Index,
)
import pandas._testing as tm
class TestDataFrameInsert:
def test_insert(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((5, 3)),
index=np.arange(5),
columns=["c", "b", "a"],
)
df.insert(0, "foo", df["a"])
tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"]))
tm.assert_series_equal(df["a"], df["foo"], check_names=False)
df.insert(2, "bar", df["c"])
tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"]))
tm.assert_almost_equal(df["c"], df["bar"], check_names=False)
with pytest.raises(ValueError, match="already exists"):
df.insert(1, "a", df["b"])
msg = "cannot insert c, already exists"
with pytest.raises(ValueError, match=msg):
df.insert(1, "c", df["b"])
df.columns.name = "some_name"
# preserve columns name field
df.insert(0, "baz", df["c"])
assert df.columns.name == "some_name"
def test_insert_column_bug_4032(self):
# GH#4032, inserting a column and renaming causing errors
df = DataFrame({"b": [1.1, 2.2]})
df = df.rename(columns={})
df.insert(0, "a", [1, 2])
result = df.rename(columns={})
expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"])
tm.assert_frame_equal(result, expected)
df.insert(0, "c", [1.3, 2.3])
result = df.rename(columns={})
expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
tm.assert_frame_equal(result, expected)
def test_insert_with_columns_dups(self):
# GH#14291
df = DataFrame()
df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
exp = DataFrame(
[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
)
tm.assert_frame_equal(df, exp)
def test_insert_item_cache(self, using_array_manager, using_copy_on_write):
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
ser = df[0]
if using_array_manager:
expected_warning = None
else:
# with BlockManager warn about high fragmentation of single dtype
expected_warning = PerformanceWarning
with tm.assert_produces_warning(expected_warning):
for n in range(100):
df[n + 3] = df[1] * n
if using_copy_on_write:
ser.iloc[0] = 99
assert df.iloc[0, 0] == df[0][0]
assert df.iloc[0, 0] != 99
else:
ser.values[0] = 99
assert df.iloc[0, 0] == df[0][0]
assert df.iloc[0, 0] == 99
def test_insert_EA_no_warning(self):
# PerformanceWarning about fragmented frame should not be raised when
# using EAs (https://github.com/pandas-dev/pandas/issues/44098)
df = DataFrame(
np.random.default_rng(2).integers(0, 100, size=(3, 100)), dtype="Int64"
)
with tm.assert_produces_warning(None):
df["a"] = np.array([1, 2, 3])
def test_insert_frame(self):
# GH#42403
df = DataFrame({"col1": [1, 2], "col2": [3, 4]})
msg = (
"Expected a one-dimensional object, got a DataFrame with 2 columns instead."
)
with pytest.raises(ValueError, match=msg):
df.insert(1, "newcol", df)
def test_insert_int64_loc(self):
# GH#53193
df = DataFrame({"a": [1, 2]})
df.insert(np.int64(0), "b", 0)
tm.assert_frame_equal(df, DataFrame({"b": [0, 0], "a": [1, 2]}))

View File

@ -0,0 +1,152 @@
"""
Tests for DataFrame.mask; tests DataFrame.where as a side-effect.
"""
import numpy as np
from pandas import (
NA,
DataFrame,
Float64Dtype,
Series,
StringDtype,
Timedelta,
isna,
)
import pandas._testing as tm
class TestDataFrameMask:
def test_mask(self):
df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
cond = df > 0
rs = df.where(cond, np.nan)
tm.assert_frame_equal(rs, df.mask(df <= 0))
tm.assert_frame_equal(rs, df.mask(~cond))
other = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
rs = df.where(cond, other)
tm.assert_frame_equal(rs, df.mask(df <= 0, other))
tm.assert_frame_equal(rs, df.mask(~cond, other))
def test_mask2(self):
# see GH#21891
df = DataFrame([1, 2])
res = df.mask([[True], [False]])
exp = DataFrame([np.nan, 2])
tm.assert_frame_equal(res, exp)
def test_mask_inplace(self):
# GH#8801
df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)))
cond = df > 0
rdf = df.copy()
return_value = rdf.where(cond, inplace=True)
assert return_value is None
tm.assert_frame_equal(rdf, df.where(cond))
tm.assert_frame_equal(rdf, df.mask(~cond))
rdf = df.copy()
return_value = rdf.where(cond, -df, inplace=True)
assert return_value is None
tm.assert_frame_equal(rdf, df.where(cond, -df))
tm.assert_frame_equal(rdf, df.mask(~cond, -df))
def test_mask_edge_case_1xN_frame(self):
# GH#4071
df = DataFrame([[1, 2]])
res = df.mask(DataFrame([[True, False]]))
expec = DataFrame([[np.nan, 2]])
tm.assert_frame_equal(res, expec)
def test_mask_callable(self):
# GH#12533
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
result = df.mask(lambda x: x > 4, lambda x: x + 1)
exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result, df.mask(df > 4, df + 1))
# return ndarray and scalar
result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))
# chain
result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10))
def test_mask_dtype_bool_conversion(self):
# GH#3733
df = DataFrame(data=np.random.default_rng(2).standard_normal((100, 50)))
df = df.where(df > 0) # create nans
bools = df > 0
mask = isna(df)
expected = bools.astype(object).mask(mask)
result = bools.mask(mask)
tm.assert_frame_equal(result, expected)
def test_mask_stringdtype(frame_or_series):
# GH 40824
obj = DataFrame(
{"A": ["foo", "bar", "baz", NA]},
index=["id1", "id2", "id3", "id4"],
dtype=StringDtype(),
)
filtered_obj = DataFrame(
{"A": ["this", "that"]}, index=["id2", "id3"], dtype=StringDtype()
)
expected = DataFrame(
{"A": [NA, "this", "that", NA]},
index=["id1", "id2", "id3", "id4"],
dtype=StringDtype(),
)
if frame_or_series is Series:
obj = obj["A"]
filtered_obj = filtered_obj["A"]
expected = expected["A"]
filter_ser = Series([False, True, True, False])
result = obj.mask(filter_ser, filtered_obj)
tm.assert_equal(result, expected)
def test_mask_where_dtype_timedelta():
# https://github.com/pandas-dev/pandas/issues/39548
df = DataFrame([Timedelta(i, unit="d") for i in range(5)])
expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]"))
tm.assert_frame_equal(df.mask(df.notna()), expected)
expected = DataFrame(
[np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")]
)
tm.assert_frame_equal(df.where(df > Timedelta(2, unit="d")), expected)
def test_mask_return_dtype():
# GH#50488
ser = Series([0.0, 1.0, 2.0, 3.0], dtype=Float64Dtype())
cond = ~ser.isna()
other = Series([True, False, True, False])
excepted = Series([1.0, 0.0, 1.0, 0.0], dtype=ser.dtype)
result = ser.mask(cond, other)
tm.assert_series_equal(result, excepted)
def test_mask_inplace_no_other():
# GH#51685
df = DataFrame({"a": [1.0, 2.0], "b": ["x", "y"]})
cond = DataFrame({"a": [True, False], "b": [False, True]})
df.mask(cond, inplace=True)
expected = DataFrame({"a": [np.nan, 2], "b": ["x", np.nan]})
tm.assert_frame_equal(df, expected)

View File

@ -0,0 +1,77 @@
import numpy as np
from pandas.core.dtypes.common import is_float_dtype
from pandas import (
DataFrame,
isna,
)
import pandas._testing as tm
class TestSetValue:
def test_set_value(self, float_frame):
for idx in float_frame.index:
for col in float_frame.columns:
float_frame._set_value(idx, col, 1)
assert float_frame[col][idx] == 1
def test_set_value_resize(self, float_frame, using_infer_string):
res = float_frame._set_value("foobar", "B", 0)
assert res is None
assert float_frame.index[-1] == "foobar"
assert float_frame._get_value("foobar", "B") == 0
float_frame.loc["foobar", "qux"] = 0
assert float_frame._get_value("foobar", "qux") == 0
res = float_frame.copy()
res._set_value("foobar", "baz", "sam")
if using_infer_string:
assert res["baz"].dtype == "string"
else:
assert res["baz"].dtype == np.object_
res = float_frame.copy()
res._set_value("foobar", "baz", True)
assert res["baz"].dtype == np.object_
res = float_frame.copy()
res._set_value("foobar", "baz", 5)
assert is_float_dtype(res["baz"])
assert isna(res["baz"].drop(["foobar"])).all()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
res._set_value("foobar", "baz", "sam")
assert res.loc["foobar", "baz"] == "sam"
def test_set_value_with_index_dtype_change(self):
df_orig = DataFrame(
np.random.default_rng(2).standard_normal((3, 3)),
index=range(3),
columns=list("ABC"),
)
# this is actually ambiguous as the 2 is interpreted as a positional
# so column is not created
df = df_orig.copy()
df._set_value("C", 2, 1.0)
assert list(df.index) == list(df_orig.index) + ["C"]
# assert list(df.columns) == list(df_orig.columns) + [2]
df = df_orig.copy()
df.loc["C", 2] = 1.0
assert list(df.index) == list(df_orig.index) + ["C"]
# assert list(df.columns) == list(df_orig.columns) + [2]
# create both new
df = df_orig.copy()
df._set_value("C", "D", 1.0)
assert list(df.index) == list(df_orig.index) + ["C"]
assert list(df.columns) == list(df_orig.columns) + ["D"]
df = df_orig.copy()
df.loc["C", "D"] = 1.0
assert list(df.index) == list(df_orig.index) + ["C"]
assert list(df.columns) == list(df_orig.columns) + ["D"]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,92 @@
import pytest
import pandas._testing as tm
class TestDataFrameTake:
def test_take_slices_deprecated(self, float_frame):
# GH#51539
df = float_frame
slc = slice(0, 4, 1)
with tm.assert_produces_warning(FutureWarning):
df.take(slc, axis=0)
with tm.assert_produces_warning(FutureWarning):
df.take(slc, axis=1)
def test_take(self, float_frame):
# homogeneous
order = [3, 1, 2, 0]
for df in [float_frame]:
result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)
# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["D", "B", "C", "A"]]
tm.assert_frame_equal(result, expected, check_names=False)
# negative indices
order = [2, 1, -1]
for df in [float_frame]:
result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)
result = df.take(order, axis=0)
tm.assert_frame_equal(result, expected)
# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["C", "B", "D"]]
tm.assert_frame_equal(result, expected, check_names=False)
# illegal indices
msg = "indices are out-of-bounds"
with pytest.raises(IndexError, match=msg):
df.take([3, 1, 2, 30], axis=0)
with pytest.raises(IndexError, match=msg):
df.take([3, 1, 2, -31], axis=0)
with pytest.raises(IndexError, match=msg):
df.take([3, 1, 2, 5], axis=1)
with pytest.raises(IndexError, match=msg):
df.take([3, 1, 2, -5], axis=1)
def test_take_mixed_type(self, float_string_frame):
# mixed-dtype
order = [4, 1, 2, 0, 3]
for df in [float_string_frame]:
result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)
# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["foo", "B", "C", "A", "D"]]
tm.assert_frame_equal(result, expected)
# negative indices
order = [4, 1, -2]
for df in [float_string_frame]:
result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)
# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["foo", "B", "D"]]
tm.assert_frame_equal(result, expected)
def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame):
# by dtype
order = [1, 2, 0, 3]
for df in [mixed_float_frame, mixed_int_frame]:
result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
tm.assert_frame_equal(result, expected)
# axis = 1
result = df.take(order, axis=1)
expected = df.loc[:, ["B", "C", "A", "D"]]
tm.assert_frame_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,444 @@
import re
import numpy as np
import pytest
from pandas.errors import SettingWithCopyError
from pandas import (
DataFrame,
Index,
IndexSlice,
MultiIndex,
Series,
concat,
)
import pandas._testing as tm
from pandas.tseries.offsets import BDay
@pytest.fixture
def four_level_index_dataframe():
arr = np.array(
[
[-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
[0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
[-0.6662, -0.5243, -0.358, 0.89145, 2.5838],
]
)
index = MultiIndex(
levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]],
codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]],
names=["one", "two", "three", "four"],
)
return DataFrame(arr, index=index, columns=list("ABCDE"))
class TestXS:
def test_xs(
self, float_frame, datetime_frame, using_copy_on_write, warn_copy_on_write
):
float_frame_orig = float_frame.copy()
idx = float_frame.index[5]
xs = float_frame.xs(idx)
for item, value in xs.items():
if np.isnan(value):
assert np.isnan(float_frame[item][idx])
else:
assert value == float_frame[item][idx]
# mixed-type xs
test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}}
frame = DataFrame(test_data)
xs = frame.xs("1")
assert xs.dtype == np.object_
assert xs["A"] == 1
assert xs["B"] == "1"
with pytest.raises(
KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00')")
):
datetime_frame.xs(datetime_frame.index[0] - BDay())
# xs get column
series = float_frame.xs("A", axis=1)
expected = float_frame["A"]
tm.assert_series_equal(series, expected)
# view is returned if possible
series = float_frame.xs("A", axis=1)
with tm.assert_cow_warning(warn_copy_on_write):
series[:] = 5
if using_copy_on_write:
# but with CoW the view shouldn't propagate mutations
tm.assert_series_equal(float_frame["A"], float_frame_orig["A"])
assert not (expected == 5).all()
else:
assert (expected == 5).all()
def test_xs_corner(self):
# pathological mixed-type reordering case
df = DataFrame(index=[0])
df["A"] = 1.0
df["B"] = "foo"
df["C"] = 2.0
df["D"] = "bar"
df["E"] = 3.0
xs = df.xs(0)
exp = Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0)
tm.assert_series_equal(xs, exp)
# no columns but Index(dtype=object)
df = DataFrame(index=["a", "b", "c"])
result = df.xs("a")
expected = Series([], name="a", dtype=np.float64)
tm.assert_series_equal(result, expected)
def test_xs_duplicates(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((5, 2)),
index=["b", "b", "c", "b", "a"],
)
cross = df.xs("c")
exp = df.iloc[2]
tm.assert_series_equal(cross, exp)
def test_xs_keep_level(self):
df = DataFrame(
{
"day": {0: "sat", 1: "sun"},
"flavour": {0: "strawberry", 1: "strawberry"},
"sales": {0: 10, 1: 12},
"year": {0: 2008, 1: 2008},
}
).set_index(["year", "flavour", "day"])
result = df.xs("sat", level="day", drop_level=False)
expected = df[:1]
tm.assert_frame_equal(result, expected)
result = df.xs((2008, "sat"), level=["year", "day"], drop_level=False)
tm.assert_frame_equal(result, expected)
def test_xs_view(
self, using_array_manager, using_copy_on_write, warn_copy_on_write
):
# in 0.14 this will return a view if possible a copy otherwise, but
# this is numpy dependent
dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5))
df_orig = dm.copy()
if using_copy_on_write:
with tm.raises_chained_assignment_error():
dm.xs(2)[:] = 20
tm.assert_frame_equal(dm, df_orig)
elif using_array_manager:
# INFO(ArrayManager) with ArrayManager getting a row as a view is
# not possible
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
dm.xs(2)[:] = 20
assert not (dm.xs(2) == 20).any()
else:
with tm.raises_chained_assignment_error():
dm.xs(2)[:] = 20
assert (dm.xs(2) == 20).all()
class TestXSWithMultiIndex:
def test_xs_doc_example(self):
# TODO: more descriptive name
# based on example in advanced.rst
arrays = [
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = list(zip(*arrays))
index = MultiIndex.from_tuples(tuples, names=["first", "second"])
df = DataFrame(
np.random.default_rng(2).standard_normal((3, 8)),
index=["A", "B", "C"],
columns=index,
)
result = df.xs(("one", "bar"), level=("second", "first"), axis=1)
expected = df.iloc[:, [0]]
tm.assert_frame_equal(result, expected)
def test_xs_integer_key(self):
# see GH#2107
dates = range(20111201, 20111205)
ids = list("abcde")
index = MultiIndex.from_product([dates, ids], names=["date", "secid"])
df = DataFrame(
np.random.default_rng(2).standard_normal((len(index), 3)),
index,
["X", "Y", "Z"],
)
result = df.xs(20111201, level="date")
expected = df.loc[20111201, :]
tm.assert_frame_equal(result, expected)
def test_xs_level(self, multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.xs("two", level="second")
expected = df[df.index.get_level_values(1) == "two"]
expected.index = Index(["foo", "bar", "baz", "qux"], name="first")
tm.assert_frame_equal(result, expected)
def test_xs_level_eq_2(self):
arr = np.random.default_rng(2).standard_normal((3, 5))
index = MultiIndex(
levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]],
codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]],
)
df = DataFrame(arr, index=index)
expected = DataFrame(arr[1:2], index=[["a"], ["b"]])
result = df.xs("c", level=2)
tm.assert_frame_equal(result, expected)
def test_xs_setting_with_copy_error(
self,
multiindex_dataframe_random_data,
using_copy_on_write,
warn_copy_on_write,
):
# this is a copy in 0.14
df = multiindex_dataframe_random_data
df_orig = df.copy()
result = df.xs("two", level="second")
if using_copy_on_write or warn_copy_on_write:
result[:] = 10
else:
# setting this will give a SettingWithCopyError
# as we are trying to write a view
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
result[:] = 10
tm.assert_frame_equal(df, df_orig)
def test_xs_setting_with_copy_error_multiple(
self, four_level_index_dataframe, using_copy_on_write, warn_copy_on_write
):
# this is a copy in 0.14
df = four_level_index_dataframe
df_orig = df.copy()
result = df.xs(("a", 4), level=["one", "four"])
if using_copy_on_write or warn_copy_on_write:
result[:] = 10
else:
# setting this will give a SettingWithCopyError
# as we are trying to write a view
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
result[:] = 10
tm.assert_frame_equal(df, df_orig)
@pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])])
def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data):
# see GH#13719
frame = multiindex_dataframe_random_data
df = concat([frame] * 2)
assert df.index.is_unique is False
expected = concat([frame.xs("one", level="second")] * 2)
if isinstance(key, list):
result = df.xs(tuple(key), level=level)
else:
result = df.xs(key, level=level)
tm.assert_frame_equal(result, expected)
def test_xs_missing_values_in_index(self):
# see GH#6574
# missing values in returned index should be preserved
acc = [
("a", "abcde", 1),
("b", "bbcde", 2),
("y", "yzcde", 25),
("z", "xbcde", 24),
("z", None, 26),
("z", "zbcde", 25),
("z", "ybcde", 26),
]
df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"])
expected = DataFrame(
{"cnt": [24, 26, 25, 26]},
index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"),
)
result = df.xs("z", level="a1")
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"key, level, exp_arr, exp_index",
[
("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")),
("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")),
],
)
def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index):
# see GH#2903
arr = np.random.default_rng(2).standard_normal((4, 4))
index = MultiIndex(
levels=[["a", "b"], ["bar", "foo", "hello", "world"]],
codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
names=["lvl0", "lvl1"],
)
df = DataFrame(arr, columns=index)
result = df.xs(key, level=level, axis=1)
expected = DataFrame(exp_arr(arr), columns=exp_index)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"indexer",
[
lambda df: df.xs(("a", 4), level=["one", "four"]),
lambda df: df.xs("a").xs(4, level="four"),
],
)
def test_xs_level_multiple(self, indexer, four_level_index_dataframe):
df = four_level_index_dataframe
expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]]
expected_index = MultiIndex(
levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"]
)
expected = DataFrame(
expected_values, index=expected_index, columns=list("ABCDE")
)
result = indexer(df)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")]
)
def test_xs_level0(self, indexer, four_level_index_dataframe):
df = four_level_index_dataframe
expected_values = [
[-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
[0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
]
expected_index = MultiIndex(
levels=[["b", "q"], [10.0032, 20.0], [4, 5]],
codes=[[0, 1], [0, 1], [1, 0]],
names=["two", "three", "four"],
)
expected = DataFrame(
expected_values, index=expected_index, columns=list("ABCDE")
)
result = indexer(df)
tm.assert_frame_equal(result, expected)
def test_xs_values(self, multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.xs(("bar", "two")).values
expected = df.values[4]
tm.assert_almost_equal(result, expected)
def test_xs_loc_equality(self, multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.xs(("bar", "two"))
expected = df.loc[("bar", "two")]
tm.assert_series_equal(result, expected)
def test_xs_IndexSlice_argument_not_implemented(self, frame_or_series):
# GH#35301
index = MultiIndex(
levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]],
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
)
obj = DataFrame(np.random.default_rng(2).standard_normal((6, 4)), index=index)
if frame_or_series is Series:
obj = obj[0]
expected = obj.iloc[-2:].droplevel(0)
result = obj.xs(IndexSlice[("foo", "qux", 0), :])
tm.assert_equal(result, expected)
result = obj.loc[IndexSlice[("foo", "qux", 0), :]]
tm.assert_equal(result, expected)
def test_xs_levels_raises(self, frame_or_series):
obj = DataFrame({"A": [1, 2, 3]})
if frame_or_series is Series:
obj = obj["A"]
msg = "Index must be a MultiIndex"
with pytest.raises(TypeError, match=msg):
obj.xs(0, level="as")
def test_xs_multiindex_droplevel_false(self):
# GH#19056
mi = MultiIndex.from_tuples(
[("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
)
df = DataFrame([[1, 2, 3]], columns=mi)
result = df.xs("a", axis=1, drop_level=False)
expected = DataFrame(
[[1, 2]],
columns=MultiIndex.from_tuples(
[("a", "x"), ("a", "y")], names=["level1", "level2"]
),
)
tm.assert_frame_equal(result, expected)
def test_xs_droplevel_false(self):
# GH#19056
df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
result = df.xs("a", axis=1, drop_level=False)
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)
def test_xs_droplevel_false_view(
self, using_array_manager, using_copy_on_write, warn_copy_on_write
):
# GH#37832
df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
result = df.xs("a", axis=1, drop_level=False)
# check that result still views the same data as df
assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values)
with tm.assert_cow_warning(warn_copy_on_write):
df.iloc[0, 0] = 2
if using_copy_on_write:
# with copy on write the subset is never modified
expected = DataFrame({"a": [1]})
else:
# modifying original df also modifies result when having a single block
expected = DataFrame({"a": [2]})
tm.assert_frame_equal(result, expected)
# with mixed dataframe, modifying the parent doesn't modify result
# TODO the "split" path behaves differently here as with single block
df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
result = df.xs("a", axis=1, drop_level=False)
df.iloc[0, 0] = 2
if using_copy_on_write:
# with copy on write the subset is never modified
expected = DataFrame({"a": [1]})
elif using_array_manager:
# Here the behavior is consistent
expected = DataFrame({"a": [2]})
else:
# FIXME: iloc does not update the array inplace using
# "split" path
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)
def test_xs_list_indexer_droplevel_false(self):
# GH#41760
mi = MultiIndex.from_tuples([("x", "m", "a"), ("x", "n", "b"), ("y", "o", "c")])
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi)
with pytest.raises(KeyError, match="y"):
df.xs(("x", "y"), drop_level=False, axis=1)