Updated script that can be controlled by a Node.js web app
@@ -0,0 +1,413 @@
"""Tests dealing with the NDFrame.allows_duplicates."""
import operator

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm

not_implemented = pytest.mark.xfail(reason="Not implemented.")

# ----------------------------------------------------------------------------
# Preservation


class TestPreserves:
    @pytest.mark.parametrize(
        "cls, data",
        [
            (pd.Series, np.array([])),
            (pd.Series, [1, 2]),
            (pd.DataFrame, {}),
            (pd.DataFrame, {"A": [1, 2]}),
        ],
    )
    def test_construction_ok(self, cls, data):
        result = cls(data)
        assert result.flags.allows_duplicate_labels is True

        result = cls(data).set_flags(allows_duplicate_labels=False)
        assert result.flags.allows_duplicate_labels is False

    @pytest.mark.parametrize(
        "func",
        [
            operator.itemgetter(["a"]),
            operator.methodcaller("add", 1),
            operator.methodcaller("rename", str.upper),
            operator.methodcaller("rename", "name"),
            operator.methodcaller("abs"),
            np.abs,
        ],
    )
    def test_preserved_series(self, func):
        s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
        assert func(s).flags.allows_duplicate_labels is False

    @pytest.mark.parametrize(
        "other", [pd.Series(0, index=["a", "b", "c"]), pd.Series(0, index=["a", "b"])]
    )
    # TODO: frame
    @not_implemented
    def test_align(self, other):
        s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
        a, b = s.align(other)
        assert a.flags.allows_duplicate_labels is False
        assert b.flags.allows_duplicate_labels is False

    def test_preserved_frame(self):
        df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags(
            allows_duplicate_labels=False
        )
        assert df.loc[["a"]].flags.allows_duplicate_labels is False
        assert df.loc[:, ["A", "B"]].flags.allows_duplicate_labels is False

    def test_to_frame(self):
        ser = pd.Series(dtype=float).set_flags(allows_duplicate_labels=False)
        assert ser.to_frame().flags.allows_duplicate_labels is False
|
||||
|
||||
@pytest.mark.parametrize("func", ["add", "sub"])
|
||||
@pytest.mark.parametrize("frame", [False, True])
|
||||
@pytest.mark.parametrize("other", [1, pd.Series([1, 2], name="A")])
|
||||
def test_binops(self, func, other, frame):
|
||||
df = pd.Series([1, 2], name="A", index=["a", "b"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
)
|
||||
if frame:
|
||||
df = df.to_frame()
|
||||
if isinstance(other, pd.Series) and frame:
|
||||
other = other.to_frame()
|
||||
func = operator.methodcaller(func, other)
|
||||
assert df.flags.allows_duplicate_labels is False
|
||||
assert func(df).flags.allows_duplicate_labels is False
|
||||
|
||||
def test_preserve_getitem(self):
|
||||
df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False)
|
||||
assert df[["A"]].flags.allows_duplicate_labels is False
|
||||
assert df["A"].flags.allows_duplicate_labels is False
|
||||
assert df.loc[0].flags.allows_duplicate_labels is False
|
||||
assert df.loc[[0]].flags.allows_duplicate_labels is False
|
||||
assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False
|
||||
|
||||
def test_ndframe_getitem_caching_issue(
|
||||
self, request, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
if not (using_copy_on_write or warn_copy_on_write):
|
||||
request.applymarker(pytest.mark.xfail(reason="Unclear behavior."))
|
||||
# NDFrame.__getitem__ will cache the first df['A']. May need to
|
||||
# invalidate that cache? Update the cached entries?
|
||||
df = pd.DataFrame({"A": [0]}).set_flags(allows_duplicate_labels=False)
|
||||
assert df["A"].flags.allows_duplicate_labels is False
|
||||
df.flags.allows_duplicate_labels = True
|
||||
assert df["A"].flags.allows_duplicate_labels is True
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"objs, kwargs",
|
||||
[
|
||||
# Series
|
||||
(
|
||||
[
|
||||
pd.Series(1, index=["a", "b"]),
|
||||
pd.Series(2, index=["c", "d"]),
|
||||
],
|
||||
{},
|
||||
),
|
||||
(
|
||||
[
|
||||
pd.Series(1, index=["a", "b"]),
|
||||
pd.Series(2, index=["a", "b"]),
|
||||
],
|
||||
{"ignore_index": True},
|
||||
),
|
||||
(
|
||||
[
|
||||
pd.Series(1, index=["a", "b"]),
|
||||
pd.Series(2, index=["a", "b"]),
|
||||
],
|
||||
{"axis": 1},
|
||||
),
|
||||
# Frame
|
||||
(
|
||||
[
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
pd.DataFrame({"A": [1, 2]}, index=["c", "d"]),
|
||||
],
|
||||
{},
|
||||
),
|
||||
(
|
||||
[
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
],
|
||||
{"ignore_index": True},
|
||||
),
|
||||
(
|
||||
[
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
pd.DataFrame({"B": [1, 2]}, index=["a", "b"]),
|
||||
],
|
||||
{"axis": 1},
|
||||
),
|
||||
# Series / Frame
|
||||
(
|
||||
[
|
||||
pd.DataFrame({"A": [1, 2]}, index=["a", "b"]),
|
||||
pd.Series([1, 2], index=["a", "b"], name="B"),
|
||||
],
|
||||
{"axis": 1},
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_concat(self, objs, kwargs):
|
||||
objs = [x.set_flags(allows_duplicate_labels=False) for x in objs]
|
||||
result = pd.concat(objs, **kwargs)
|
||||
assert result.flags.allows_duplicate_labels is False
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"left, right, expected",
|
||||
[
|
||||
# false false false
|
||||
pytest.param(
|
||||
pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
),
|
||||
pd.DataFrame({"B": [0, 1]}, index=["a", "d"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
),
|
||||
False,
|
||||
marks=not_implemented,
|
||||
),
|
||||
# false true false
|
||||
pytest.param(
|
||||
pd.DataFrame({"A": [0, 1]}, index=["a", "b"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
),
|
||||
pd.DataFrame({"B": [0, 1]}, index=["a", "d"]),
|
||||
False,
|
||||
marks=not_implemented,
|
||||
),
|
||||
# true true true
|
||||
(
|
||||
pd.DataFrame({"A": [0, 1]}, index=["a", "b"]),
|
||||
pd.DataFrame({"B": [0, 1]}, index=["a", "d"]),
|
||||
True,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_merge(self, left, right, expected):
|
||||
result = pd.merge(left, right, left_index=True, right_index=True)
|
||||
assert result.flags.allows_duplicate_labels is expected
|
||||
|
||||
@not_implemented
|
||||
def test_groupby(self):
|
||||
# XXX: This is under-tested
|
||||
# TODO:
|
||||
# - apply
|
||||
# - transform
|
||||
# - Should passing a grouper that disallows duplicates propagate?
|
||||
df = pd.DataFrame({"A": [1, 2, 3]}).set_flags(allows_duplicate_labels=False)
|
||||
result = df.groupby([0, 0, 1]).agg("count")
|
||||
assert result.flags.allows_duplicate_labels is False
|
||||
|
||||
@pytest.mark.parametrize("frame", [True, False])
|
||||
@not_implemented
|
||||
def test_window(self, frame):
|
||||
df = pd.Series(
|
||||
1,
|
||||
index=pd.date_range("2000", periods=12),
|
||||
name="A",
|
||||
allows_duplicate_labels=False,
|
||||
)
|
||||
if frame:
|
||||
df = df.to_frame()
|
||||
assert df.rolling(3).mean().flags.allows_duplicate_labels is False
|
||||
assert df.ewm(3).mean().flags.allows_duplicate_labels is False
|
||||
assert df.expanding(3).mean().flags.allows_duplicate_labels is False
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Raises
|
||||
|
||||
|
||||
class TestRaises:
|
||||
@pytest.mark.parametrize(
|
||||
"cls, axes",
|
||||
[
|
||||
(pd.Series, {"index": ["a", "a"], "dtype": float}),
|
||||
(pd.DataFrame, {"index": ["a", "a"]}),
|
||||
(pd.DataFrame, {"index": ["a", "a"], "columns": ["b", "b"]}),
|
||||
(pd.DataFrame, {"columns": ["b", "b"]}),
|
||||
],
|
||||
)
|
||||
def test_set_flags_with_duplicates(self, cls, axes):
|
||||
result = cls(**axes)
|
||||
assert result.flags.allows_duplicate_labels is True
|
||||
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
cls(**axes).set_flags(allows_duplicate_labels=False)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
pd.Series(index=[0, 0], dtype=float),
|
||||
pd.DataFrame(index=[0, 0]),
|
||||
pd.DataFrame(columns=[0, 0]),
|
||||
],
|
||||
)
|
||||
def test_setting_allows_duplicate_labels_raises(self, data):
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
data.flags.allows_duplicate_labels = False
|
||||
|
||||
assert data.flags.allows_duplicate_labels is True
|
||||
|
||||
def test_series_raises(self):
|
||||
a = pd.Series(0, index=["a", "b"])
|
||||
b = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.concat([a, b])
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"getter, target",
|
||||
[
|
||||
(operator.itemgetter(["A", "A"]), None),
|
||||
# loc
|
||||
(operator.itemgetter(["a", "a"]), "loc"),
|
||||
pytest.param(operator.itemgetter(("a", ["A", "A"])), "loc"),
|
||||
(operator.itemgetter((["a", "a"], "A")), "loc"),
|
||||
# iloc
|
||||
(operator.itemgetter([0, 0]), "iloc"),
|
||||
pytest.param(operator.itemgetter((0, [0, 0])), "iloc"),
|
||||
pytest.param(operator.itemgetter(([0, 0], 0)), "iloc"),
|
||||
],
|
||||
)
|
||||
def test_getitem_raises(self, getter, target):
|
||||
df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, index=["a", "b"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
)
|
||||
if target:
|
||||
# df, df.loc, or df.iloc
|
||||
target = getattr(df, target)
|
||||
else:
|
||||
target = df
|
||||
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
getter(target)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"objs, kwargs",
|
||||
[
|
||||
(
|
||||
[
|
||||
pd.Series(1, index=[0, 1], name="a"),
|
||||
pd.Series(2, index=[0, 1], name="a"),
|
||||
],
|
||||
{"axis": 1},
|
||||
)
|
||||
],
|
||||
)
|
||||
def test_concat_raises(self, objs, kwargs):
|
||||
objs = [x.set_flags(allows_duplicate_labels=False) for x in objs]
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.concat(objs, **kwargs)
|
||||
|
||||
@not_implemented
|
||||
def test_merge_raises(self):
|
||||
a = pd.DataFrame({"A": [0, 1, 2]}, index=["a", "b", "c"]).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
)
|
||||
b = pd.DataFrame({"B": [0, 1, 2]}, index=["a", "b", "b"])
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.merge(a, b, left_index=True, right_index=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"idx",
|
||||
[
|
||||
pd.Index([1, 1]),
|
||||
pd.Index(["a", "a"]),
|
||||
pd.Index([1.1, 1.1]),
|
||||
pd.PeriodIndex([pd.Period("2000", "D")] * 2),
|
||||
pd.DatetimeIndex([pd.Timestamp("2000")] * 2),
|
||||
pd.TimedeltaIndex([pd.Timedelta("1D")] * 2),
|
||||
pd.CategoricalIndex(["a", "a"]),
|
||||
pd.IntervalIndex([pd.Interval(0, 1)] * 2),
|
||||
pd.MultiIndex.from_tuples([("a", 1), ("a", 1)]),
|
||||
],
|
||||
ids=lambda x: type(x).__name__,
|
||||
)
|
||||
def test_raises_basic(idx):
|
||||
msg = "Index has duplicates."
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.Series(1, index=idx).set_flags(allows_duplicate_labels=False)
|
||||
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.DataFrame({"A": [1, 1]}, index=idx).set_flags(allows_duplicate_labels=False)
|
||||
|
||||
with pytest.raises(pd.errors.DuplicateLabelError, match=msg):
|
||||
pd.DataFrame([[1, 2]], columns=idx).set_flags(allows_duplicate_labels=False)
|
||||
|
||||
|
||||
def test_format_duplicate_labels_message():
|
||||
idx = pd.Index(["a", "b", "a", "b", "c"])
|
||||
result = idx._format_duplicate_message()
|
||||
expected = pd.DataFrame(
|
||||
{"positions": [[0, 2], [1, 3]]}, index=pd.Index(["a", "b"], name="label")
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_format_duplicate_labels_message_multi():
|
||||
idx = pd.MultiIndex.from_product([["A"], ["a", "b", "a", "b", "c"]])
|
||||
result = idx._format_duplicate_message()
|
||||
expected = pd.DataFrame(
|
||||
{"positions": [[0, 2], [1, 3]]},
|
||||
index=pd.MultiIndex.from_product([["A"], ["a", "b"]]),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_dataframe_insert_raises():
|
||||
df = pd.DataFrame({"A": [1, 2]}).set_flags(allows_duplicate_labels=False)
|
||||
msg = "Cannot specify"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.insert(0, "A", [3, 4], allow_duplicates=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method, frame_only",
|
||||
[
|
||||
(operator.methodcaller("set_index", "A", inplace=True), True),
|
||||
(operator.methodcaller("reset_index", inplace=True), True),
|
||||
(operator.methodcaller("rename", lambda x: x, inplace=True), False),
|
||||
],
|
||||
)
|
||||
def test_inplace_raises(method, frame_only):
|
||||
df = pd.DataFrame({"A": [0, 0], "B": [1, 2]}).set_flags(
|
||||
allows_duplicate_labels=False
|
||||
)
|
||||
s = df["A"]
|
||||
s.flags.allows_duplicate_labels = False
|
||||
msg = "Cannot specify"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
method(df)
|
||||
if not frame_only:
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
method(s)
|
||||
|
||||
|
||||
def test_pickle():
|
||||
a = pd.Series([1, 2]).set_flags(allows_duplicate_labels=False)
|
||||
b = tm.round_trip_pickle(a)
|
||||
tm.assert_series_equal(a, b)
|
||||
|
||||
a = pd.DataFrame({"A": []}).set_flags(allows_duplicate_labels=False)
|
||||
b = tm.round_trip_pickle(a)
|
||||
tm.assert_frame_equal(a, b)
|
@@ -0,0 +1,767 @@
"""
An exhaustive list of pandas methods exercising NDFrame.__finalize__.
"""
import operator
import re

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm

# TODO:
# * Binary methods (mul, div, etc.)
# * Binary outputs (align, etc.)
# * top-level methods (concat, merge, get_dummies, etc.)
# * window
# * cumulative reductions

not_implemented_mark = pytest.mark.xfail(reason="not implemented")

mi = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["A", "B"])

frame_data = ({"A": [1]},)
frame_mi_data = ({"A": [1, 2, 3, 4]}, mi)


# Tuple of
# - Callable: Constructor (Series, DataFrame)
# - Tuple: Constructor args
# - Callable: pass the constructed value with attrs set to this.

_all_methods = [
|
||||
(pd.Series, ([0],), operator.methodcaller("take", [])),
|
||||
(pd.Series, ([0],), operator.methodcaller("__getitem__", [True])),
|
||||
(pd.Series, ([0],), operator.methodcaller("repeat", 2)),
|
||||
(pd.Series, ([0],), operator.methodcaller("reset_index")),
|
||||
(pd.Series, ([0],), operator.methodcaller("reset_index", drop=True)),
|
||||
(pd.Series, ([0],), operator.methodcaller("to_frame")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("drop_duplicates")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("duplicated")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("round")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("rename", lambda x: x + 1)),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("rename", "name")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("set_axis", ["a", "b"])),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("reindex", [1, 0])),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("drop", [0])),
|
||||
(pd.Series, (pd.array([0, pd.NA]),), operator.methodcaller("fillna", 0)),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("replace", {0: 1})),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("shift")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("isin", [0, 1])),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("between", 0, 2)),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("isna")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("isnull")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("notna")),
|
||||
(pd.Series, ([0, 0],), operator.methodcaller("notnull")),
|
||||
(pd.Series, ([1],), operator.methodcaller("add", pd.Series([1]))),
|
||||
# TODO: mul, div, etc.
|
||||
(
|
||||
pd.Series,
|
||||
([0], pd.period_range("2000", periods=1)),
|
||||
operator.methodcaller("to_timestamp"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
([0], pd.date_range("2000", periods=1)),
|
||||
operator.methodcaller("to_period"),
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("dot", pd.DataFrame(index=["A"])),
|
||||
),
|
||||
marks=pytest.mark.xfail(reason="Implement binary finalize"),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("transpose")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("__getitem__", "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("__getitem__", ["A"])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))),
|
||||
(pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("query", "A == 1")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1", engine="python")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("select_dtypes", include="int")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("assign", b=1)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("set_axis", ["A"])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("reindex", [0, 1])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("drop", columns=["A"])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("drop", index=[0])),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("rename", columns={"A": "a"})),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("rename", index=lambda x: x)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("fillna", "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("fillna", method="ffill")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("set_index", "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("reset_index")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("isna")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("isnull")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("notna")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("notnull")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("dropna")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("drop_duplicates")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("duplicated")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("sort_values", by="A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("sort_index")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("nlargest", 1, "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("nsmallest", 1, "A")),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("swaplevel")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("add", pd.DataFrame(*frame_data)),
|
||||
),
|
||||
# TODO: div, mul, etc.
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("combine", pd.DataFrame(*frame_data), operator.add),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("combine_first", pd.DataFrame(*frame_data)),
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("update", pd.DataFrame(*frame_data)),
|
||||
),
|
||||
marks=not_implemented_mark,
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("pivot", columns="A")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1], "B": [1]},),
|
||||
operator.methodcaller("pivot_table", columns="A"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1], "B": [1]},),
|
||||
operator.methodcaller("pivot_table", columns="A", aggfunc=["mean", "sum"]),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("stack")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("explode", "A")),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("unstack")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]},),
|
||||
operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("map", lambda x: x)),
|
||||
pytest.param(
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("merge", pd.DataFrame({"A": [1]})),
|
||||
),
|
||||
marks=not_implemented_mark,
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("round", 2)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("corr")),
|
||||
pytest.param(
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cov")),
|
||||
marks=[
|
||||
pytest.mark.filterwarnings("ignore::RuntimeWarning"),
|
||||
],
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("corrwith", pd.DataFrame(*frame_data)),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("count")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("nunique")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("idxmin")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("idxmax")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("mode")),
|
||||
(pd.Series, [0], operator.methodcaller("mode")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("median")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("quantile", numeric_only=True),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [pd.Timedelta(days=1), pd.Timedelta(days=2)]},),
|
||||
operator.methodcaller("quantile", numeric_only=False),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [np.datetime64("2022-01-01"), np.datetime64("2022-01-02")]},),
|
||||
operator.methodcaller("quantile", numeric_only=True),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1]}, [pd.Period("2000", "D")]),
|
||||
operator.methodcaller("to_timestamp"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1]}, [pd.Timestamp("2000")]),
|
||||
operator.methodcaller("to_period", freq="D"),
|
||||
),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("isin", [1])),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("isin", pd.Series([1]))),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_mi_data,
|
||||
operator.methodcaller("isin", pd.DataFrame({"A": [1]})),
|
||||
),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("pop", "A")),
|
||||
# Squeeze on columns, otherwise we'll end up with a scalar
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("squeeze", axis="columns")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("squeeze")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("rename_axis", index="a")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("rename_axis", columns="a")),
|
||||
# Unary ops
|
||||
(pd.DataFrame, frame_data, operator.neg),
|
||||
(pd.Series, [1], operator.neg),
|
||||
(pd.DataFrame, frame_data, operator.pos),
|
||||
(pd.Series, [1], operator.pos),
|
||||
(pd.DataFrame, frame_data, operator.inv),
|
||||
(pd.Series, [1], operator.inv),
|
||||
(pd.DataFrame, frame_data, abs),
|
||||
(pd.Series, [1], abs),
|
||||
(pd.DataFrame, frame_data, round),
|
||||
(pd.Series, [1], round),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("xs", "a")),
|
||||
(pd.Series, (1, mi), operator.methodcaller("xs", "a")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("get", "A")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_data,
|
||||
operator.methodcaller("reindex_like", pd.DataFrame({"A": [1, 2, 3]})),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
frame_data,
|
||||
operator.methodcaller("reindex_like", pd.Series([0, 1, 2])),
|
||||
),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("add_prefix", "_")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("add_suffix", "_")),
|
||||
(pd.Series, (1, ["a", "b"]), operator.methodcaller("add_prefix", "_")),
|
||||
(pd.Series, (1, ["a", "b"]), operator.methodcaller("add_suffix", "_")),
|
||||
(pd.Series, ([3, 2],), operator.methodcaller("sort_values")),
|
||||
(pd.Series, ([1] * 10,), operator.methodcaller("head")),
|
||||
(pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("head")),
|
||||
(pd.Series, ([1] * 10,), operator.methodcaller("tail")),
|
||||
(pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("tail")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("sample", n=2, replace=True)),
|
||||
(pd.DataFrame, (frame_data,), operator.methodcaller("sample", n=2, replace=True)),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("astype", float)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("astype", float)),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("copy")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("copy")),
|
||||
(pd.Series, ([1, 2], None, object), operator.methodcaller("infer_objects")),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": np.array([1, 2], dtype=object)},),
|
||||
operator.methodcaller("infer_objects"),
|
||||
),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")),
|
||||
(pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")),
|
||||
(pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("clip", lower=1)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("clip", lower=1)),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("asfreq", "h"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("asfreq", "h"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("at_time", "12:00"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("at_time", "12:00"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("between_time", "12:00", "13:00"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("between_time", "12:00", "13:00"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("last", "3D"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("last", "3D"),
|
||||
),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("rank")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("rank")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("where", np.array([True, False]))),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("truncate", before=0)),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("truncate", before=0)),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4, tz="UTC")),
|
||||
operator.methodcaller("tz_convert", "CET"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4, tz="UTC")),
|
||||
operator.methodcaller("tz_convert", "CET"),
|
||||
),
|
||||
(
|
||||
pd.Series,
|
||||
(1, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("tz_localize", "CET"),
|
||||
),
|
||||
(
|
||||
pd.DataFrame,
|
||||
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
operator.methodcaller("tz_localize", "CET"),
|
||||
),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("describe")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("describe")),
|
||||
(pd.Series, ([1, 2],), operator.methodcaller("pct_change")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("pct_change")),
|
||||
(pd.Series, ([1],), operator.methodcaller("transform", lambda x: x - x.min())),
|
||||
(
|
||||
pd.DataFrame,
|
||||
frame_mi_data,
|
||||
operator.methodcaller("transform", lambda x: x - x.min()),
|
||||
),
|
||||
(pd.Series, ([1],), operator.methodcaller("apply", lambda x: x)),
|
||||
(pd.DataFrame, frame_mi_data, operator.methodcaller("apply", lambda x: x)),
|
||||
# Cumulative reductions
|
||||
(pd.Series, ([1],), operator.methodcaller("cumsum")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cumsum")),
|
||||
(pd.Series, ([1],), operator.methodcaller("cummin")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cummin")),
|
||||
(pd.Series, ([1],), operator.methodcaller("cummax")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cummax")),
|
||||
(pd.Series, ([1],), operator.methodcaller("cumprod")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("cumprod")),
|
||||
# Reductions
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("any")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("all")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("min")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("max")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("sum")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("std")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("mean")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("prod")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("sem")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("skew")),
|
||||
(pd.DataFrame, frame_data, operator.methodcaller("kurt")),
|
||||
]
|
||||
|
||||
|
||||
def idfn(x):
|
||||
xpr = re.compile(r"'(.*)?'")
|
||||
m = xpr.search(str(x))
|
||||
if m:
|
||||
return m.group(1)
|
||||
else:
|
||||
return str(x)
|
||||
|
||||
|
||||
@pytest.fixture(params=_all_methods, ids=lambda x: idfn(x[-1]))
|
||||
def ndframe_method(request):
|
||||
"""
|
||||
An NDFrame method returning an NDFrame.
|
||||
"""
|
||||
return request.param
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(
|
||||
"ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning",
|
||||
"ignore:last is deprecated:FutureWarning",
|
||||
)
|
||||
def test_finalize_called(ndframe_method):
|
||||
cls, init_args, method = ndframe_method
|
||||
ndframe = cls(*init_args)
|
||||
|
||||
ndframe.attrs = {"a": 1}
|
||||
result = method(ndframe)
|
||||
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
pd.Series(1, pd.date_range("2000", periods=4)),
|
||||
pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
],
|
||||
)
|
||||
def test_finalize_first(data):
|
||||
deprecated_msg = "first is deprecated"
|
||||
|
||||
data.attrs = {"a": 1}
|
||||
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
|
||||
result = data.first("3D")
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
pd.Series(1, pd.date_range("2000", periods=4)),
|
||||
pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
|
||||
],
|
||||
)
|
||||
def test_finalize_last(data):
|
||||
# GH 53710
|
||||
deprecated_msg = "last is deprecated"
|
||||
|
||||
data.attrs = {"a": 1}
|
||||
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
|
||||
result = data.last("3D")
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@not_implemented_mark
|
||||
def test_finalize_called_eval_numexpr():
|
||||
pytest.importorskip("numexpr")
|
||||
df = pd.DataFrame({"A": [1, 2]})
|
||||
df.attrs["A"] = 1
|
||||
result = df.eval("A + 1", engine="numexpr")
|
||||
assert result.attrs == {"A": 1}
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Binary operations
|
||||
|
||||
|
||||
@pytest.mark.parametrize("annotate", ["left", "right", "both"])
|
||||
@pytest.mark.parametrize(
|
||||
"args",
|
||||
[
|
||||
(1, pd.Series([1])),
|
||||
(1, pd.DataFrame({"A": [1]})),
|
||||
(pd.Series([1]), 1),
|
||||
(pd.DataFrame({"A": [1]}), 1),
|
||||
(pd.Series([1]), pd.Series([1])),
|
||||
(pd.DataFrame({"A": [1]}), pd.DataFrame({"A": [1]})),
|
||||
(pd.Series([1]), pd.DataFrame({"A": [1]})),
|
||||
(pd.DataFrame({"A": [1]}), pd.Series([1])),
|
||||
],
|
||||
ids=lambda x: f"({type(x[0]).__name__},{type(x[1]).__name__})",
|
||||
)
|
||||
def test_binops(request, args, annotate, all_binary_operators):
|
||||
# This generates 624 tests... Is that needed?
|
||||
left, right = args
|
||||
if isinstance(left, (pd.DataFrame, pd.Series)):
|
||||
left.attrs = {}
|
||||
if isinstance(right, (pd.DataFrame, pd.Series)):
|
||||
right.attrs = {}
|
||||
|
||||
if annotate == "left" and isinstance(left, int):
|
||||
pytest.skip("left is an int and doesn't support .attrs")
|
||||
if annotate == "right" and isinstance(right, int):
|
||||
pytest.skip("right is an int and doesn't support .attrs")
|
||||
|
||||
if not (isinstance(left, int) or isinstance(right, int)) and annotate != "both":
|
||||
if not all_binary_operators.__name__.startswith("r"):
|
||||
if annotate == "right" and isinstance(left, type(right)):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when right has "
|
||||
f"attrs and both are {type(left)}"
|
||||
)
|
||||
)
|
||||
if not isinstance(left, type(right)):
|
||||
if annotate == "left" and isinstance(left, pd.Series):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when the "
|
||||
"objects are different Series has attrs"
|
||||
)
|
||||
)
|
||||
elif annotate == "right" and isinstance(right, pd.Series):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when the "
|
||||
"objects are different Series has attrs"
|
||||
)
|
||||
)
|
||||
else:
|
||||
if annotate == "left" and isinstance(left, type(right)):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when left has "
|
||||
f"attrs and both are {type(left)}"
|
||||
)
|
||||
)
|
||||
if not isinstance(left, type(right)):
|
||||
if annotate == "right" and isinstance(right, pd.Series):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when the "
|
||||
"objects are different Series has attrs"
|
||||
)
|
||||
)
|
||||
elif annotate == "left" and isinstance(left, pd.Series):
|
||||
request.applymarker(
|
||||
pytest.mark.xfail(
|
||||
reason=f"{all_binary_operators} doesn't work when the "
|
||||
"objects are different Series has attrs"
|
||||
)
|
||||
)
|
||||
if annotate in {"left", "both"} and not isinstance(left, int):
|
||||
left.attrs = {"a": 1}
|
||||
if annotate in {"right", "both"} and not isinstance(right, int):
|
||||
right.attrs = {"a": 1}
|
||||
|
||||
is_cmp = all_binary_operators in [
|
||||
operator.eq,
|
||||
operator.ne,
|
||||
operator.gt,
|
||||
operator.ge,
|
||||
operator.lt,
|
||||
operator.le,
|
||||
]
|
||||
if is_cmp and isinstance(left, pd.DataFrame) and isinstance(right, pd.Series):
|
||||
# in 2.0 silent alignment on comparisons was removed xref GH#28759
|
||||
left, right = left.align(right, axis=1, copy=False)
|
||||
elif is_cmp and isinstance(left, pd.Series) and isinstance(right, pd.DataFrame):
|
||||
right, left = right.align(left, axis=1, copy=False)
|
||||
|
||||
result = all_binary_operators(left, right)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Accessors
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
operator.methodcaller("capitalize"),
|
||||
operator.methodcaller("casefold"),
|
||||
operator.methodcaller("cat", ["a"]),
|
||||
operator.methodcaller("contains", "a"),
|
||||
operator.methodcaller("count", "a"),
|
||||
operator.methodcaller("encode", "utf-8"),
|
||||
operator.methodcaller("endswith", "a"),
|
||||
operator.methodcaller("extract", r"(\w)(\d)"),
|
||||
operator.methodcaller("extract", r"(\w)(\d)", expand=False),
|
||||
operator.methodcaller("find", "a"),
|
||||
operator.methodcaller("findall", "a"),
|
||||
operator.methodcaller("get", 0),
|
||||
operator.methodcaller("index", "a"),
|
||||
operator.methodcaller("len"),
|
||||
operator.methodcaller("ljust", 4),
|
||||
operator.methodcaller("lower"),
|
||||
operator.methodcaller("lstrip"),
|
||||
operator.methodcaller("match", r"\w"),
|
||||
operator.methodcaller("normalize", "NFC"),
|
||||
operator.methodcaller("pad", 4),
|
||||
operator.methodcaller("partition", "a"),
|
||||
operator.methodcaller("repeat", 2),
|
||||
operator.methodcaller("replace", "a", "b"),
|
||||
operator.methodcaller("rfind", "a"),
|
||||
operator.methodcaller("rindex", "a"),
|
||||
operator.methodcaller("rjust", 4),
|
||||
operator.methodcaller("rpartition", "a"),
|
||||
operator.methodcaller("rstrip"),
|
||||
operator.methodcaller("slice", 4),
|
||||
operator.methodcaller("slice_replace", 1, repl="a"),
|
||||
operator.methodcaller("startswith", "a"),
|
||||
operator.methodcaller("strip"),
|
||||
operator.methodcaller("swapcase"),
|
||||
operator.methodcaller("translate", {"a": "b"}),
|
||||
operator.methodcaller("upper"),
|
||||
operator.methodcaller("wrap", 4),
|
||||
operator.methodcaller("zfill", 4),
|
||||
operator.methodcaller("isalnum"),
|
||||
operator.methodcaller("isalpha"),
|
||||
operator.methodcaller("isdigit"),
|
||||
operator.methodcaller("isspace"),
|
||||
operator.methodcaller("islower"),
|
||||
operator.methodcaller("isupper"),
|
||||
operator.methodcaller("istitle"),
|
||||
operator.methodcaller("isnumeric"),
|
||||
operator.methodcaller("isdecimal"),
|
||||
operator.methodcaller("get_dummies"),
|
||||
],
|
||||
ids=idfn,
|
||||
)
|
||||
def test_string_method(method):
|
||||
s = pd.Series(["a1"])
|
||||
s.attrs = {"a": 1}
|
||||
result = method(s.str)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
operator.methodcaller("to_period"),
|
||||
operator.methodcaller("tz_localize", "CET"),
|
||||
operator.methodcaller("normalize"),
|
||||
operator.methodcaller("strftime", "%Y"),
|
||||
operator.methodcaller("round", "h"),
|
||||
operator.methodcaller("floor", "h"),
|
||||
operator.methodcaller("ceil", "h"),
|
||||
operator.methodcaller("month_name"),
|
||||
operator.methodcaller("day_name"),
|
||||
],
|
||||
ids=idfn,
|
||||
)
|
||||
def test_datetime_method(method):
|
||||
s = pd.Series(pd.date_range("2000", periods=4))
|
||||
s.attrs = {"a": 1}
|
||||
result = method(s.dt)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"attr",
|
||||
[
|
||||
"date",
|
||||
"time",
|
||||
"timetz",
|
||||
"year",
|
||||
"month",
|
||||
"day",
|
||||
"hour",
|
||||
"minute",
|
||||
"second",
|
||||
"microsecond",
|
||||
"nanosecond",
|
||||
"dayofweek",
|
||||
"day_of_week",
|
||||
"dayofyear",
|
||||
"day_of_year",
|
||||
"quarter",
|
||||
"is_month_start",
|
||||
"is_month_end",
|
||||
"is_quarter_start",
|
||||
"is_quarter_end",
|
||||
"is_year_start",
|
||||
"is_year_end",
|
||||
"is_leap_year",
|
||||
"daysinmonth",
|
||||
"days_in_month",
|
||||
],
|
||||
)
|
||||
def test_datetime_property(attr):
|
||||
s = pd.Series(pd.date_range("2000", periods=4))
|
||||
s.attrs = {"a": 1}
|
||||
result = getattr(s.dt, attr)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"attr", ["days", "seconds", "microseconds", "nanoseconds", "components"]
|
||||
)
|
||||
def test_timedelta_property(attr):
|
||||
s = pd.Series(pd.timedelta_range("2000", periods=4))
|
||||
s.attrs = {"a": 1}
|
||||
result = getattr(s.dt, attr)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", [operator.methodcaller("total_seconds")])
|
||||
def test_timedelta_methods(method):
|
||||
s = pd.Series(pd.timedelta_range("2000", periods=4))
|
||||
s.attrs = {"a": 1}
|
||||
result = method(s.dt)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
operator.methodcaller("add_categories", ["c"]),
|
||||
operator.methodcaller("as_ordered"),
|
||||
operator.methodcaller("as_unordered"),
|
||||
lambda x: getattr(x, "codes"),
|
||||
operator.methodcaller("remove_categories", "a"),
|
||||
operator.methodcaller("remove_unused_categories"),
|
||||
operator.methodcaller("rename_categories", {"a": "A", "b": "B"}),
|
||||
operator.methodcaller("reorder_categories", ["b", "a"]),
|
||||
operator.methodcaller("set_categories", ["A", "B"]),
|
||||
],
|
||||
)
|
||||
@not_implemented_mark
|
||||
def test_categorical_accessor(method):
|
||||
s = pd.Series(["a", "b"], dtype="category")
|
||||
s.attrs = {"a": 1}
|
||||
result = method(s.cat)
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Groupby
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
operator.methodcaller("sum"),
|
||||
lambda x: x.apply(lambda y: y),
|
||||
lambda x: x.agg("sum"),
|
||||
lambda x: x.agg("mean"),
|
||||
lambda x: x.agg("median"),
|
||||
],
|
||||
)
|
||||
def test_groupby_finalize(obj, method):
|
||||
obj.attrs = {"a": 1}
|
||||
result = method(obj.groupby([0, 0], group_keys=False))
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
lambda x: x.agg(["sum", "count"]),
|
||||
lambda x: x.agg("std"),
|
||||
lambda x: x.agg("var"),
|
||||
lambda x: x.agg("sem"),
|
||||
lambda x: x.agg("size"),
|
||||
lambda x: x.agg("ohlc"),
|
||||
],
|
||||
)
|
||||
@not_implemented_mark
|
||||
def test_groupby_finalize_not_implemented(obj, method):
|
||||
obj.attrs = {"a": 1}
|
||||
result = method(obj.groupby([0, 0]))
|
||||
assert result.attrs == {"a": 1}
|
||||
|
||||
|
||||
def test_finalize_frame_series_name():
|
||||
# https://github.com/pandas-dev/pandas/pull/37186/files#r506978889
|
||||
# ensure we don't copy the column `name` to the Series.
|
||||
df = pd.DataFrame({"name": [1, 2]})
|
||||
result = pd.Series([1, 2]).__finalize__(df)
|
||||
assert result.name is None
|
lib/python3.13/site-packages/pandas/tests/generic/test_frame.py
@@ -0,0 +1,209 @@
from copy import deepcopy
from operator import methodcaller

import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    MultiIndex,
    Series,
    date_range,
)
import pandas._testing as tm


class TestDataFrame:
|
||||
@pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"])
|
||||
def test_set_axis_name(self, func):
|
||||
df = DataFrame([[1, 2], [3, 4]])
|
||||
|
||||
result = methodcaller(func, "foo")(df)
|
||||
assert df.index.name is None
|
||||
assert result.index.name == "foo"
|
||||
|
||||
result = methodcaller(func, "cols", axis=1)(df)
|
||||
assert df.columns.name is None
|
||||
assert result.columns.name == "cols"
|
||||
|
||||
@pytest.mark.parametrize("func", ["_set_axis_name", "rename_axis"])
|
||||
def test_set_axis_name_mi(self, func):
|
||||
df = DataFrame(
|
||||
np.empty((3, 3)),
|
||||
index=MultiIndex.from_tuples([("A", x) for x in list("aBc")]),
|
||||
columns=MultiIndex.from_tuples([("C", x) for x in list("xyz")]),
|
||||
)
|
||||
|
||||
level_names = ["L1", "L2"]
|
||||
|
||||
result = methodcaller(func, level_names)(df)
|
||||
assert result.index.names == level_names
|
||||
assert result.columns.names == [None, None]
|
||||
|
||||
result = methodcaller(func, level_names, axis=1)(df)
|
||||
assert result.columns.names == ["L1", "L2"]
|
||||
assert result.index.names == [None, None]
|
||||
|
||||
def test_nonzero_single_element(self):
|
||||
# allow single item via bool method
|
||||
msg_warn = (
|
||||
"DataFrame.bool is now deprecated and will be removed "
|
||||
"in future version of pandas"
|
||||
)
|
||||
df = DataFrame([[True]])
|
||||
df1 = DataFrame([[False]])
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg_warn):
|
||||
assert df.bool()
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg_warn):
|
||||
assert not df1.bool()
|
||||
|
||||
df = DataFrame([[False, False]])
|
||||
msg_err = "The truth value of a DataFrame is ambiguous"
|
||||
with pytest.raises(ValueError, match=msg_err):
|
||||
bool(df)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg_warn):
|
||||
with pytest.raises(ValueError, match=msg_err):
|
||||
df.bool()
|
||||
|
||||
def test_metadata_propagation_indiv_groupby(self):
|
||||
# groupby
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
|
||||
"B": ["one", "one", "two", "three", "two", "two", "one", "three"],
|
||||
"C": np.random.default_rng(2).standard_normal(8),
|
||||
"D": np.random.default_rng(2).standard_normal(8),
|
||||
}
|
||||
)
|
||||
result = df.groupby("A").sum()
|
||||
tm.assert_metadata_equivalent(df, result)
|
||||
|
||||
def test_metadata_propagation_indiv_resample(self):
|
||||
# resample
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((1000, 2)),
|
||||
index=date_range("20130101", periods=1000, freq="s"),
|
||||
)
|
||||
result = df.resample("1min")
|
||||
tm.assert_metadata_equivalent(df, result)
|
||||
|
||||
def test_metadata_propagation_indiv(self, monkeypatch):
|
||||
# merging with override
|
||||
# GH 6923
|
||||
|
||||
def finalize(self, other, method=None, **kwargs):
|
||||
for name in self._metadata:
|
||||
if method == "merge":
|
||||
left, right = other.left, other.right
|
||||
value = getattr(left, name, "") + "|" + getattr(right, name, "")
|
||||
object.__setattr__(self, name, value)
|
||||
elif method == "concat":
|
||||
value = "+".join(
|
||||
[getattr(o, name) for o in other.objs if getattr(o, name, None)]
|
||||
)
|
||||
object.__setattr__(self, name, value)
|
||||
else:
|
||||
object.__setattr__(self, name, getattr(other, name, ""))
|
||||
|
||||
return self
|
||||
|
||||
with monkeypatch.context() as m:
|
||||
m.setattr(DataFrame, "_metadata", ["filename"])
|
||||
m.setattr(DataFrame, "__finalize__", finalize)
|
||||
|
||||
df1 = DataFrame(
|
||||
np.random.default_rng(2).integers(0, 4, (3, 2)), columns=["a", "b"]
|
||||
)
|
||||
df2 = DataFrame(
|
||||
np.random.default_rng(2).integers(0, 4, (3, 2)), columns=["c", "d"]
|
||||
)
|
||||
DataFrame._metadata = ["filename"]
|
||||
df1.filename = "fname1.csv"
|
||||
df2.filename = "fname2.csv"
|
||||
|
||||
result = df1.merge(df2, left_on=["a"], right_on=["c"], how="inner")
|
||||
assert result.filename == "fname1.csv|fname2.csv"
|
||||
|
||||
# concat
|
||||
# GH#6927
|
||||
df1 = DataFrame(
|
||||
np.random.default_rng(2).integers(0, 4, (3, 2)), columns=list("ab")
|
||||
)
|
||||
df1.filename = "foo"
|
||||
|
||||
result = pd.concat([df1, df1])
|
||||
assert result.filename == "foo+foo"
|
||||
|
||||
def test_set_attribute(self):
|
||||
# Test for consistent setattr behavior when an attribute and a column
|
||||
# have the same name (Issue #8994)
|
||||
df = DataFrame({"x": [1, 2, 3]})
|
||||
|
||||
df.y = 2
|
||||
df["y"] = [2, 4, 6]
|
||||
df.y = 5
|
||||
|
||||
assert df.y == 5
|
||||
tm.assert_series_equal(df["y"], Series([2, 4, 6], name="y"))
|
||||
|
||||
def test_deepcopy_empty(self):
|
||||
# This test covers empty frame copying with non-empty column sets
|
||||
# as reported in issue GH15370
|
||||
empty_frame = DataFrame(data=[], index=[], columns=["A"])
|
||||
empty_frame_copy = deepcopy(empty_frame)
|
||||
|
||||
tm.assert_frame_equal(empty_frame_copy, empty_frame)
|
||||
|
||||
|
||||
# formerly in Generic but only test DataFrame
|
||||
class TestDataFrame2:
|
||||
@pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
|
||||
def test_validate_bool_args(self, value):
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
||||
|
||||
msg = 'For argument "inplace" expected type bool, received type'
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().rename_axis(mapper={"a": "x", "b": "y"}, axis=1, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().drop("a", axis=1, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().fillna(value=0, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().replace(to_replace=1, value=7, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().interpolate(inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy()._where(cond=df.a > 2, inplace=value)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.copy().mask(cond=df.a > 2, inplace=value)
|
||||
|
||||
def test_unexpected_keyword(self):
|
||||
# GH8597
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((5, 2)), columns=["jim", "joe"]
|
||||
)
|
||||
ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
|
||||
ts = df["joe"].copy()
|
||||
ts[2] = np.nan
|
||||
|
||||
msg = "unexpected keyword"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.drop("joe", axis=1, in_place=True)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
df.reindex([1, 0], inplace=True)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ca.fillna(0, inplace=True)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ts.fillna(0, in_place=True)
|
@@ -0,0 +1,504 @@
from copy import (
    copy,
    deepcopy,
)

import numpy as np
import pytest

from pandas.core.dtypes.common import is_scalar

from pandas import (
    DataFrame,
    Index,
    Series,
    date_range,
)
import pandas._testing as tm

# ----------------------------------------------------------------------
# Generic types test cases


def construct(box, shape, value=None, dtype=None, **kwargs):
    """
    construct an object for the given shape
    if value is specified use that if its a scalar
    if value is an array, repeat it as needed
    """
    if isinstance(shape, int):
        shape = tuple([shape] * box._AXIS_LEN)
    if value is not None:
        if is_scalar(value):
            if value == "empty":
                arr = None
                dtype = np.float64

                # remove the info axis
                kwargs.pop(box._info_axis_name, None)
            else:
                arr = np.empty(shape, dtype=dtype)
                arr.fill(value)
        else:
            fshape = np.prod(shape)
            arr = value.ravel()
            new_shape = fshape / arr.shape[0]
            if fshape % arr.shape[0] != 0:
                raise Exception("invalid value passed in construct")

            arr = np.repeat(arr, new_shape).reshape(shape)
    else:
        arr = np.random.default_rng(2).standard_normal(shape)
    return box(arr, dtype=dtype, **kwargs)
|
||||
|
||||
|
||||
class TestGeneric:
|
||||
@pytest.mark.parametrize(
|
||||
"func",
|
||||
[
|
||||
str.lower,
|
||||
{x: x.lower() for x in list("ABCD")},
|
||||
Series({x: x.lower() for x in list("ABCD")}),
|
||||
],
|
||||
)
|
||||
def test_rename(self, frame_or_series, func):
|
||||
# single axis
|
||||
idx = list("ABCD")
|
||||
|
||||
for axis in frame_or_series._AXIS_ORDERS:
|
||||
kwargs = {axis: idx}
|
||||
obj = construct(frame_or_series, 4, **kwargs)
|
||||
|
||||
# rename a single axis
|
||||
result = obj.rename(**{axis: func})
|
||||
expected = obj.copy()
|
||||
setattr(expected, axis, list("abcd"))
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_get_numeric_data(self, frame_or_series):
|
||||
n = 4
|
||||
kwargs = {
|
||||
frame_or_series._get_axis_name(i): list(range(n))
|
||||
for i in range(frame_or_series._AXIS_LEN)
|
||||
}
|
||||
|
||||
# get the numeric data
|
||||
o = construct(frame_or_series, n, **kwargs)
|
||||
result = o._get_numeric_data()
|
||||
tm.assert_equal(result, o)
|
||||
|
||||
# non-inclusion
|
||||
result = o._get_bool_data()
|
||||
expected = construct(frame_or_series, n, value="empty", **kwargs)
|
||||
if isinstance(o, DataFrame):
|
||||
# preserve columns dtype
|
||||
expected.columns = o.columns[:0]
|
||||
# https://github.com/pandas-dev/pandas/issues/50862
|
||||
tm.assert_equal(result.reset_index(drop=True), expected)
|
||||
|
||||
# get the bool data
|
||||
arr = np.array([True, True, False, True])
|
||||
o = construct(frame_or_series, n, value=arr, **kwargs)
|
||||
result = o._get_numeric_data()
|
||||
tm.assert_equal(result, o)
|
||||
|
||||
def test_nonzero(self, frame_or_series):
|
||||
# GH 4633
|
||||
# look at the boolean/nonzero behavior for objects
|
||||
obj = construct(frame_or_series, shape=4)
|
||||
msg = f"The truth value of a {frame_or_series.__name__} is ambiguous"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj)
|
||||
|
||||
obj = construct(frame_or_series, shape=4, value=1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj)
|
||||
|
||||
obj = construct(frame_or_series, shape=4, value=np.nan)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj == 1)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj)
|
||||
|
||||
# empty
|
||||
obj = construct(frame_or_series, shape=0)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
bool(obj)
|
||||
|
||||
# invalid behaviors
|
||||
|
||||
obj1 = construct(frame_or_series, shape=4, value=1)
|
||||
obj2 = construct(frame_or_series, shape=4, value=1)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
if obj1:
|
||||
pass
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
obj1 and obj2
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
obj1 or obj2
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
not obj1

    def test_frame_or_series_compound_dtypes(self, frame_or_series):
        # see gh-5191
        # Compound dtypes should raise NotImplementedError.

        def f(dtype):
            return construct(frame_or_series, shape=3, value=1, dtype=dtype)

        msg = (
            "compound dtypes are not implemented "
            f"in the {frame_or_series.__name__} constructor"
        )

        with pytest.raises(NotImplementedError, match=msg):
            f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])

        # these work (though results may be unexpected)
        f("int64")
        f("float64")
        f("M8[ns]")

    def test_metadata_propagation(self, frame_or_series):
        # check that the metadata matches up on the resulting ops

        o = construct(frame_or_series, shape=3)
        o.name = "foo"
        o2 = construct(frame_or_series, shape=3)
        o2.name = "bar"

        # ----------
        # preserving
        # ----------

        # simple ops with scalars
        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
            result = getattr(o, op)(1)
            tm.assert_metadata_equivalent(o, result)

        # ops with like
        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
            result = getattr(o, op)(o)
            tm.assert_metadata_equivalent(o, result)

        # simple boolean
        for op in ["__eq__", "__le__", "__ge__"]:
            v1 = getattr(o, op)(o)
            tm.assert_metadata_equivalent(o, v1)
            tm.assert_metadata_equivalent(o, v1 & v1)
            tm.assert_metadata_equivalent(o, v1 | v1)

        # combine_first
        result = o.combine_first(o2)
        tm.assert_metadata_equivalent(o, result)

        # ---------------------------
        # non-preserving (by default)
        # ---------------------------

        # add non-like
        result = o + o2
        tm.assert_metadata_equivalent(result)

        # simple boolean
        for op in ["__eq__", "__le__", "__ge__"]:
            # this is a name matching op
            v1 = getattr(o, op)(o)
            v2 = getattr(o, op)(o2)
            tm.assert_metadata_equivalent(v2)
            tm.assert_metadata_equivalent(v1 & v2)
            tm.assert_metadata_equivalent(v1 | v2)
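
    # Editor's sketch, not in the original file: name metadata survives scalar and
    # like-named operations and is dropped when operand names differ. Helper name
    # hypothetical.
    def _editor_example_name_propagation(self):
        import pandas as pd

        foo = pd.Series([1, 2], name="foo")
        bar = pd.Series([1, 2], name="bar")
        assert (foo + 1).name == "foo"      # scalar op preserves the name
        assert (foo + foo).name == "foo"    # like-named operands preserve it
        assert (foo + bar).name is None     # mismatched names are dropped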

    def test_size_compat(self, frame_or_series):
        # GH8846
        # size property should be defined

        o = construct(frame_or_series, shape=10)
        assert o.size == np.prod(o.shape)
        assert o.size == 10 ** len(o.axes)

    def test_split_compat(self, frame_or_series):
        # xref GH8846
        o = construct(frame_or_series, shape=10)
        with tm.assert_produces_warning(
            FutureWarning, match=".swapaxes' is deprecated", check_stacklevel=False
        ):
            assert len(np.array_split(o, 5)) == 5
            assert len(np.array_split(o, 2)) == 2

    # See gh-12301
    def test_stat_unexpected_keyword(self, frame_or_series):
        obj = construct(frame_or_series, 5)
        starwars = "Star Wars"
        errmsg = "unexpected keyword"

        with pytest.raises(TypeError, match=errmsg):
            obj.max(epic=starwars)  # stat_function
        with pytest.raises(TypeError, match=errmsg):
            obj.var(epic=starwars)  # stat_function_ddof
        with pytest.raises(TypeError, match=errmsg):
            obj.sum(epic=starwars)  # cum_function
        with pytest.raises(TypeError, match=errmsg):
            obj.any(epic=starwars)  # logical_function

    @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
    def test_api_compat(self, func, frame_or_series):
        # GH 12021
        # compat for __name__, __qualname__

        obj = construct(frame_or_series, 5)
        f = getattr(obj, func)
        assert f.__name__ == func
        assert f.__qualname__.endswith(func)

    def test_stat_non_defaults_args(self, frame_or_series):
        obj = construct(frame_or_series, 5)
        out = np.array([0])
        errmsg = "the 'out' parameter is not supported"

        with pytest.raises(ValueError, match=errmsg):
            obj.max(out=out)  # stat_function
        with pytest.raises(ValueError, match=errmsg):
            obj.var(out=out)  # stat_function_ddof
        with pytest.raises(ValueError, match=errmsg):
            obj.sum(out=out)  # cum_function
        with pytest.raises(ValueError, match=errmsg):
            obj.any(out=out)  # logical_function

    def test_truncate_out_of_bounds(self, frame_or_series):
        # GH11382

        # small
        shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1))
        small = construct(frame_or_series, shape, dtype="int8", value=1)
        tm.assert_equal(small.truncate(), small)
        tm.assert_equal(small.truncate(before=0, after=3e3), small)
        tm.assert_equal(small.truncate(before=-1, after=2e3), small)

        # big
        shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1))
        big = construct(frame_or_series, shape, dtype="int8", value=1)
        tm.assert_equal(big.truncate(), big)
        tm.assert_equal(big.truncate(before=0, after=3e6), big)
        tm.assert_equal(big.truncate(before=-1, after=2e6), big)

    @pytest.mark.parametrize(
        "func",
        [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)],
    )
    @pytest.mark.parametrize("shape", [0, 1, 2])
    def test_copy_and_deepcopy(self, frame_or_series, shape, func):
        # GH 15444
        obj = construct(frame_or_series, shape)
        obj_copy = func(obj)
        assert obj_copy is not obj
        tm.assert_equal(obj_copy, obj)

    def test_data_deprecated(self, frame_or_series):
        obj = frame_or_series()
        msg = "(Series|DataFrame)._data is deprecated"
        with tm.assert_produces_warning(DeprecationWarning, match=msg):
            mgr = obj._data
        assert mgr is obj._mgr


class TestNDFrame:
    # tests that don't fit elsewhere

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_squeeze_series_noop(self, ser):
        # noop
        tm.assert_series_equal(ser.squeeze(), ser)

    def test_squeeze_frame_noop(self):
        # noop
        df = DataFrame(np.eye(2))
        tm.assert_frame_equal(df.squeeze(), df)

    def test_squeeze_frame_reindex(self):
        # squeezing
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        ).reindex(columns=["A"])
        tm.assert_series_equal(df.squeeze(), df["A"])

    def test_squeeze_0_len_dim(self):
        # don't fail with 0 length dimensions GH11229 & GH8999
        empty_series = Series([], name="five", dtype=np.float64)
        empty_frame = DataFrame([empty_series])
        tm.assert_series_equal(empty_series, empty_series.squeeze())
        tm.assert_series_equal(empty_series, empty_frame.squeeze())

    def test_squeeze_axis(self):
        # axis argument
        df = DataFrame(
            np.random.default_rng(2).standard_normal((1, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=1, freq="B"),
        ).iloc[:, :1]
        assert df.shape == (1, 1)
        tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
        tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
        tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
        tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
        assert df.squeeze() == df.iloc[0, 0]
        msg = "No axis named 2 for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.squeeze(axis=2)
        msg = "No axis named x for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.squeeze(axis="x")

    def test_squeeze_axis_len_3(self):
        df = DataFrame(
            np.random.default_rng(2).standard_normal((3, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=3, freq="B"),
        )
        tm.assert_frame_equal(df.squeeze(axis=0), df)

    def test_numpy_squeeze(self):
        s = Series(range(2), dtype=np.float64)
        tm.assert_series_equal(np.squeeze(s), s)

        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        ).reindex(columns=["A"])
        tm.assert_series_equal(np.squeeze(df), df["A"])
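
    # Editor's sketch, not in the original file: squeeze drops length-1 axes, so a
    # single-column frame becomes a Series and a 1x1 frame becomes a scalar, while
    # larger objects are returned unchanged. Helper name hypothetical.
    def _editor_example_squeeze(self):
        import pandas as pd

        df = pd.DataFrame({"A": [1.0, 2.0]})
        assert isinstance(df.squeeze(), pd.Series)       # one column -> Series
        assert df.iloc[:1].squeeze() == 1.0              # 1x1 -> scalar
        assert df.assign(B=0).squeeze().shape == (2, 2)  # nothing to squeeze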

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_transpose_series(self, ser):
        # calls implementation in pandas/core/base.py
        tm.assert_series_equal(ser.transpose(), ser)

    def test_transpose_frame(self):
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        tm.assert_frame_equal(df.transpose().transpose(), df)

    def test_numpy_transpose(self, frame_or_series):
        obj = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        obj = tm.get_obj(obj, frame_or_series)

        if frame_or_series is Series:
            # 1D -> np.transpose is no-op
            tm.assert_series_equal(np.transpose(obj), obj)

        # round-trip preserved
        tm.assert_equal(np.transpose(np.transpose(obj)), obj)

        msg = "the 'axes' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.transpose(obj, axes=1)

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_take_series(self, ser):
        indices = [1, 5, -2, 6, 3, -1]
        out = ser.take(indices)
        expected = Series(
            data=ser.values.take(indices),
            index=ser.index.take(indices),
            dtype=ser.dtype,
        )
        tm.assert_series_equal(out, expected)

    def test_take_frame(self):
        indices = [1, 5, -2, 6, 3, -1]
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        out = df.take(indices)
        expected = DataFrame(
            data=df.values.take(indices, axis=0),
            index=df.index.take(indices),
            columns=df.columns,
        )
        tm.assert_frame_equal(out, expected)
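
    # Editor's sketch, not in the original file: take selects by position along
    # one axis and matches positional iloc indexing. Helper name hypothetical.
    def _editor_example_take(self):
        import pandas as pd

        df = pd.DataFrame({"A": [10, 20, 30], "B": [1, 2, 3]})
        positions = [2, 0]
        assert df.take(positions).equals(df.iloc[positions])  # rows
        assert df.take([1], axis=1).equals(df.iloc[:, [1]])   # columns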

    def test_take_invalid_kwargs(self, frame_or_series):
        indices = [-3, 2, 0, 1]

        obj = DataFrame(range(5))
        obj = tm.get_obj(obj, frame_or_series)

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            obj.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, mode="clip")

    def test_axis_classmethods(self, frame_or_series):
        box = frame_or_series
        obj = box(dtype=object)
        values = box._AXIS_TO_AXIS_NUMBER.keys()
        for v in values:
            assert obj._get_axis_number(v) == box._get_axis_number(v)
            assert obj._get_axis_name(v) == box._get_axis_name(v)
            assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v)

    def test_flags_identity(self, frame_or_series):
        obj = Series([1, 2])
        if frame_or_series is DataFrame:
            obj = obj.to_frame()

        assert obj.flags is obj.flags
        obj2 = obj.copy()
        assert obj2.flags is not obj.flags

    def test_bool_dep(self) -> None:
        # GH-51749
        msg_warn = (
            "DataFrame.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            DataFrame({"col": [False]}).bool()
@ -0,0 +1,336 @@
import pytest

from pandas.core.dtypes.missing import array_equivalent

import pandas as pd


# Fixtures
# ========
@pytest.fixture
def df():
    """DataFrame with columns 'L1', 'L2', and 'L3'"""
    return pd.DataFrame({"L1": [1, 2, 3], "L2": [11, 12, 13], "L3": ["A", "B", "C"]})


@pytest.fixture(params=[[], ["L1"], ["L1", "L2"], ["L1", "L2", "L3"]])
def df_levels(request, df):
    """DataFrame with columns or index levels 'L1', 'L2', and 'L3'"""
    levels = request.param

    if levels:
        df = df.set_index(levels)

    return df


@pytest.fixture
def df_ambig(df):
    """DataFrame with levels 'L1' and 'L2' and labels 'L1' and 'L3'"""
    df = df.set_index(["L1", "L2"])

    df["L1"] = df["L3"]

    return df


@pytest.fixture
def df_duplabels(df):
    """DataFrame with level 'L1' and labels 'L2', 'L3', and 'L2'"""
    df = df.set_index(["L1"])
    df = pd.concat([df, df["L2"]], axis=1)

    return df


# Test is label/level reference
# =============================
def get_labels_levels(df_levels):
    expected_labels = list(df_levels.columns)
    expected_levels = [name for name in df_levels.index.names if name is not None]
    return expected_labels, expected_levels


def assert_label_reference(frame, labels, axis):
    for label in labels:
        assert frame._is_label_reference(label, axis=axis)
        assert not frame._is_level_reference(label, axis=axis)
        assert frame._is_label_or_level_reference(label, axis=axis)


def assert_level_reference(frame, levels, axis):
    for level in levels:
        assert frame._is_level_reference(level, axis=axis)
        assert not frame._is_label_reference(level, axis=axis)
        assert frame._is_label_or_level_reference(level, axis=axis)


# DataFrame
# ---------
def test_is_level_or_label_reference_df_simple(df_levels, axis):
    axis = df_levels._get_axis_number(axis)
    # Compute expected labels and levels
    expected_labels, expected_levels = get_labels_levels(df_levels)

    # Transpose frame if axis == 1
    if axis == 1:
        df_levels = df_levels.T

    # Perform checks
    assert_level_reference(df_levels, expected_levels, axis=axis)
    assert_label_reference(df_levels, expected_labels, axis=axis)


def test_is_level_reference_df_ambig(df_ambig, axis):
    axis = df_ambig._get_axis_number(axis)

    # Transpose frame if axis == 1
    if axis == 1:
        df_ambig = df_ambig.T

    # df has both an on-axis level and off-axis label named L1
    # Therefore L1 should reference the label, not the level
    assert_label_reference(df_ambig, ["L1"], axis=axis)

    # df has an on-axis level named L2 and it is not ambiguous
    # Therefore L2 is a level reference
    assert_level_reference(df_ambig, ["L2"], axis=axis)

    # df has a column named L3 and it is not a level reference
    assert_label_reference(df_ambig, ["L3"], axis=axis)
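

# Editor's sketch, not in the original file: the private helpers above back the
# public "label or level" semantics used by methods such as groupby and merge,
# where a key name may resolve to either a column or an index level. Function
# name hypothetical.
def _editor_example_label_or_level_groupby():
    frame = pd.DataFrame({"L1": [1, 1, 2], "L2": [10, 20, 30]})
    by_column = frame.groupby("L1")["L2"].sum()
    by_level = frame.set_index("L1").groupby("L1")["L2"].sum()
    pd.testing.assert_series_equal(by_column, by_level)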


# Series
# ------
def test_is_level_reference_series_simple_axis0(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2
    assert_level_reference(s, ["L1"], axis=0)
    assert not s._is_level_reference("L2")

    # Make series with L1 and L2 as index
    s = df.set_index(["L1", "L2"]).L3
    assert_level_reference(s, ["L1", "L2"], axis=0)
    assert not s._is_level_reference("L3")


def test_is_level_reference_series_axis1_error(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2

    with pytest.raises(ValueError, match="No axis named 1"):
        s._is_level_reference("L1", axis=1)


# Test _check_label_or_level_ambiguity_df
# =======================================


# DataFrame
# ---------
def test_check_label_or_level_ambiguity_df(df_ambig, axis):
    axis = df_ambig._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_ambig = df_ambig.T
        msg = "'L1' is both a column level and an index label"

    else:
        msg = "'L1' is both an index level and a column label"
    # df_ambig has both an on-axis level and off-axis label named L1
    # Therefore, L1 is ambiguous.
    with pytest.raises(ValueError, match=msg):
        df_ambig._check_label_or_level_ambiguity("L1", axis=axis)

    # df_ambig has an on-axis level named L2, and it is not ambiguous.
    df_ambig._check_label_or_level_ambiguity("L2", axis=axis)

    # df_ambig has an off-axis label named L3, and it is not ambiguous
    assert not df_ambig._check_label_or_level_ambiguity("L3", axis=axis)


# Series
# ------
def test_check_label_or_level_ambiguity_series(df):
    # A series has no columns and therefore references are never ambiguous

    # Make series with L1 as index
    s = df.set_index("L1").L2
    s._check_label_or_level_ambiguity("L1", axis=0)
    s._check_label_or_level_ambiguity("L2", axis=0)

    # Make series with L1 and L2 as index
    s = df.set_index(["L1", "L2"]).L3
    s._check_label_or_level_ambiguity("L1", axis=0)
    s._check_label_or_level_ambiguity("L2", axis=0)
    s._check_label_or_level_ambiguity("L3", axis=0)


def test_check_label_or_level_ambiguity_series_axis1_error(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2

    with pytest.raises(ValueError, match="No axis named 1"):
        s._check_label_or_level_ambiguity("L1", axis=1)


# Test _get_label_or_level_values
# ===============================
def assert_label_values(frame, labels, axis):
    axis = frame._get_axis_number(axis)
    for label in labels:
        if axis == 0:
            expected = frame[label]._values
        else:
            expected = frame.loc[label]._values

        result = frame._get_label_or_level_values(label, axis=axis)
        assert array_equivalent(expected, result)


def assert_level_values(frame, levels, axis):
    axis = frame._get_axis_number(axis)
    for level in levels:
        if axis == 0:
            expected = frame.index.get_level_values(level=level)._values
        else:
            expected = frame.columns.get_level_values(level=level)._values

        result = frame._get_label_or_level_values(level, axis=axis)
        assert array_equivalent(expected, result)


# DataFrame
# ---------
def test_get_label_or_level_values_df_simple(df_levels, axis):
    # Compute expected labels and levels
    expected_labels, expected_levels = get_labels_levels(df_levels)

    axis = df_levels._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_levels = df_levels.T

    # Perform checks
    assert_label_values(df_levels, expected_labels, axis=axis)
    assert_level_values(df_levels, expected_levels, axis=axis)


def test_get_label_or_level_values_df_ambig(df_ambig, axis):
    axis = df_ambig._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_ambig = df_ambig.T

    # df has an on-axis level named L2, and it is not ambiguous.
    assert_level_values(df_ambig, ["L2"], axis=axis)

    # df has an off-axis label named L3, and it is not ambiguous.
    assert_label_values(df_ambig, ["L3"], axis=axis)


def test_get_label_or_level_values_df_duplabels(df_duplabels, axis):
    axis = df_duplabels._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_duplabels = df_duplabels.T

    # df has unambiguous level 'L1'
    assert_level_values(df_duplabels, ["L1"], axis=axis)

    # df has unique label 'L3'
    assert_label_values(df_duplabels, ["L3"], axis=axis)

    # df has duplicate labels 'L2'
    if axis == 0:
        expected_msg = "The column label 'L2' is not unique"
    else:
        expected_msg = "The index label 'L2' is not unique"

    with pytest.raises(ValueError, match=expected_msg):
        assert_label_values(df_duplabels, ["L2"], axis=axis)


# Series
# ------
def test_get_label_or_level_values_series_axis0(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2
    assert_level_values(s, ["L1"], axis=0)

    # Make series with L1 and L2 as index
    s = df.set_index(["L1", "L2"]).L3
    assert_level_values(s, ["L1", "L2"], axis=0)


def test_get_label_or_level_values_series_axis1_error(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2

    with pytest.raises(ValueError, match="No axis named 1"):
        s._get_label_or_level_values("L1", axis=1)


# Test _drop_labels_or_levels
# ===========================
def assert_labels_dropped(frame, labels, axis):
    axis = frame._get_axis_number(axis)
    for label in labels:
        df_dropped = frame._drop_labels_or_levels(label, axis=axis)

        if axis == 0:
            assert label in frame.columns
            assert label not in df_dropped.columns
        else:
            assert label in frame.index
            assert label not in df_dropped.index


def assert_levels_dropped(frame, levels, axis):
    axis = frame._get_axis_number(axis)
    for level in levels:
        df_dropped = frame._drop_labels_or_levels(level, axis=axis)

        if axis == 0:
            assert level in frame.index.names
            assert level not in df_dropped.index.names
        else:
            assert level in frame.columns.names
            assert level not in df_dropped.columns.names


# DataFrame
# ---------
def test_drop_labels_or_levels_df(df_levels, axis):
    # Compute expected labels and levels
    expected_labels, expected_levels = get_labels_levels(df_levels)

    axis = df_levels._get_axis_number(axis)
    # Transpose frame if axis == 1
    if axis == 1:
        df_levels = df_levels.T

    # Perform checks
    assert_labels_dropped(df_levels, expected_labels, axis=axis)
    assert_levels_dropped(df_levels, expected_levels, axis=axis)

    with pytest.raises(ValueError, match="not valid labels or levels"):
        df_levels._drop_labels_or_levels("L4", axis=axis)


# Series
# ------
def test_drop_labels_or_levels_series(df):
    # Make series with L1 as index
    s = df.set_index("L1").L2
    assert_levels_dropped(s, ["L1"], axis=0)

    with pytest.raises(ValueError, match="not valid labels or levels"):
        s._drop_labels_or_levels("L4", axis=0)

    # Make series with L1 and L2 as index
    s = df.set_index(["L1", "L2"]).L3
    assert_levels_dropped(s, ["L1", "L2"], axis=0)

    with pytest.raises(ValueError, match="not valid labels or levels"):
        s._drop_labels_or_levels("L4", axis=0)
lib/python3.13/site-packages/pandas/tests/generic/test_series.py
@ -0,0 +1,159 @@
from operator import methodcaller

import numpy as np
import pytest

import pandas as pd
from pandas import (
    MultiIndex,
    Series,
    date_range,
)
import pandas._testing as tm


class TestSeries:
    @pytest.mark.parametrize("func", ["rename_axis", "_set_axis_name"])
    def test_set_axis_name_mi(self, func):
        ser = Series(
            [11, 21, 31],
            index=MultiIndex.from_tuples(
                [("A", x) for x in ["a", "B", "c"]], names=["l1", "l2"]
            ),
        )

        result = methodcaller(func, ["L1", "L2"])(ser)
        assert ser.index.name is None
        assert ser.index.names == ["l1", "l2"]
        assert result.index.name is None
        assert result.index.names == ["L1", "L2"]

    def test_set_axis_name_raises(self):
        ser = Series([1])
        msg = "No axis named 1 for object type Series"
        with pytest.raises(ValueError, match=msg):
            ser._set_axis_name(name="a", axis=1)

    def test_get_bool_data_preserve_dtype(self):
        ser = Series([True, False, True])
        result = ser._get_bool_data()
        tm.assert_series_equal(result, ser)

    def test_nonzero_single_element(self):
        # allow single item via bool method
        msg_warn = (
            "Series.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        ser = Series([True])
        ser1 = Series([False])
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            assert ser.bool()
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            assert not ser1.bool()

    @pytest.mark.parametrize("data", [np.nan, pd.NaT, True, False])
    def test_nonzero_single_element_raise_1(self, data):
        # single item nan to raise
        series = Series([data])

        msg = "The truth value of a Series is ambiguous"
        with pytest.raises(ValueError, match=msg):
            bool(series)

    @pytest.mark.parametrize("data", [np.nan, pd.NaT])
    def test_nonzero_single_element_raise_2(self, data):
        msg_warn = (
            "Series.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        msg_err = "bool cannot act on a non-boolean single element Series"
        series = Series([data])
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            with pytest.raises(ValueError, match=msg_err):
                series.bool()

    @pytest.mark.parametrize("data", [(True, True), (False, False)])
    def test_nonzero_multiple_element_raise(self, data):
        # multiple bool are still an error
        msg_warn = (
            "Series.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        msg_err = "The truth value of a Series is ambiguous"
        series = Series([data])
        with pytest.raises(ValueError, match=msg_err):
            bool(series)
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            with pytest.raises(ValueError, match=msg_err):
                series.bool()

    @pytest.mark.parametrize("data", [1, 0, "a", 0.0])
    def test_nonbool_single_element_raise(self, data):
        # single non-bool are an error
        msg_warn = (
            "Series.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        msg_err1 = "The truth value of a Series is ambiguous"
        msg_err2 = "bool cannot act on a non-boolean single element Series"
        series = Series([data])
        with pytest.raises(ValueError, match=msg_err1):
            bool(series)
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            with pytest.raises(ValueError, match=msg_err2):
                series.bool()
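
    # Editor's sketch, not in the original file: with Series.bool deprecated, the
    # explicit replacements are item() or positional extraction. Helper name
    # hypothetical.
    def _editor_example_bool_replacements(self):
        ser = Series([True])
        assert ser.item()         # raises unless the Series holds exactly one element
        assert bool(ser.iloc[0])  # positional extraction, then plain bool()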

    def test_metadata_propagation_indiv_resample(self):
        # resample
        ts = Series(
            np.random.default_rng(2).random(1000),
            index=date_range("20130101", periods=1000, freq="s"),
            name="foo",
        )
        result = ts.resample("1min").mean()
        tm.assert_metadata_equivalent(ts, result)

        result = ts.resample("1min").min()
        tm.assert_metadata_equivalent(ts, result)

        result = ts.resample("1min").apply(lambda x: x.sum())
        tm.assert_metadata_equivalent(ts, result)

    def test_metadata_propagation_indiv(self, monkeypatch):
        # check that the metadata matches up on the resulting ops

        ser = Series(range(3), range(3))
        ser.name = "foo"
        ser2 = Series(range(3), range(3))
        ser2.name = "bar"

        result = ser.T
        tm.assert_metadata_equivalent(ser, result)

        def finalize(self, other, method=None, **kwargs):
            for name in self._metadata:
                if method == "concat" and name == "filename":
                    value = "+".join(
                        [
                            getattr(obj, name)
                            for obj in other.objs
                            if getattr(obj, name, None)
                        ]
                    )
                    object.__setattr__(self, name, value)
                else:
                    object.__setattr__(self, name, getattr(other, name, None))

            return self

        with monkeypatch.context() as m:
            m.setattr(Series, "_metadata", ["name", "filename"])
            m.setattr(Series, "__finalize__", finalize)

            ser.filename = "foo"
            ser2.filename = "bar"

            result = pd.concat([ser, ser2])
            assert result.filename == "foo+bar"
            assert result.name is None
@ -0,0 +1,130 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    DataFrame,
    MultiIndex,
    Series,
    date_range,
)
import pandas._testing as tm

pytest.importorskip("xarray")


class TestDataFrameToXArray:
    @pytest.fixture
    def df(self):
        return DataFrame(
            {
                "a": list("abcd"),
                "b": list(range(1, 5)),
                "c": np.arange(3, 7).astype("u1"),
                "d": np.arange(4.0, 8.0, dtype="float64"),
                "e": [True, False, True, False],
                "f": Categorical(list("abcd")),
                "g": date_range("20130101", periods=4),
                "h": date_range("20130101", periods=4, tz="US/Eastern"),
            }
        )

    def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
        index = index_flat
        # MultiIndex is tested in test_to_xarray_with_multiindex
        if len(index) == 0:
            pytest.skip("Test doesn't make sense for empty index")

        from xarray import Dataset

        df.index = index[:4]
        df.index.name = "foo"
        df.columns.name = "bar"
        result = df.to_xarray()
        assert result.sizes["foo"] == 4
        assert len(result.coords) == 1
        assert len(result.data_vars) == 8
        tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, Dataset)

        # idempotency
        # datetimes w/tz are preserved
        # column names are lost
        expected = df.copy()
        expected["f"] = expected["f"].astype(
            object if not using_infer_string else "string[pyarrow_numpy]"
        )
        expected.columns.name = None
        tm.assert_frame_equal(result.to_dataframe(), expected)
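
    # Editor's sketch, not in the original file: the index becomes the Dataset
    # coordinate and each column a data variable; a frame with only plain dtypes
    # round-trips through to_dataframe. Helper name hypothetical.
    def _editor_example_roundtrip(self):
        frame = DataFrame({"x": [1, 2], "y": [3.0, 4.0]}, index=[10, 20])
        frame.index.name = "idx"
        ds = frame.to_xarray()
        assert list(ds.coords) == ["idx"]       # index -> coordinate
        assert set(ds.data_vars) == {"x", "y"}  # columns -> data variables
        assert ds.to_dataframe().equals(frame)  # plain dtypes round-trip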

    def test_to_xarray_empty(self, df):
        from xarray import Dataset

        df.index.name = "foo"
        result = df[0:0].to_xarray()
        assert result.sizes["foo"] == 0
        assert isinstance(result, Dataset)

    def test_to_xarray_with_multiindex(self, df, using_infer_string):
        from xarray import Dataset

        # MultiIndex
        df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"])
        result = df.to_xarray()
        assert result.sizes["one"] == 1
        assert result.sizes["two"] == 4
        assert len(result.coords) == 2
        assert len(result.data_vars) == 8
        tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
        assert isinstance(result, Dataset)

        result = result.to_dataframe()
        expected = df.copy()
        expected["f"] = expected["f"].astype(
            object if not using_infer_string else "string[pyarrow_numpy]"
        )
        expected.columns.name = None
        tm.assert_frame_equal(result, expected)


class TestSeriesToXArray:
    def test_to_xarray_index_types(self, index_flat):
        index = index_flat
        # MultiIndex is tested in test_to_xarray_with_multiindex

        from xarray import DataArray

        ser = Series(range(len(index)), index=index, dtype="int64")
        ser.index.name = "foo"
        result = ser.to_xarray()
        repr(result)
        assert len(result) == len(index)
        assert len(result.coords) == 1
        tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, DataArray)

        # idempotency
        tm.assert_series_equal(result.to_series(), ser)

    def test_to_xarray_empty(self):
        from xarray import DataArray

        ser = Series([], dtype=object)
        ser.index.name = "foo"
        result = ser.to_xarray()
        assert len(result) == 0
        assert len(result.coords) == 1
        tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
        assert isinstance(result, DataArray)

    def test_to_xarray_with_multiindex(self):
        from xarray import DataArray

        mi = MultiIndex.from_product([["a", "b"], range(3)], names=["one", "two"])
        ser = Series(range(6), dtype="int64", index=mi)
        result = ser.to_xarray()
        assert len(result) == 2
        tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
        assert isinstance(result, DataArray)
        res = result.to_series()
        tm.assert_series_equal(res, ser)