Updated script that can be controlled by a Node.js web app
This commit is contained in:
@ -0,0 +1,7 @@
|
||||
from pandas.core.groupby.base import transformation_kernels
|
||||
|
||||
# Neither Series nor DataFrame has a cumcount method, so that kernel is
# dropped from both lists.
series_transform_kernels = sorted(
    kernel for kernel in transformation_kernels if kernel != "cumcount"
)
frame_transform_kernels = sorted(
    kernel for kernel in transformation_kernels if kernel != "cumcount"
)
|
1733
lib/python3.13/site-packages/pandas/tests/apply/test_frame_apply.py
Normal file
1733
lib/python3.13/site-packages/pandas/tests/apply/test_frame_apply.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,113 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import np_version_gte1p25
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_agg_relabel():
    """Named aggregation on one column labels result rows by keyword (GH 26513)."""
    frame = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})

    # simplest case: one column, one function
    single = frame.agg(foo=("B", "sum"))
    single_expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"]))
    tm.assert_frame_equal(single, single_expected)

    # same column aggregated with two different methods
    double = frame.agg(foo=("B", "sum"), bar=("B", "min"))
    double_expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))
    tm.assert_frame_equal(double, double_expected)
|
||||
|
||||
|
||||
def test_agg_relabel_multi_columns_multi_methods():
    """Named aggregations over several columns and methods (GH 26513)."""
    nan = np.nan
    frame = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
    named_aggs = {
        "foo": ("A", "sum"),
        "bar": ("B", "mean"),
        "cat": ("A", "min"),
        "dat": ("B", "max"),
        "f": ("A", "max"),
        "g": ("C", "min"),
    }
    result = frame.agg(**named_aggs)

    # Each label fills only the column it aggregated; the rest is NaN.
    expected = pd.DataFrame(
        {
            "A": [6.0, nan, 1.0, nan, 2.0, nan],
            "B": [nan, 2.5, nan, 4.0, nan, nan],
            "C": [nan, nan, nan, nan, nan, 3.0],
        },
        index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.xfail(np_version_gte1p25, reason="name of min now equals name of np.min")
def test_agg_relabel_partial_functions():
    """Named aggregation with callables, builtins and lambdas (GH 26513).

    The warning patterns use a grouped alternation. The previous
    ``[mean|min]`` form was a character class, so it only checked a single
    character after ``Series.`` instead of a full method name.
    """
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})

    msg = r"using Series\.(mean|min)"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
    expected = pd.DataFrame(
        {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
    )
    tm.assert_frame_equal(result, expected)

    msg = r"using Series\.(mean|min|max|sum)"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.agg(
            foo=("A", min),
            bar=("A", np.min),
            cat=("B", max),
            dat=("C", "min"),
            f=("B", np.sum),
            kk=("B", lambda x: min(x)),
        )
    expected = pd.DataFrame(
        {
            "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
            "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0],
            "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan],
        },
        index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_namedtuple():
    """DataFrame.agg accepts pd.NamedAgg in positional and keyword form (GH 26513)."""
    frame = pd.DataFrame({"A": [0, 1], "B": [1, 2]})

    # All aggregations target column "B".
    one_column = frame.agg(
        foo=pd.NamedAgg("B", "sum"),
        bar=pd.NamedAgg("B", "min"),
        cat=pd.NamedAgg(column="B", aggfunc="count"),
        fft=pd.NamedAgg("B", aggfunc="max"),
    )
    one_column_expected = pd.DataFrame(
        {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])
    )
    tm.assert_frame_equal(one_column, one_column_expected)

    # Aggregations split between "A" and "B".
    two_columns = frame.agg(
        foo=pd.NamedAgg("A", "min"),
        bar=pd.NamedAgg(column="B", aggfunc="max"),
        cat=pd.NamedAgg(column="A", aggfunc="max"),
    )
    two_columns_expected = pd.DataFrame(
        {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
        index=pd.Index(["foo", "bar", "cat"]),
    )
    tm.assert_frame_equal(two_columns, two_columns_expected)
|
||||
|
||||
|
||||
def test_reconstruct_func():
    """Guard the import location of reconstruct_func (GH 28472).

    Other libraries (e.g. dask) use this function, so it must stay
    reachable as ``pd.core.apply.reconstruct_func``.
    """
    outcome = pd.core.apply.reconstruct_func("min")
    tm.assert_equal(outcome, (False, "min", None, None))
|
@ -0,0 +1,264 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import frame_transform_kernels
|
||||
from pandas.tests.frame.common import zip_frames
|
||||
|
||||
|
||||
def unpack_obj(obj, klass, axis):
    """
    Return the object a frame_or_series-parametrized test should operate on.

    When ``klass`` is DataFrame, ``obj`` is returned untouched. Otherwise
    column "A" is selected, and the test is skipped unless ``axis`` is 0.
    """
    if klass is DataFrame:
        return obj

    column = obj["A"]
    if axis != 0:
        pytest.skip(f"Test is only for DataFrame with axis={axis}")
    return column
|
||||
|
||||
|
||||
def test_transform_ufunc(axis, float_frame, frame_or_series):
    """transform with a NumPy ufunc matches calling the ufunc directly (GH 35964)."""
    target = unpack_obj(float_frame, frame_or_series, axis)

    # Reference value, computed with floating-point warnings silenced.
    with np.errstate(all="ignore"):
        expected = np.sqrt(target)

    # ufunc
    tm.assert_equal(target.transform(np.sqrt, axis=axis), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(axis, float_frame, ops, names):
    """transform with a list-like of ufuncs adds a level of function names (GH 35964)."""
    along_index = axis in {0, "index"}
    other_axis = 1 if along_index else 0

    with np.errstate(all="ignore"):
        pieces = [op(float_frame) for op in ops]
        expected = zip_frames(pieces, axis=other_axis)

    # The function names form the inner level of the non-transformed axis.
    if along_index:
        expected.columns = MultiIndex.from_product([float_frame.columns, names])
    else:
        expected.index = MultiIndex.from_product([float_frame.index, names])

    tm.assert_frame_equal(float_frame.transform(ops, axis=axis), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(float_frame, ops, frame_or_series):
    """An empty list-like of functions is rejected with a ValueError."""
    target = unpack_obj(float_frame, frame_or_series, 0)
    expected_msg = "No transform functions were provided"

    with pytest.raises(ValueError, match=expected_msg):
        target.transform(ops)
|
||||
|
||||
|
||||
def test_transform_listlike_func_with_args():
    """Extra args/kwargs are passed to every function in a list (GH 50624)."""
    frame = DataFrame({"x": [1, 2, 3]})

    def foo1(x, a=1, c=0):
        return x + a + c

    def foo2(x, b=2, c=0):
        return x + b + c

    funcs = [foo1, foo2]

    # foo1 has no "b" parameter, so supplying b=3 must fail.
    bad_kwarg_msg = r"foo1\(\) got an unexpected keyword argument 'b'"
    with pytest.raises(TypeError, match=bad_kwarg_msg):
        frame.transform(funcs, 0, 3, b=3, c=4)

    result = frame.transform(funcs, 0, 3, c=4)
    labels = MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")])
    expected = DataFrame([[8, 8], [9, 9], [10, 10]], columns=labels)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(axis, float_frame, box):
    """transform with a dict-like touches only the keyed label (GH 35964)."""
    if axis in (0, "index"):
        # Keys name columns when transforming along the index.
        label = float_frame.columns[0]
        subset = float_frame[[label]]
    else:
        # Keys name rows when transforming along the columns.
        label = float_frame.index[0]
        subset = float_frame.iloc[[0]]
    expected = subset.transform(np.abs)

    result = float_frame.transform(box({label: np.abs}), axis=axis)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_dictlike_mixed():
    """Dict values may mix lists and scalars (GH 40018)."""
    frame = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]})
    spec = {"b": ["sqrt", "abs"], "c": "sqrt"}
    result = frame.transform(spec)

    # Columns, in order: (b, sqrt), (b, abs), (c, sqrt)
    expected_columns = MultiIndex(
        levels=[("b", "c"), ("sqrt", "abs")],
        codes=[(0, 0, 1), (0, 1, 0)],
    )
    expected = DataFrame([[1.0, 1, 1.0], [2.0, 4, 2.0]], columns=expected_columns)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {},
        {"A": []},
        {"A": [], "B": "cumsum"},
        {"A": "cumsum", "B": []},
        {"A": [], "B": ["cumsum"]},
        {"A": ["cumsum"], "B": []},
    ],
)
def test_transform_empty_dictlike(float_frame, ops, frame_or_series):
    """A dict that is empty or has an empty list value is rejected."""
    target = unpack_obj(float_frame, frame_or_series, 0)
    expected_msg = "No transform functions were provided"

    with pytest.raises(ValueError, match=expected_msg):
        target.transform(ops)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_udf(axis, float_frame, use_apply, frame_or_series):
    """transform with a UDF works through either calling path (GH 35964)."""
    target = unpack_obj(float_frame, frame_or_series, axis)

    # transform uses UDF either via apply or passing the entire DataFrame
    def func(x):
        # transform is using apply iff x is not a DataFrame
        got_whole_object = isinstance(x, frame_or_series)
        if use_apply == got_whole_object:
            # Force transform to fallback
            raise ValueError
        return x + 1

    tm.assert_equal(target.transform(func, axis=axis), target + 1)
|
||||
|
||||
|
||||
# Kernels left out of the "raises" parametrizations below.
wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"]
frame_kernels_raise = [
    kernel for kernel in frame_transform_kernels if not (kernel in wont_fail)
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series, request):
    """Every way of passing a failing kernel raises TypeError (GH 35964)."""
    if op == "ngroup":
        request.applymarker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    # DataFrame that will fail on most transforms
    obj = tm.get_obj(DataFrame({"A": 3 * [object]}), frame_or_series)
    error = TypeError
    msg = (
        "not supported between instances of 'type' and 'type'"
        "|"
        "unsupported operand type"
    )

    # Same kernel passed bare, in a list, in a dict, and in a dict of lists.
    for spec_builder in (
        lambda: op,
        lambda: [op],
        lambda: {"A": op},
        lambda: {"A": [op]},
    ):
        with pytest.raises(error, match=msg):
            obj.transform(spec_builder())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_failure_typeerror(request, op):
    """A TypeError from one column surfaces for list and dict specs (GH 35964)."""
    if op == "ngroup":
        request.applymarker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    # Using object makes most transform kernels fail
    frame = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})
    error = TypeError
    msg = (
        "not supported between instances of 'type' and 'type'"
        "|"
        "unsupported operand type"
    )

    with pytest.raises(error, match=msg):
        frame.transform([op])

    for spec_builder in (
        lambda: {"A": op, "B": op},
        lambda: {"A": [op], "B": [op]},
        lambda: {"A": [op, "shift"], "B": [op]},
    ):
        with pytest.raises(error, match=msg):
            frame.transform(spec_builder())
|
||||
|
||||
|
||||
def test_transform_failure_valueerror():
    """A ValueError inside the UDF is reported as a transform failure (GH 40211)."""

    def op(x):
        total = np.sum(np.sum(x))
        if total < 10:
            raise ValueError
        return x

    frame = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
    msg = "Transform function failed"

    with pytest.raises(ValueError, match=msg):
        frame.transform([op])

    dict_specs = (
        {"A": op, "B": op},
        {"A": [op], "B": [op]},
        {"A": [op, "shift"], "B": [op]},
    )
    for spec in dict_specs:
        with pytest.raises(ValueError, match=msg):
            frame.transform(spec)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
    """Positional and keyword arguments reach the UDF unchanged (GH 35964)."""
    # transform uses UDF either via apply or passing the entire DataFrame
    expected_args = [1, 2]
    expected_kwargs = {"c": 3}

    def f(x, a, b, c):
        # transform is using apply iff x is not a DataFrame
        got_whole_object = isinstance(x, frame_or_series)
        if use_apply == got_whole_object:
            # Force transform to fallback
            raise ValueError
        assert [a, b] == expected_args
        assert c == expected_kwargs["c"]
        return x

    obj = frame_or_series([1])
    obj.transform(f, 0, *expected_args, **expected_kwargs)
|
||||
|
||||
|
||||
def test_transform_empty_dataframe():
    """Transforming an empty frame or one of its columns returns it unchanged."""
    # https://github.com/pandas-dev/pandas/issues/39636
    empty = DataFrame([], columns=["col1", "col2"])

    frame_result = empty.transform(lambda x: x + 10)
    tm.assert_frame_equal(frame_result, empty)

    column = empty["col1"]
    series_result = column.transform(lambda x: x + 10)
    tm.assert_series_equal(series_result, empty["col1"])
|
@ -0,0 +1,361 @@
|
||||
# Tests specifically aimed at detecting bad arguments.
|
||||
# This file is organized by reason for exception.
|
||||
# 1. always invalid argument values
|
||||
# 2. missing column(s)
|
||||
# 3. incompatible ops/dtype/args/kwargs
|
||||
# 4. invalid result shape/type
|
||||
# If your test does not fit into one of these categories, add to this list.
|
||||
|
||||
from itertools import chain
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import SpecificationError
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("result_type", ["foo", 1])
def test_result_type_error(result_type):
    """An unrecognized result_type raises a ValueError listing the allowed values."""
    # allowed result_type
    values = np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1
    frame = DataFrame(values, columns=["A", "B", "C"])

    msg = (
        "invalid value for result_type, must be one of "
        "{None, 'reduce', 'broadcast', 'expand'}"
    )
    with pytest.raises(ValueError, match=msg):
        frame.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type)
|
||||
|
||||
|
||||
def test_apply_invalid_axis_value():
    """DataFrame.apply rejects axis=2."""
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    frame = DataFrame(rows, index=["a", "a", "c"])

    with pytest.raises(ValueError, match="No axis named 2 for object type DataFrame"):
        frame.apply(lambda x: x, 2)
|
||||
|
||||
|
||||
def test_agg_raises():
    """Calling DataFrame.agg with no arguments raises TypeError (GH 26513)."""
    frame = DataFrame({"A": [0, 1], "B": [1, 2]})

    with pytest.raises(TypeError, match="Must provide"):
        frame.agg()
|
||||
|
||||
|
||||
def test_map_with_invalid_na_action_raises():
    """Series.map with a callable rejects an unknown na_action."""
    # https://github.com/pandas-dev/pandas/issues/32815
    ser = Series([1, 2, 3])
    expected_msg = "na_action must either be 'ignore' or None"

    with pytest.raises(ValueError, match=expected_msg):
        ser.map(lambda x: x, na_action="____")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_na_action", ["____", True])
def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action):
    """Series.map with a dict rejects an unknown na_action and echoes it back."""
    # https://github.com/pandas-dev/pandas/issues/46588
    ser = Series([1, 2, 3])
    mapping = {1: 2}
    msg = f"na_action must either be 'ignore' or None, {input_na_action} was passed"

    with pytest.raises(ValueError, match=msg):
        ser.map(mapping, na_action=input_na_action)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}])
def test_nested_renamer(frame_or_series, method, func):
    """A dict-of-dicts spec is rejected by apply, agg and transform (GH 35964)."""
    obj = frame_or_series({"A": [1]})
    expected_msg = "nested renamer is not supported"

    with pytest.raises(SpecificationError, match=expected_msg):
        bound_method = getattr(obj, method)
        bound_method(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "renamer",
    [{"foo": ["min", "max"]}, {"foo": ["min", "max"], "bar": ["sum", "mean"]}],
)
def test_series_nested_renamer(renamer):
    """Series.agg rejects a dict whose values are lists of functions."""
    ser = Series(range(6), dtype="int64", name="series")

    with pytest.raises(SpecificationError, match="nested renamer is not supported"):
        ser.agg(renamer)
|
||||
|
||||
|
||||
def test_apply_dict_depr():
    """Aggregating a single column with a dict of lists raises SpecificationError."""
    values = np.random.default_rng(2).standard_normal((10, 3))
    dates = date_range("1/1/2000", periods=10)
    tsdf = DataFrame(values, columns=["A", "B", "C"], index=dates)

    with pytest.raises(SpecificationError, match="nested renamer is not supported"):
        tsdf.A.agg({"foo": ["sum", "mean"]})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["agg", "transform"])
def test_dict_nested_renaming_depr(method):
    """DataFrame agg/transform reject a dict-of-dicts renaming spec."""
    frame = DataFrame({"A": range(5), "B": 5})

    # nested renaming
    nested_spec = {"A": {"foo": "min"}, "B": {"bar": "max"}}
    with pytest.raises(SpecificationError, match=r"nested renamer is not supported"):
        getattr(frame, method)(nested_spec)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}])
def test_missing_column(method, func):
    """A dict keyed by a missing column raises KeyError (GH 40004)."""
    frame = DataFrame({"A": [1]})
    # Escaped because the message itself contains parentheses and brackets.
    expected_msg = re.escape("Column(s) ['B'] do not exist")

    with pytest.raises(KeyError, match=expected_msg):
        bound_method = getattr(frame, method)
        bound_method(func)
|
||||
|
||||
|
||||
def test_transform_mixed_column_name_dtypes():
    """Missing keys of mixed types are all listed in the KeyError (GH39025)."""
    frame = DataFrame({"a": ["1"]})
    spec = {"a": int, 1: str, "b": int}

    with pytest.raises(KeyError, match=r"Column\(s\) \[1, 'b'\] do not exist"):
        frame.transform(spec)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "how, args",
    # ``args`` is always a tuple; the "tail" case used to pass a bare int.
    [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", (1,))],
)
def test_apply_str_axis_1_raises(how, args):
    """String ops without axis=1 support raise ValueError (GH 39211).

    Parameters
    ----------
    how : str
        Name of the DataFrame method passed to ``apply``.
    args : tuple
        Positional arguments passed to ``apply`` through ``args``.
    """
    # GH 39211 - some ops don't support axis=1
    df = DataFrame({"a": [1, 2], "b": [3, 4]})
    msg = f"Operation {how} does not support axis=1"
    with pytest.raises(ValueError, match=msg):
        df.apply(how, axis=1, args=args)
|
||||
|
||||
|
||||
def test_transform_axis_1_raises():
    """Series.transform rejects axis=1 (GH 35964)."""
    ser = Series([1])

    with pytest.raises(ValueError, match="No axis named 1 for object type Series"):
        ser.transform("sum", axis=1)
|
||||
|
||||
|
||||
def test_apply_modify_traceback():
    """An AttributeError raised inside a row-wise UDF reaches the caller."""
    col_a = ["foo"] * 4 + ["bar"] * 4 + ["foo"] * 3
    col_b = [
        "one", "one", "one", "two", "one", "one",
        "one", "two", "two", "two", "one",
    ]
    col_c = [
        "dull", "dull", "shiny", "dull", "dull", "shiny",
        "shiny", "dull", "shiny", "shiny", "shiny",
    ]
    data = DataFrame(
        {
            "A": col_a,
            "B": col_b,
            "C": col_c,
            "D": np.random.default_rng(2).standard_normal(11),
            "E": np.random.default_rng(2).standard_normal(11),
            "F": np.random.default_rng(2).standard_normal(11),
        }
    )

    # A float NaN in the string column makes .startswith fail on that row.
    data.loc[4, "C"] = np.nan

    def transform(row):
        is_shiny_foo = row["C"].startswith("shin") and row["A"] == "foo"
        if is_shiny_foo:
            row["D"] = 7
        return row

    msg = "'float' object has no attribute 'startswith'"
    with pytest.raises(AttributeError, match=msg):
        data.apply(transform, axis=1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    tm.get_cython_table_params(
        DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]]
    ),
)
def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string):
    """cumprod over string data raises through DataFrame.agg (GH 21224)."""
    if using_infer_string:
        import pyarrow as pa

        # Also accept the pyarrow error on the infer_string path.
        expected = (expected, pa.lib.ArrowNotImplementedError)

    msg = "can't multiply sequence by non-int of type 'str'|has no kernel"
    # Only non-string funcs are expected to emit the FutureWarning.
    if isinstance(func, str):
        warn = None
    else:
        warn = FutureWarning

    with pytest.raises(expected, match=msg), tm.assert_produces_warning(
        warn, match="using DataFrame.cumprod"
    ):
        df.agg(func, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("mean", TypeError),  # mean raises TypeError
                ("prod", TypeError),
                ("std", TypeError),
                ("var", TypeError),
                ("median", TypeError),
                ("cumprod", TypeError),
            ],
        )
    ),
)
def test_agg_cython_table_raises_series(series, func, expected, using_infer_string):
    """Numeric reductions over string data raise through Series.agg (GH21224)."""
    is_median = func == "median" or func is np.nanmedian or func is np.median
    if is_median:
        msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
    else:
        msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type"

    if using_infer_string:
        import pyarrow as pa

        expected = (expected, pa.lib.ArrowNotImplementedError)

    msg = msg + "|does not support|has no kernel"
    warn = None if isinstance(func, str) else FutureWarning

    # e.g. Series('a b'.split()).cumprod() will raise
    with pytest.raises(expected, match=msg), tm.assert_produces_warning(
        warn, match="is currently using Series.*"
    ):
        series.agg(func)
|
||||
|
||||
|
||||
def test_agg_none_to_type():
    """The TypeError from int(None) inside an agg UDF reaches the caller (GH 40543)."""
    frame = DataFrame({"a": [None]})
    expected_msg = re.escape("int() argument must be a string")

    def first_as_int(x):
        return int(x.iloc[0])

    with pytest.raises(TypeError, match=expected_msg):
        frame.agg({"a": first_as_int})
|
||||
|
||||
|
||||
def test_transform_none_to_type():
    """The TypeError from int(None) inside a transform UDF reaches the caller (GH#34377)."""
    frame = DataFrame({"a": [None]})

    def first_as_int(x):
        return int(x.iloc[0])

    with pytest.raises(TypeError, match="argument must be a"):
        frame.transform({"a": first_as_int})
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        lambda x: np.array([1, 2]).reshape(-1, 2),
        lambda x: [1, 2],
        lambda x: Series([1, 2]),
    ],
)
def test_apply_broadcast_error(func):
    """result_type='broadcast' rejects results that cannot be broadcast back."""
    values = np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1
    frame = DataFrame(values, columns=["A", "B", "C"])

    # > 1 ndim
    expected_msg = "too many dims to broadcast|cannot broadcast result"
    with pytest.raises(ValueError, match=expected_msg):
        frame.apply(func, axis=1, result_type="broadcast")
|
||||
|
||||
|
||||
def test_transform_and_agg_err_agg(axis, float_frame):
    """agg with a mix of a reducer and a transformer raises ValueError."""
    # cannot both transform and agg
    expected_msg = "cannot combine transform and aggregation operations"
    mixed_funcs = ["max", "sqrt"]

    with pytest.raises(ValueError, match=expected_msg), np.errstate(all="ignore"):
        float_frame.agg(mixed_funcs, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")  # GH53325
@pytest.mark.parametrize(
    "func, msg",
    [
        (["sqrt", "max"], "cannot combine transform and aggregation"),
        (
            {"foo": np.sqrt, "bar": "sum"},
            "cannot perform both aggregation and transformation",
        ),
    ],
)
def test_transform_and_agg_err_series(string_series, func, msg):
    """Series.agg with mixed reducers and transformers raises ValueError."""
    # we are trying to transform with an aggregator
    with pytest.raises(ValueError, match=msg), np.errstate(all="ignore"):
        string_series.agg(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]])
def test_transform_wont_agg_frame(axis, float_frame, func):
    """DataFrame.transform refuses a list containing reducers (GH 35964)."""
    # cannot both transform and agg
    expected_msg = "Function did not transform"

    with pytest.raises(ValueError, match=expected_msg):
        float_frame.transform(func, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]])
def test_transform_wont_agg_series(string_series, func):
    """Series.transform refuses a list containing reducers (GH 35964)."""
    # we are trying to transform with an aggregator
    expected_msg = "Function did not transform"

    with pytest.raises(ValueError, match=expected_msg):
        string_series.transform(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}]
)
def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
    """A reduction passed to transform raises in every wrapping form (GH 35964)."""
    wrapped_op = op_wrapper(all_reductions)
    target = tm.get_obj(DataFrame({"A": [1, 2, 3]}), frame_or_series)

    with pytest.raises(ValueError, match="Function did not transform"):
        target.transform(wrapped_op)
|
118
lib/python3.13/site-packages/pandas/tests/apply/test_numba.py
Normal file
118
lib/python3.13/site-packages/pandas/tests/apply/test_numba.py
Normal file
@ -0,0 +1,118 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# Module-level marks: td.skip_if_no("numba") plus the single_cpu marker.
pytestmark = [td.skip_if_no("numba"), pytest.mark.single_cpu]
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, 1])
def apply_axis(request):
    """Fixture parametrized over the two axis values, 0 and 1."""
    axis = request.param
    return axis
|
||||
|
||||
|
||||
def test_numba_vs_python_noop(float_frame, apply_axis):
    """The numba and python engines agree for an identity function."""

    def func(x):
        return x

    via_numba = float_frame.apply(func, engine="numba", axis=apply_axis)
    via_python = float_frame.apply(func, engine="python", axis=apply_axis)
    tm.assert_frame_equal(via_numba, via_python)
|
||||
|
||||
|
||||
def test_numba_vs_python_string_index():
    """Engines agree when index and columns use a pyarrow-backed string dtype (GH#56189)."""
    pytest.importorskip("pyarrow")
    string_dtype = "string[pyarrow_numpy]"
    frame = DataFrame(
        1,
        index=Index(["a", "b"], dtype=string_dtype),
        columns=Index(["x", "y"], dtype=string_dtype),
    )

    def func(x):
        return x

    via_numba = frame.apply(func, engine="numba", axis=0)
    via_python = frame.apply(func, engine="python", axis=0)
    tm.assert_frame_equal(
        via_numba, via_python, check_column_type=False, check_index_type=False
    )
|
||||
|
||||
|
||||
def test_numba_vs_python_indexing():
    """Label lookups inside the UDF give the same result under both engines."""
    frame = DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]},
        index=Index(["A", "B", "C"]),
    )

    # (axis, label looked up in each row/column passed to the UDF)
    for axis, label in ((1, "c"), (0, "A")):
        picker = lambda x, label=label: x[label]  # noqa: E731
        via_numba = frame.apply(picker, engine="numba", axis=axis)
        via_python = frame.apply(picker, engine="python", axis=axis)
        tm.assert_series_equal(via_numba, via_python)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "reduction",
    [lambda x: x.mean(), lambda x: x.min(), lambda x: x.max(), lambda x: x.sum()],
)
def test_numba_vs_python_reductions(reduction, apply_axis):
    """Reductions produce the same Series under both engines."""
    frame = DataFrame(np.ones((4, 4), dtype=np.float64))

    via_numba = frame.apply(reduction, engine="numba", axis=apply_axis)
    via_python = frame.apply(reduction, engine="python", axis=apply_axis)
    tm.assert_series_equal(via_numba, via_python)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("colnames", [[1, 2, 3], [1.0, 2.0, 3.0]])
def test_numba_numeric_colnames(colnames):
    """Numeric column labels can be indexed inside the UDF under numba."""
    # Check that numeric column names lower properly and can be indexed on
    values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.int64)
    frame = DataFrame(values, columns=colnames)
    first_col = colnames[0]

    def f(x):
        # Get the first column
        return x[first_col]

    via_numba = frame.apply(f, engine="numba", axis=1)
    via_python = frame.apply(f, engine="python", axis=1)
    tm.assert_series_equal(via_numba, via_python)
|
||||
|
||||
|
||||
def test_numba_parallel_unsupported(float_frame):
    """Requesting parallel=True with the numba engine raises NotImplementedError."""

    def f(x):
        return x

    expected_msg = "Parallel apply is not supported when raw=False and engine='numba'"
    with pytest.raises(NotImplementedError, match=expected_msg):
        float_frame.apply(f, engine="numba", engine_kwargs={"parallel": True})
|
||||
|
||||
|
||||
def test_numba_nonunique_unsupported(apply_axis):
    """A duplicated index label is rejected by the numba engine."""

    def f(x):
        return x

    frame = DataFrame({"a": [1, 2]}, index=Index(["a", "a"]))
    expected_msg = "The index/columns must be unique when raw=False and engine='numba'"

    with pytest.raises(NotImplementedError, match=expected_msg):
        frame.apply(f, engine="numba", axis=apply_axis)
|
||||
|
||||
|
||||
def test_numba_unsupported_dtypes(apply_axis):
    """Non-numeric and extension-array columns are rejected by the numba engine.

    The dtype alternation in the first pattern is grouped. Ungrouped, the
    ``|`` split the whole pattern in two, so a message matching only
    ``string' instead`` (or only the text up to ``'object``) would pass.
    """
    f = lambda x: x
    df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
    df["c"] = df["c"].astype("double[pyarrow]")

    with pytest.raises(
        ValueError,
        match="Column b must have a numeric dtype. Found '(object|string)' instead",
    ):
        df.apply(f, engine="numba", axis=apply_axis)

    with pytest.raises(
        ValueError,
        match="Column c is backed by an extension array, "
        "which is not supported by the numba engine.",
    ):
        df["c"].to_frame().apply(f, engine="numba", axis=apply_axis)
|
@ -0,0 +1,701 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
date_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import series_transform_kernels
|
||||
|
||||
|
||||
@pytest.fixture(params=[False, "compat"])
def by_row(request):
    """Fixture parametrized over the by_row values False and "compat"."""
    mode = request.param
    return mode
|
||||
|
||||
|
||||
def test_series_map_box_timedelta(by_row):
    """apply on timedelta data yields total seconds in both by_row modes (GH#11349)."""
    ser = Series(timedelta_range("1 day 1 s", periods=3, freq="h"))

    def f(x):
        # by_row passes scalars; otherwise the whole Series is passed.
        if by_row:
            return x.total_seconds()
        return x.dt.total_seconds()

    result = ser.apply(f, by_row=by_row)

    via_map = ser.map(lambda x: x.total_seconds())
    tm.assert_series_equal(result, via_map)

    literal = Series([86401.0, 90001.0, 93601.0])
    tm.assert_series_equal(result, literal)
|
||||
|
||||
|
||||
def test_apply(datetime_series, by_row):
    """Series.apply with ufuncs, and with identity on empty and all-NaN input."""
    sqrt_result = datetime_series.apply(np.sqrt, by_row=by_row)
    with np.errstate(all="ignore"):
        sqrt_expected = np.sqrt(datetime_series)
    tm.assert_series_equal(sqrt_result, sqrt_expected)

    # element-wise apply (ufunc)
    exp_result = datetime_series.apply(np.exp, by_row=by_row)
    tm.assert_series_equal(exp_result, np.exp(datetime_series))

    # empty series
    empty = Series(dtype=object, name="foo", index=Index([], name="bar"))
    empty_applied = empty.apply(lambda x: x, by_row=by_row)
    tm.assert_series_equal(empty, empty_applied)

    # check all metadata (GH 9322)
    assert empty is not empty_applied
    assert empty.index is empty_applied.index
    assert empty.dtype == empty_applied.dtype
    assert empty.name == empty_applied.name

    # index but no data
    no_data = Series(index=[1, 2, 3], dtype=np.float64)
    no_data_applied = no_data.apply(lambda x: x, by_row=by_row)
    tm.assert_series_equal(no_data, no_data_applied)
|
||||
|
||||
|
||||
def test_apply_map_same_length_inference_bug():
|
||||
s = Series([1, 2])
|
||||
|
||||
def f(x):
|
||||
return (x, x + 1)
|
||||
|
||||
result = s.apply(f, by_row="compat")
|
||||
expected = s.map(f)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("convert_dtype", [True, False])
def test_apply_convert_dtype_deprecated(convert_dtype):
    """Passing convert_dtype to Series.apply emits a FutureWarning."""
    ser = Series(np.random.default_rng(2).standard_normal(10))

    def func(x):
        if x > 0:
            return x
        return np.nan

    with tm.assert_produces_warning(FutureWarning):
        ser.apply(func, convert_dtype=convert_dtype, by_row="compat")
|
||||
|
||||
|
||||
def test_apply_args():
    """Positional ``args`` are forwarded to the applied function."""
    ser = Series(["foo,bar"])

    result = ser.apply(str.split, args=(",",))
    first = result[0]
    assert first == ["foo", "bar"]
    assert isinstance(result[0], list)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "args, kwargs, increment",
    [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
    """``Series.agg`` forwards extra args/kwargs to a transforming callable and warns."""
    # GH 43357
    def f(x, a=0, b=0, c=0):
        return x + a + 10 * b + 100 * c

    ser = Series([1, 2])
    expected = ser + increment

    msg = (
        "in Series.agg cannot aggregate and has been deprecated. "
        "Use Series.transform to keep behavior unchanged."
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.agg(f, 0, *args, **kwargs)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_mapping_func_deprecated():
|
||||
# GH 53325
|
||||
s = Series([1, 2, 3])
|
||||
|
||||
def foo1(x, a=1, c=0):
|
||||
return x + a + c
|
||||
|
||||
def foo2(x, b=2, c=0):
|
||||
return x + b + c
|
||||
|
||||
msg = "using .+ in Series.agg cannot aggregate and"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s.agg(foo1, 0, 3, c=4)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s.agg([foo1, foo2], 0, 3, c=4)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s.agg({"a": foo1, "b": foo2}, 0, 3, c=4)
|
||||
|
||||
|
||||
def test_series_apply_map_box_timestamps(by_row):
    """Element-wise apply on datetimes matches ``map``; with a falsy by_row it raises."""
    # GH#2689, GH#2627
    ser = Series(date_range("1/1/2000", periods=10))

    def parts(ts):
        return (ts.hour, ts.day, ts.month)

    if by_row:
        result = ser.apply(parts, by_row=by_row)
        expected = ser.map(parts)
        tm.assert_series_equal(result, expected)
    else:
        msg = "Series' object has no attribute 'hour'"
        with pytest.raises(AttributeError, match=msg):
            ser.apply(parts, by_row=by_row)
|
||||
|
||||
|
||||
def test_apply_box_dt64():
|
||||
# ufunc will not be boxed. Same test cases as the test_map_box
|
||||
vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
|
||||
ser = Series(vals, dtype="M8[ns]")
|
||||
assert ser.dtype == "datetime64[ns]"
|
||||
# boxed value must be Timestamp instance
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
|
||||
exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_box_dt64tz():
|
||||
vals = [
|
||||
pd.Timestamp("2011-01-01", tz="US/Eastern"),
|
||||
pd.Timestamp("2011-01-02", tz="US/Eastern"),
|
||||
]
|
||||
ser = Series(vals, dtype="M8[ns, US/Eastern]")
|
||||
assert ser.dtype == "datetime64[ns, US/Eastern]"
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
|
||||
exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_box_td64():
|
||||
# timedelta
|
||||
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
|
||||
ser = Series(vals)
|
||||
assert ser.dtype == "timedelta64[ns]"
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat")
|
||||
exp = Series(["Timedelta_1", "Timedelta_2"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_box_period():
|
||||
# period
|
||||
vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
|
||||
ser = Series(vals)
|
||||
assert ser.dtype == "Period[M]"
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row="compat")
|
||||
exp = Series(["Period_M", "Period_M"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_datetimetz(by_row):
    """Apply offset, hour and tz functions to a tz-aware Series for each by_row mode."""
    start_values = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize(
        "Asia/Tokyo"
    )
    ser = Series(start_values, name="XX")

    # Shift every timestamp by one day.
    shifted = ser.apply(lambda x: x + pd.offsets.Day(), by_row=by_row)
    shifted_values = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize(
        "Asia/Tokyo"
    )
    tm.assert_series_equal(shifted, Series(shifted_values, name="XX"))

    def hour_of(x):
        return x.hour if by_row else x.dt.hour

    hours = ser.apply(hour_of, by_row=by_row)
    # The expected dtype differs between the two by_row modes.
    hour_dtype = "int64" if by_row else "int32"
    exp = Series(list(range(24)) + [0], name="XX", dtype=hour_dtype)
    tm.assert_series_equal(hours, exp)

    # not vectorized
    def tz_name(x):
        return str(x.tz) if by_row else str(x.dt.tz)

    result = ser.apply(tz_name, by_row=by_row)
    if not by_row:
        assert result == "Asia/Tokyo"
    else:
        exp = Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(result, exp)
|
||||
|
||||
|
||||
def test_apply_categorical(by_row, using_infer_string):
    """Check ``Series.apply`` on an ordered categorical Series.

    With a falsy ``by_row`` the function receives the whole Series, so calling
    ``.lower()`` raises ``AttributeError`` and a constant function returns its
    scalar. Otherwise the result is compared element-wise, including its dtype.
    """
    values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    ser = Series(values, name="XX", index=list("abcdefg"))

    if not by_row:
        msg = "Series' object has no attribute 'lower"
        with pytest.raises(AttributeError, match=msg):
            ser.apply(lambda x: x.lower(), by_row=by_row)
        assert ser.apply(lambda x: "A", by_row=by_row) == "A"
        return

    result = ser.apply(lambda x: x.lower(), by_row=by_row)

    # should be categorical dtype when the number of categories are
    # the same
    values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
    exp = Series(values, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    tm.assert_categorical_equal(result.values, exp.values)

    result = ser.apply(lambda x: "A")
    exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    # Choose the expected dtype first: ``assert a == b if c else d`` parses as
    # ``assert ((a == b) if c else d)``, which asserted a truthy string literal
    # (and therefore nothing) whenever using_infer_string was set.
    expected_dtype = "string[pyarrow_numpy]" if using_infer_string else object
    assert result.dtype == expected_dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
def test_apply_categorical_with_nan_values(series, by_row):
    """Splitting categorical strings that include NaN works element-wise only."""
    # GH 20714 bug fixed in: GH 24275
    ser = Series(series, dtype="category")

    def prefix(x):
        return x.split("-")[0]

    if by_row:
        result = ser.apply(prefix, by_row=by_row).astype(object)
        expected = Series(["1", "1", np.nan], dtype="category").astype(object)
        tm.assert_series_equal(result, expected)
    else:
        msg = "'Series' object has no attribute 'split'"
        with pytest.raises(AttributeError, match=msg):
            ser.apply(prefix, by_row=by_row)
|
||||
|
||||
|
||||
def test_apply_empty_integer_series_with_datetime_index(by_row):
    """Identity apply on an empty int Series with a datetime index returns an equal Series."""
    # GH 21245
    empty_index = date_range(start="2018-01-01", periods=0)
    ser = Series([], index=empty_index, dtype=int)
    applied = ser.apply(lambda x: x, by_row=by_row)
    tm.assert_series_equal(applied, ser)
|
||||
|
||||
|
||||
def test_apply_dataframe_iloc():
    """Applying an ``iloc`` lookup into a uint64 frame yields a uint64 Series."""
    numbers = DataFrame(np.uint64([1, 2, 3, 4, 5]), columns=["Numbers"])
    positions = DataFrame([2, 3, 2, 1, 2], columns=["Indices"])

    def lookup(row, frame):
        return frame["Numbers"].iloc[row]

    result = positions["Indices"].apply(lookup, args=(numbers,))
    expected = Series([3, 4, 3, 2, 3], name="Indices", dtype="uint64")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform(string_series, by_row):
    """Apply transforming functions as a ufunc, a list and a dict."""
    # transforming functions

    with np.errstate(all="ignore"):
        sqrt_ser = np.sqrt(string_series)
        abs_ser = np.abs(string_series)

        # ufunc
        result = string_series.apply(np.sqrt, by_row=by_row)
        tm.assert_series_equal(result, sqrt_ser.copy())

        # list-like
        sqrt_frame = sqrt_ser.to_frame().copy()
        sqrt_frame.columns = ["sqrt"]
        result = string_series.apply([np.sqrt], by_row=by_row)
        tm.assert_frame_equal(result, sqrt_frame)

        result = string_series.apply(["sqrt"], by_row=by_row)
        tm.assert_frame_equal(result, sqrt_frame)

        # multiple items in list
        # these are in the order as if we are applying both functions per
        # series and then concatting
        both = concat([sqrt_ser, abs_ser], axis=1)
        both.columns = ["sqrt", "absolute"]
        result = string_series.apply([np.sqrt, np.abs], by_row=by_row)
        tm.assert_frame_equal(result, both)

        # dict, provide renaming
        renamed = concat([sqrt_ser, abs_ser], axis=1)
        renamed.columns = ["foo", "bar"]
        renamed = renamed.unstack().rename("series")

        result = string_series.apply({"foo": np.sqrt, "bar": np.abs}, by_row=by_row)
        tm.assert_series_equal(result.reindex_like(renamed), renamed)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_partial_failure(op, request):
    """A failing kernel makes list- and dict-style ``transform`` raise."""
    # GH 35964
    if op in ("ffill", "bfill", "pad", "backfill", "shift"):
        request.applymarker(
            pytest.mark.xfail(reason=f"{op} is successful on any dtype")
        )

    # Using object makes most transform kernels fail
    ser = Series(3 * [object])

    if op in ("fillna", "ngroup"):
        error, msg = ValueError, "Transform function failed"
    else:
        error = TypeError
        patterns = [
            "not supported between instances of 'type' and 'type'",
            "unsupported operand type",
        ]
        msg = "|".join(patterns)

    # Every spec mixes the kernel under test with "shift".
    specs = (
        [op, "shift"],
        {"A": op, "B": "shift"},
        {"A": [op], "B": ["shift"]},
        {"A": [op, "shift"], "B": [op]},
    )
    for spec in specs:
        with pytest.raises(error, match=msg):
            ser.transform(spec)
|
||||
|
||||
|
||||
def test_transform_partial_failure_valueerror():
    """A callable raising ValueError surfaces as "Transform function failed"."""
    # GH 40211
    def noop(x):
        return x

    def raising_op(_):
        raise ValueError

    ser = Series(3 * [object])
    msg = "Transform function failed"

    specs = (
        [noop, raising_op],
        {"A": raising_op, "B": noop},
        {"A": [raising_op], "B": [noop]},
        {"A": [noop, raising_op], "B": [noop]},
    )
    for spec in specs:
        with pytest.raises(ValueError, match=msg):
            ser.transform(spec)
|
||||
|
||||
|
||||
def test_demo():
    """Demonstrate ``Series.agg`` with a list of names and with a renaming dict."""
    # demonstration tests
    ser = Series(range(6), dtype="int64", name="series")

    min_max = ser.agg(["min", "max"])
    tm.assert_series_equal(
        min_max, Series([0, 5], index=["min", "max"], name="series")
    )

    renamed = ser.agg({"foo": "min"})
    tm.assert_series_equal(renamed, Series([0], index=["foo"], name="series"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [str, lambda x: str(x)])
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
    """``str`` and an equivalent lambda are evaluated the same way by ``apply``."""
    # test that we are evaluating row-by-row first if by_row="compat"
    # else vectorized evaluation
    result = string_series.apply(func, by_row=by_row)

    if not by_row:
        assert result == str(string_series)
        return

    expected = string_series.map(func)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_evaluate_lambdas(string_series):
    """``agg`` with ``type`` (bare or wrapped in a lambda) warns and returns a Series."""
    # GH53325
    # in the future, the result will be a Series class.

    for func in (lambda x: type(x), type):
        with tm.assert_produces_warning(FutureWarning):
            result = string_series.agg(func)
        assert isinstance(result, Series) and len(result) == len(string_series)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op_name", ["agg", "apply"])
def test_with_nested_series(datetime_series, op_name):
    """A function returning a Series per element yields a DataFrame; ``agg`` also warns."""
    # GH 2316
    # .agg with a reducer and a transform, what to do
    msg = "cannot aggregate"

    def expand(x):
        return Series([x, x**2], index=["x", "x^2"])

    # Only the "agg" spelling is expected to warn here.
    warning = None if op_name != "agg" else FutureWarning
    method = getattr(datetime_series, op_name)
    with tm.assert_produces_warning(warning, match=msg):
        # GH52123
        result = method(expand)
    expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})
    tm.assert_frame_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = datetime_series.agg(expand)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_replicate_describe(string_series):
    """Rebuild ``Series.describe`` output through a dict passed to ``apply``."""
    # this also tests a result set that is all scalars
    spec = {
        "count": "count",
        "mean": "mean",
        "std": "std",
        "min": "min",
        "25%": lambda x: x.quantile(0.25),
        "50%": "median",
        "75%": lambda x: x.quantile(0.75),
        "max": "max",
    }
    expected = string_series.describe()
    result = string_series.apply(spec)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reduce(string_series):
    """``agg`` with a list of reduction names returns a Series labelled by those names."""
    # reductions with named functions
    names = ["sum", "mean"]
    result = string_series.agg(["sum", "mean"])
    expected = Series(
        [string_series.sum(), string_series.mean()],
        names,
        name=string_series.name,
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "how, kwds",
    [("agg", {}), ("apply", {"by_row": "compat"}), ("apply", {"by_row": False})],
)
def test_non_callable_aggregates(how, kwds):
    """String names of non-callable attributes such as ``size`` work in agg/apply."""
    # test agg using non-callable series attributes
    # GH 39116 - expand to apply
    ser = Series([1, 2, None])

    # Calling agg w/ just a string arg same as calling s.arg
    size_result = getattr(ser, how)("size", **kwds)
    assert size_result == ser.size

    # test when mixed w/ callable reducers
    expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
    list_result = getattr(ser, how)(["size", "count", "mean"], **kwds)
    tm.assert_series_equal(list_result, expected)

    dict_spec = {"size": "size", "count": "count", "mean": "mean"}
    dict_result = getattr(ser, how)(dict_spec, **kwds)
    tm.assert_series_equal(dict_result, expected)
|
||||
|
||||
|
||||
def test_series_apply_no_suffix_index(by_row):
    """Duplicate lambda names in a list spec are kept un-suffixed in the result index."""
    # GH36189
    ser = Series([4] * 3)
    funcs = ["sum", lambda x: x.sum(), lambda x: x.sum()]
    result = ser.apply(funcs, by_row=by_row)

    expected = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dti,exp",
    [
        (
            Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
            DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
        ),
        (
            Series(
                np.arange(10, dtype=np.float64),
                index=date_range("2020-01-01", periods=10),
                name="ts",
            ),
            DataFrame(np.repeat([[1, 2]], 10, axis=0), dtype="int64"),
        ),
    ],
)
@pytest.mark.parametrize("aware", [True, False])
def test_apply_series_on_date_time_index_aware_series(dti, exp, aware):
    """A Series-returning function applied to a naive or UTC index gives a frame."""
    # GH 25959
    # Calling apply on a localized time series should not cause an error
    index = dti.tz_localize("UTC").index if aware else dti.index
    result = Series(index).apply(lambda x: Series([1, 2]))
    tm.assert_frame_equal(result, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "by_row, expected", [("compat", Series(np.ones(10), dtype="int64")), (False, 1)]
)
def test_apply_scalar_on_date_time_index_aware_series(by_row, expected):
    """A constant function applied to a UTC index gives a Series of ones or the scalar 1."""
    # GH 25959
    # Calling apply on a localized time series should not cause an error
    utc_index = date_range("2020-01-01", periods=10, tz="UTC")
    series = Series(np.arange(10, dtype=np.float64), index=utc_index)
    result = Series(series.index).apply(lambda x: 1, by_row=by_row)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_to_timedelta(by_row):
    """``apply(pd.to_timedelta)`` matches calling ``pd.to_timedelta`` on the raw list."""
    valid_strings = ["00:00:01", "00:00:02"]
    direct = pd.to_timedelta(valid_strings)
    applied = Series(valid_strings).apply(pd.to_timedelta, by_row=by_row)
    tm.assert_series_equal(Series(direct), applied)

    # Same comparison with missing values mixed in.
    mixed = ["00:00:01", np.nan, pd.NaT, pd.NaT]

    direct = pd.to_timedelta(mixed)
    applied = Series(mixed).apply(pd.to_timedelta, by_row=by_row)
    tm.assert_series_equal(Series(direct), applied)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sum], ["sum"]),
        ([np.sum, np.mean], ["sum", "mean"]),
        (np.array([np.sum]), ["sum"]),
        (np.array([np.sum, np.mean]), ["sum", "mean"]),
    ],
)
@pytest.mark.parametrize(
    "how, kwargs",
    [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
)
def test_apply_listlike_reducer(string_series, ops, names, how, kwargs):
    """List-like reducers passed to agg/apply give a Series labelled by ``names``.

    ``agg`` is additionally expected to emit a FutureWarning naming the
    ``Series`` method that the callable is mapped to; ``apply`` must not warn.
    """
    # GH 39140
    expected = Series({name: op(string_series) for name, op in zip(names, ops)})
    expected.name = "series"
    warn = FutureWarning if how == "agg" else None
    # Use a group for the alternation and escape the dot; "[sum|mean]" is a
    # character class and would only check one character after "Series.".
    msg = rf"using Series\.(?:{'|'.join(names)})"
    with tm.assert_produces_warning(warn, match=msg):
        result = getattr(string_series, how)(ops, **kwargs)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sum},
        {"A": np.sum, "B": np.mean},
        Series({"A": np.sum}),
        Series({"A": np.sum, "B": np.mean}),
    ],
)
@pytest.mark.parametrize(
    "how, kwargs",
    [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
)
def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row):
    """Dict-like reducers passed to agg/apply give a Series labelled by the dict keys.

    ``agg`` is additionally expected to emit a FutureWarning naming the
    ``Series`` method that the callable is mapped to; ``apply`` must not warn.
    """
    # NOTE: the ``by_row`` fixture is requested but not used in this body;
    # the by_row values actually exercised come from ``kwargs``.
    # GH 39140
    expected = Series({name: op(string_series) for name, op in ops.items()})
    expected.name = string_series.name
    warn = FutureWarning if how == "agg" else None
    # Use a group for the alternation and escape the dot; "[sum|mean]" is a
    # character class and would only check one character after "Series.".
    msg = r"using Series\.(?:sum|mean)"
    with tm.assert_produces_warning(warn, match=msg):
        result = getattr(string_series, how)(ops, **kwargs)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_apply_listlike_transformer(string_series, ops, names, by_row):
    """List-like transformers give a frame whose columns are the function names."""
    # GH 39140
    with np.errstate(all="ignore"):
        columns = [op(string_series) for op in ops]
        expected = concat(columns, axis=1)
        expected.columns = names
        result = string_series.apply(ops, by_row=by_row)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, expected",
    [
        ([lambda x: x], DataFrame({"<lambda>": [1, 2, 3]})),
        ([lambda x: x.sum()], Series([6], index=["<lambda>"])),
    ],
)
def test_apply_listlike_lambda(ops, expected, by_row):
    """A list with one lambda gives a frame (transform) or a Series (reduction)."""
    # GH53400
    data = Series([1, 2, 3])
    outcome = data.apply(ops, by_row=by_row)
    tm.assert_equal(outcome, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sqrt},
        {"A": np.sqrt, "B": np.exp},
        Series({"A": np.sqrt}),
        Series({"A": np.sqrt, "B": np.exp}),
    ],
)
def test_apply_dictlike_transformer(string_series, ops, by_row):
    """Dict-like transformers give the keyed concatenation of each transformed Series."""
    # GH 39140
    with np.errstate(all="ignore"):
        pieces = {name: op(string_series) for name, op in ops.items()}
        expected = concat(pieces)
        expected.name = string_series.name
        result = string_series.apply(ops, by_row=by_row)
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, expected",
    [
        (
            {"a": lambda x: x},
            Series([1, 2, 3], index=MultiIndex.from_arrays([["a"] * 3, range(3)])),
        ),
        ({"a": lambda x: x.sum()}, Series([6], index=["a"])),
    ],
)
def test_apply_dictlike_lambda(ops, by_row, expected):
    """A dict with one lambda gives a MultiIndexed or a key-indexed Series."""
    # GH53400
    data = Series([1, 2, 3])
    outcome = data.apply(ops, by_row=by_row)
    tm.assert_equal(outcome, expected)
|
||||
|
||||
|
||||
def test_apply_retains_column_name(by_row):
    """The index name of the returned Series becomes the result's columns name."""
    # NOTE: ``by_row`` is requested as a fixture but is not passed to ``apply``.
    # GH 16380
    df = DataFrame({"x": range(3)}, Index(range(3), name="x"))

    def counting_series(x):
        return Series(range(x + 1), Index(range(x + 1), name="y"))

    result = df.x.apply(counting_series)
    rows = [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]]
    expected = DataFrame(
        rows,
        columns=Index(range(3), name="y"),
        index=Index(range(3), name="x"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_type():
    """``apply(type)`` returns the type of each element."""
    # GH 46719
    labels = ["a", "b", "c"]
    ser = Series([3, "string", float], index=labels)
    result = ser.apply(type)
    expected = Series([int, str, type], index=labels)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_apply_unpack_nested_data():
    """Lists of unequal length are unpacked into a NaN-padded frame."""
    # GH#55189
    nested = Series([[1, 2, 3], [4, 5, 6, 7]])
    result = nested.apply(lambda x: Series(x))
    columns = {0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]}
    expected = DataFrame(columns)
    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,39 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_relabel_no_duplicated_method():
|
||||
# this is to test there is no duplicated method used in agg
|
||||
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})
|
||||
|
||||
result = df["A"].agg(foo="sum")
|
||||
expected = df["A"].agg({"foo": "sum"})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df["B"].agg(foo="min", bar="max")
|
||||
expected = df["B"].agg({"foo": "min", "bar": "max"})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "using Series.[sum|min|max]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df["B"].agg(foo=sum, bar=min, cat="max")
|
||||
msg = "using Series.[sum|min|max]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_relabel_duplicated_method():
|
||||
# this is to test with nested renaming, duplicated method can be used
|
||||
# if they are assigned with different new names
|
||||
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})
|
||||
|
||||
result = df["A"].agg(foo="sum", bar="sum")
|
||||
expected = pd.Series([6, 6], index=["foo", "bar"], name="A")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "using Series.min"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df["B"].agg(foo=min, bar="min")
|
||||
expected = pd.Series([1, 1], index=["foo", "bar"], name="B")
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,84 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "args, kwargs, increment",
    [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
    """``Series.transform`` forwards extra positional and keyword arguments."""
    # GH 43357
    def f(x, a=0, b=0, c=0):
        return x + a + 10 * b + 100 * c

    ser = Series([1, 2])
    expected = ser + increment
    result = ser.transform(f, 0, *args, **kwargs)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(string_series, ops, names):
    """List-like ``transform`` gives a frame whose columns are the function names."""
    # GH 35964
    with np.errstate(all="ignore"):
        columns = [op(string_series) for op in ops]
        expected = concat(columns, axis=1)
        expected.columns = names
        result = string_series.transform(ops)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_listlike_func_with_args():
    """Extra args/kwargs reach every function in a list passed to ``transform``."""
    # GH 50624

    ser = Series([1, 2, 3])

    def foo1(x, a=1, c=0):
        return x + a + c

    def foo2(x, b=2, c=0):
        return x + b + c

    funcs = [foo1, foo2]

    # foo1 does not accept ``b``, so forwarding it must fail.
    msg = r"foo1\(\) got an unexpected keyword argument 'b'"
    with pytest.raises(TypeError, match=msg):
        ser.transform(funcs, 0, 3, b=3, c=4)

    result = ser.transform(funcs, 0, 3, c=4)
    expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]})
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(string_series, box):
    """Dict-like ``transform`` (dict or Series spec) gives a frame keyed by label."""
    # GH 35964
    with np.errstate(all="ignore"):
        sqrt_part = np.sqrt(string_series)
        abs_part = np.abs(string_series)
        expected = concat([sqrt_part, abs_part], axis=1)
        expected.columns = ["foo", "bar"]
        spec = box({"foo": np.sqrt, "bar": np.abs})
        result = string_series.transform(spec)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_dictlike_mixed():
    """A dict mixing list and scalar values gives MultiIndex columns."""
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    ser = Series([1, 4])
    spec = {"b": ["sqrt", "abs"], "c": "sqrt"}
    result = ser.transform(spec)

    columns = MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)])
    expected = DataFrame([[1.0, 1, 1.0], [2.0, 4, 2.0]], columns=columns)
    tm.assert_frame_equal(result, expected)
|
326
lib/python3.13/site-packages/pandas/tests/apply/test_str.py
Normal file
326
lib/python3.13/site-packages/pandas/tests/apply/test_str.py
Normal file
@ -0,0 +1,326 @@
|
||||
from itertools import chain
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_number
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import (
|
||||
frame_transform_kernels,
|
||||
series_transform_kernels,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"])
@pytest.mark.parametrize(
    "args,kwds",
    [
        pytest.param([], {}, id="no_args_or_kwds"),
        pytest.param([1], {}, id="axis_from_args"),
        pytest.param([], {"axis": 1}, id="axis_from_kwds"),
        pytest.param([], {"numeric_only": True}, id="optional_kwds"),
        pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"),
    ],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
    """A string method name in agg/apply matches calling the DataFrame method directly."""
    agg_with_extra_args = how == "agg" and len(args) > 1
    if agg_with_extra_args:
        marker = pytest.mark.xfail(
            raises=TypeError,
            reason="agg/apply signature mismatch - agg passes 2nd "
            "argument to func",
        )
        request.applymarker(marker)
    result = getattr(float_frame, how)(func, *args, **kwds)
    expected = getattr(float_frame, func)(*args, **kwds)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("arg", ["sum", "mean", "min", "max", "std"])
def test_with_string_args(datetime_series, arg):
    """``Series.apply`` with a method name equals calling that method."""
    via_apply = datetime_series.apply(arg)
    direct = getattr(datetime_series, arg)()
    assert via_apply == direct
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["mean", "median", "std", "var"])
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_np_reducer(op, how):
    """String reducers in agg/apply match the numpy function of the same name."""
    # GH 39116
    float_frame = DataFrame({"a": [1, 2], "b": [3, 4]})
    result = getattr(float_frame, how)(op)
    # pandas ddof defaults to 1, numpy to 0
    if op in ("std", "var"):
        kwargs = {"ddof": 1}
    else:
        kwargs = {}
    reduced = getattr(np, op)(float_frame, axis=0, **kwargs)
    expected = Series(reduced, index=float_frame.columns)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
)
@pytest.mark.parametrize("how", ["transform", "apply"])
def test_apply_np_transformer(float_frame, op, how):
    """String transformers in transform/apply match the numpy function of the same name."""
    # GH 39116

    # float_frame will _usually_ have negative values, which will
    # trigger the warning here, but let's put one in just to be sure
    float_frame.iloc[0, 0] = -1.0
    warn = RuntimeWarning if op in ["log", "sqrt"] else None

    with tm.assert_produces_warning(warn, check_stacklevel=False):
        # float_frame fixture is defined in conftest.py, so we don't check the
        # stacklevel as otherwise the test would fail.
        result = getattr(float_frame, how)(op)
        expected = getattr(np, op)(float_frame)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("sum", 0),
                ("max", np.nan),
                ("min", np.nan),
                ("all", True),
                ("any", False),
                ("mean", np.nan),
                ("prod", 1),
                ("std", np.nan),
                ("var", np.nan),
                ("median", np.nan),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("sum", 6),
                ("max", 3),
                ("min", 1),
                ("all", True),
                ("any", True),
                ("mean", 2),
                ("prod", 6),
                ("std", 1),
                ("var", 1),
                ("median", 2),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("sum", "abc"),
                ("max", "c"),
                ("min", "a"),
                ("all", True),
                ("any", True),
            ],
        ),
    ),
)
def test_agg_cython_table_series(series, func, expected):
    """Reducing functions passed to ``Series.agg`` give the expected scalar."""
    # GH21224
    # test reducing functions in
    # pandas.core.base.SelectionMixin._cython_table
    # Only non-string funcs are expected to warn.
    warn = FutureWarning if not isinstance(func, str) else None
    with tm.assert_produces_warning(warn, match="is currently using Series.*"):
        result = series.agg(func)
    if not is_number(expected):
        assert result == expected
    else:
        assert np.isclose(result, expected, equal_nan=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("cumprod", Series([], dtype=np.float64)),
                ("cumsum", Series([], dtype=np.float64)),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("cumprod", Series([np.nan, 1, 2, 6])),
                ("cumsum", Series([np.nan, 1, 3, 6])),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
        ),
    ),
)
def test_agg_cython_table_transform_series(series, func, expected):
    """Cumulative functions passed to ``Series.agg`` give the expected Series."""
    # GH21224
    # test transforming functions in
    # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
    # Only non-string funcs are expected to warn.
    warn = FutureWarning if not isinstance(func, str) else None
    with tm.assert_produces_warning(warn, match="is currently using Series.*"):
        outcome = series.agg(func)
    tm.assert_series_equal(outcome, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        # empty frame
        tm.get_cython_table_params(
            DataFrame(),
            [
                ("sum", Series(dtype="float64")),
                ("max", Series(dtype="float64")),
                ("min", Series(dtype="float64")),
                ("all", Series(dtype=bool)),
                ("any", Series(dtype=bool)),
                ("mean", Series(dtype="float64")),
                ("prod", Series(dtype="float64")),
                ("std", Series(dtype="float64")),
                ("var", Series(dtype="float64")),
                ("median", Series(dtype="float64")),
            ],
        ),
        # 2x2 frame containing one NaN
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("sum", Series([1.0, 3])),
                ("max", Series([1.0, 2])),
                ("min", Series([1.0, 1])),
                ("all", Series([True, True])),
                ("any", Series([True, True])),
                ("mean", Series([1, 1.5])),
                ("prod", Series([1.0, 2])),
                ("std", Series([np.nan, 0.707107])),
                ("var", Series([np.nan, 0.5])),
                ("median", Series([1, 1.5])),
            ],
        ),
    ),
)
def test_agg_cython_table_frame(df, func, expected, axis):
    # GH 21224
    # Reducing functions listed in
    # pandas.core.base.SelectionMixin._cython_table, applied through
    # DataFrame.agg along the requested axis.
    #
    # A FutureWarning is only expected when ``func`` is not given as a string.
    expected_warning = FutureWarning if not isinstance(func, str) else None
    with tm.assert_produces_warning(
        expected_warning, match="is currently using DataFrame.*"
    ):
        # GH#53425
        result = df.agg(func, axis=axis)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        # empty frame
        tm.get_cython_table_params(
            DataFrame(),
            [
                ("cumprod", DataFrame()),
                ("cumsum", DataFrame()),
            ],
        ),
        # 2x2 frame containing one NaN
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
                ("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
            ],
        ),
    ),
)
def test_agg_cython_table_transform_frame(df, func, expected, axis):
    # GH 21224
    # Transforming functions (cumprod, cumsum) listed in
    # pandas.core.base.SelectionMixin._cython_table, applied through
    # DataFrame.agg along the requested axis.
    column_wise = axis in ("columns", 1)
    if column_wise:
        # operating blockwise doesn't let us preserve dtypes
        expected = expected.astype("float64")

    # A FutureWarning is only expected when ``func`` is not given as a string.
    expected_warning = FutureWarning if not isinstance(func, str) else None
    with tm.assert_produces_warning(
        expected_warning, match="is currently using DataFrame.*"
    ):
        # GH#53425
        result = df.agg(func, axis=axis)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_groupby_kernel_series(request, string_series, op):
    # GH 35964
    # Series.transform(op) must agree with the groupby transform of the same
    # kernel when every row carries the same group label.
    if op == "ngroup":
        request.applymarker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    is_fillna = op == "fillna"
    # fillna is the only kernel here that is called with an argument
    args = [0.0] if is_fillna else []
    # only the fillna groupby call is expected to warn
    expected_warning = FutureWarning if is_fillna else None
    msg = "SeriesGroupBy.fillna is deprecated"

    # identical labels for every row
    ones = np.ones(string_series.shape[0])
    with tm.assert_produces_warning(expected_warning, match=msg):
        grouped = string_series.groupby(ones)
        expected = grouped.transform(op, *args)

    result = string_series.transform(op, 0, *args)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", frame_transform_kernels)
def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
    # GH 35964
    # DataFrame.transform(op, axis) must agree with the groupby transform of
    # the same kernel when every label along ``axis`` is identical.
    if op == "ngroup":
        request.applymarker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    is_fillna = op == "fillna"
    # fillna is the only kernel here that is called with an argument
    args = [0.0] if is_fillna else []
    # only the fillna groupby transform is expected to warn
    op_warning = FutureWarning if is_fillna else None
    op_msg = "DataFrameGroupBy.fillna is deprecated"

    # the groupby(axis=...) deprecation message depends on the axis
    along_index = axis in (0, "index")
    if along_index:
        msg = "The 'axis' keyword in DataFrame.groupby is deprecated"
    else:
        msg = "DataFrame.groupby with axis=1 is deprecated"

    def check_against_groupby():
        # Rebuild the labels on every call: the frame gains a column between
        # the two checks, which changes shape[1].
        n_labels = float_frame.shape[0] if along_index else float_frame.shape[1]
        ones = np.ones(n_labels)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            gb = float_frame.groupby(ones, axis=axis)

        with tm.assert_produces_warning(op_warning, match=op_msg):
            expected = gb.transform(op, *args)

        result = float_frame.transform(op, axis, *args)
        tm.assert_frame_equal(result, expected)

    check_against_groupby()

    # same thing, but ensuring we have multiple blocks
    assert "E" not in float_frame.columns
    float_frame["E"] = float_frame["A"].copy()
    assert len(float_frame._mgr.arrays) > 1

    check_against_groupby()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"])
def test_transform_method_name(method):
    # GH 19760
    # Passing a method name to DataFrame.transform must give the same frame
    # as calling that method directly on the frame.
    frame = DataFrame({"A": [-1, 2]})

    expected = getattr(frame, method)()
    result = frame.transform(method)

    tm.assert_frame_equal(result, expected)
|
Reference in New Issue
Block a user